├── .gitattributes ├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── guides ├── getting-started.md ├── synthesis.md └── your-own-repo.md ├── internal └── notes.md ├── labs ├── bp │ ├── global_predictor.drawio │ ├── global_predictor.svg │ ├── saturation_counter.drawio │ └── saturation_counter.svg ├── branch-prediction.md ├── caching.md ├── caching │ ├── figures │ │ ├── SRAM_Array.png │ │ └── SRAM_Cell_6T.svg │ └── part2 │ │ ├── starter │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── synth │ │ │ ├── Makefile │ │ │ └── yosys.tcl │ │ ├── tb │ │ │ ├── assert.svh │ │ │ └── victim_cache_tb.sv │ │ ├── ucsbece154b_victim_cache.sv │ │ ├── unread.sv │ │ └── victim_cache.core │ │ └── ucsbece154b_icache.sv ├── intro.md ├── intro │ └── git-example.txt ├── ooo.md ├── ooo │ └── figures │ │ └── scoreboard.svg ├── sv.md ├── sv │ └── starter │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── README.md │ │ ├── lint.vlt │ │ ├── requirements.txt │ │ ├── synth │ │ ├── Makefile │ │ └── yosys.tcl │ │ ├── tb │ │ ├── assert.svh │ │ └── fifo_tb.sv │ │ ├── ucsbece154b_fifo.core │ │ └── ucsbece154b_fifo.sv ├── vm.md └── vm │ └── priv_levels.png ├── programs ├── Makefile ├── aux │ └── link.ld ├── bp │ ├── div.S │ ├── loop.S │ └── spaghetti.S ├── examples │ └── asm.S ├── rvf │ └── fpu_example.S └── vm │ ├── os.S │ └── privilege.S └── setup.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | 2 | # EOL 3 | * text eol=lf 4 | 5 | # Language Corrections 6 | *.vh linguist-language=Verilog 7 | *.svh linguist-language=SystemVerilog 8 | *.vlt linguist-language=SystemVerilog 9 | *.core linguist-language=YAML 10 | *.drawio linguist-language=XML 11 | 12 | # Markdown 13 | *.md linguist-documentation=false 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | ece 3 | tools* 4 | 
*.elf 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cva6"] 2 | path = cva6 3 | url = https://github.com/openhwgroup/cva6.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2023 Ethan Sifferman 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Architecture Labs with CVA6 3 | 4 | ## Labs 5 | 6 | 1. [Introduction to CVA6](./labs/intro.md) 7 | 2. [Branch Prediction](./labs/branch-prediction.md) 8 | 3. [Synthesizable SystemVerilog](./labs/sv.md) 9 | 4. [Caching](./labs/caching.md) 10 | 5. [Out-of-Order](./labs/ooo.md) 11 | 6. [Virtual Memory](./labs/vm.md) 12 | 13 | ## Guides 14 | 15 | * [Getting Started](./guides/getting-started.md) 16 | * [Using GitHub for Storing Class Materials](./guides/your-own-repo.md) 17 | * [Writing Synthesizable SystemVerilog](./guides/synthesis.md) 18 | -------------------------------------------------------------------------------- /guides/getting-started.md: -------------------------------------------------------------------------------- 1 | 2 | # Getting Started 3 | 4 | ## About CVA6 5 | 6 | From the [cva6 README](https://github.com/openhwgroup/cva6/tree/b44a696bbead23dafb068037eff00a90689d4faf#readme): 7 | 8 | > CVA6 is a 6-stage, single issue, in-order CPU which implements the 64-bit RISC-V instruction set. It fully implements I, M, A and C extensions as specified in Volume I: User-Level ISA V 2.3 as well as the draft privilege extension 1.10. It implements three privilege levels M, S, U to fully support a Unix-like operating system. Furthermore it is compliant to the draft external debug spec 0.13. 9 | 10 | CVA6 is an open-source CPU core, widely used in academia and industry. It is primarily written by [Florian Zaruba](https://github.com/zarubaf), which makes its code style and organization very consistent. 11 | 12 | ## About "Labs with CVA6" 13 | 14 | This repository includes several labs aimed at teaching advanced architecture techniques. To use this repository, you will need to: 15 | 16 | 1. Ensure you are on a Linux/WSL2 machine. 17 | 2. 
[Create an ssh key for GitHub](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent?platform=linux) and [add it to GitHub](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/adding-a-new-ssh-key-to-your-github-account?platform=linux). 18 | 3. Follow the [GitHub setup guide](./your-own-repo.md) 19 | 20 | ## Tool Setup 21 | 22 | All necessary tools have been installed on linux.engr.ucsb.edu. Therefore, we recommend using those machines to expedite the setup. If you want to run CVA6 simulations using your local Linux/WSL2 machine, you will need to install everything yourself. 23 | 24 | ### Local Tool Setup 25 | 26 | 1. Set `$RISCV` to wherever you want the tools to be installed, e.g. run `RISCV=~/riscv-tools` 27 | 2. Run [`./cva6/ci/setup.sh`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/ci/setup.sh). (If you get an error, you may need to rerun parts of the script manually). 28 | 3. Install the [OSS CAD Suite](https://github.com/YosysHQ/oss-cad-suite-build). You can do this by extracting the latest release tarball into a `"~/Utils/oss-cad-suite"` directory. 29 | 4. 
Add the following to your `"~/.bashrc"`, and replace the values of the 3 environment variables: 30 | 31 | ```bash 32 | # ECE 154B local machine ~/.bashrc additions 33 | # Author: Ethan Sifferman 34 | # Purpose: Configure riscv64-unknown-elf-gcc, fesvr, verilator-4.110, gtkwave, pip user 35 | export RISCV_ROOT= 36 | export VERILATOR_ROOT= 37 | export OSS_CAD_SUITE= 38 | export PATH=$RISCV_ROOT/bin:$VERILATOR_ROOT/bin:$PATH:$OSS_CAD_SUITE/bin:~/.local/bin 39 | # end ECE 154B 40 | ``` 41 | 42 | ### linux.engr.ucsb.edu Tool Setup 43 | 44 | Add the following to your `"~/.bashrc"`: 45 | 46 | ```bash 47 | # ECE 154B linux.engr ~/.bashrc additions 48 | # Author: Ethan Sifferman 49 | # Purpose: Configure riscv64-unknown-elf-gcc, fesvr, verilator-4.110, gtkwave, modelsim, pip user 50 | export RISCV_ROOT=/ece/riscv 51 | export VERILATOR_ROOT=/ece/verilator-4.110 52 | export MODEL_TECH=/ece/mentor/ModelSimSE-10.6e/modeltech/bin 53 | export LM_LICENSE_FILE=1717@license.ece.ucsb.edu 54 | export PATH=$MODEL_TECH:$RISCV_ROOT/bin:$VERILATOR_ROOT/bin:$PATH:/ece/oss-cad-suite/bin:~/.local/bin 55 | # end ECE 154B 56 | ``` 57 | 58 | ## Regular Setup 59 | 60 | Before starting CVA6 simulations on a new terminal session, ensure proper environment configuration by running `source setup.sh`. 61 | 62 | ## Running Simulations 63 | 64 | After setup is completed, you should be able to run CVA6 simulations. The primary way to run CVA6 simulations is to build and load an [ELF file](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format). An ELF file is the standard file format used for Linux executables, so this means we can write any C/C++/RISC-V assembly program, and gcc/g++ will output a binary readable by CVA6. 
(You can see how the Verilog processes the ELF file [here](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/corev_apu/tb/ariane_tb.sv#L132-L152) and [here](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/corev_apu/tb/dpi/elfloader.cc)). 65 | 66 | ### Building an ELF 67 | 68 | You can see example programs in [`"./programs/examples"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/examples). You can compile the program to an ELF file using the [`"./programs/Makefile"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/Makefile). To use the Makefile, run `cd programs`, then run `make <path/to/program>.elf`, e.g. `make examples/asm.elf`. (Be sure that you've run `source setup.sh` first.) 69 | 70 | ### Running the Simulation 71 | 72 | 1. Ensure you've run `source setup.sh` 73 | 2. `cd cva6` 74 | 3. `make verilate DEBUG=1 TRACE_FAST=1` 75 | 4. `./work-ver/Variane_testharness -v dump.vcd <path/to/program.elf>` (You can change the vcd filename to whatever you want.) 76 | 5. To view the waves, run `gtkwave dump.vcd`. (This should probably be done in another terminal to not interfere with running more simulations.) 77 | 6. Navigate to **TOP.ariane_testharness.i_ariane.i_cva6** to see all the logic for the core. 78 | 7. Once you've selected all the waves necessary for your lab, be sure to save your workspace using "File->Write Save File", so next time you don't have to reopen all necessary waves again. 79 | -------------------------------------------------------------------------------- /guides/synthesis.md: -------------------------------------------------------------------------------- 1 | 2 | # Writing Synthesizable SystemVerilog 3 | 4 | This guide gives a brief overview of how to write synthesizable SystemVerilog. 
(When this guide fails, please move on to this guide: ["Busting the Myth that SystemVerilog is only for Verification"](https://sutherland-hdl.com/papers/2013-SNUG-SV_Synthesizable-SystemVerilog_paper.pdf).) 5 | 6 | SystemVerilog and Verilog are overwhelmingly popular in digital design, but are partnered with extremely underdeveloped and unreliable software. This leads to extremely frequent instances of Verilog software not warning on bad code, and instances of software not understanding good code. For example, here is an example of a Verilog-2005 design that works with some tools, but not with others: . 7 | 8 | By following this guide, you will learn how to overcome the shortcomings of Verilog/SystemVerilog tools and write synthesizable code that meets your desired specifications. 9 | 10 | All code examples follow the [lowRISC Verilog Coding Style Specifications](https://github.com/lowRISC/style-guides/blob/master/VerilogCodingStyle.md). 11 | 12 | ## Table of Contents 13 | 14 | * [Table of Contents](#table-of-contents) 15 | * [DigitalJS Online](#digitaljs-online) 16 | * [Inference](#inference) 17 | * [Combinational Logic](#combinational-logic) 18 | * [Latches](#latches) 19 | * [Flip-Flops](#flip-flops) 20 | * [Memory](#memory) 21 | * [Clock and Reset](#clock-and-reset) 22 | * [Simulation Tools](#simulation-tools) 23 | * [Synthesis Tools](#synthesis-tools) 24 | * [OSS CAD Suite](#oss-cad-suite) 25 | 26 | ## DigitalJS Online 27 | 28 | [DigitalJS Online](https://digitaljs.tilk.eu/) is an incredible website that uses Verilator, Yosys, and ElkJS to lint, synthesize, and visualize any valid Verilog design. Playing around with this website for more than an hour will teach you more about Verilog than reading 90% of all the available online Verilog resources. 
29 | 30 | ## Inference 31 | 32 | Verilog and SystemVerilog are behavioral, which means that these languages describe the intended behavior of a circuit, and not the logic required to implement the design using physical hardware. 33 | 34 | Inference in Verilog and SystemVerilog refers to the process by which logic cells or hardware components are automatically generated from the behavioral code. Synthesis tools will automatically infer which logic gates and other hardware components are necessary to implement the specified behavior. 35 | 36 | Additionally, when mapping a synthesized design to a specified target (FPGA/ASIC), the synthesis tool will need to adjust its netlist to ensure that it only uses what logic cells are available. For example, an FPGA without any dedicated adder cells must use the gates it has available to implement an adder if one was described behaviorally. 37 | 38 | The exception to this is that Verilog and SystemVerilog allow explicit instantiation of synthesis primitives to force logic cells to be added to the netlist. For example, [`$_DFF_N_`](https://github.com/YosysHQ/yosys/blob/master/techlibs/ice40/ff_map.v), [`SB_MAC16`](https://www.latticesemi.com/-/media/LatticeSemi/Documents/ApplicationNotes/AD/DSPFunctionUsageGuideforICE40Devices.ashx?document_id=50669), [`DSP48E1`](https://docs.xilinx.com/v/u/en-US/ug479_7Series_DSP48E1), etc. 39 | 40 | ## Combinational Logic 41 | 42 | Combinational logic can be generated either by using `assign` or `always_comb`. The rule of thumb is, if you can do it cleanly in 1 line of code, use `assign`. Otherwise, use `always_comb`. 43 | 44 | ```systemVerilog 45 | logic [WIDTH-1:0] data_i, data_plus1, data_plus2, data_o; 46 | 47 | assign data_plus1 = data_i + WIDTH'(1); 48 | assign data_plus2 = data_plus1 + WIDTH'(1); 49 | assign data_o = data_plus2 + WIDTH'(1); 50 | ``` 51 | 52 | `always_comb` blocks allow for procedural assignment, which enables greater design flexibility. 
53 | 54 | ```systemVerilog 55 | always_comb begin : data_set 56 | data_o = data_i + WIDTH'(1); 57 | data_o = data_o + WIDTH'(1); 58 | data_o = data_o + WIDTH'(1); 59 | end 60 | ``` 61 | 62 | ## Latches 63 | 64 | Latches are generally frowned upon. Many FPGAs don't even have a latch cell you can use. Only use a latch if you REALLY know what you are doing. 65 | 66 | To infer a latch, you should structure your code like this: 67 | 68 | ```systemVerilog 69 | always_latch begin : y_latch 70 | if (en_i) 71 | y_o <= x_i; 72 | end 73 | ``` 74 | 75 | An unwanted latch is generated in an `always_comb` block when a net is not assigned a value under every possible input condition. A common good practice to avoid unwanted latches is to set default values for all combinational nets at the top of the `always_comb` block. 76 | 77 | ```systemVerilog 78 | always_comb begin : y_latch 79 | y1_o = 0; // default value 80 | if (en_i) begin 81 | y0_o = x0_i; // latch (gives error) 82 | y1_o = x1_i; // no latch 83 | end 84 | end 85 | ``` 86 | 87 | ## Flip-Flops 88 | 89 | A common practice is to split your flip-flops into `_d` and `_q` nets. This way, your code is organized better because all your combinational logic is clearly done to your `_d` net in an `always_comb` block, and your `_q` nets are assigned in an `always_ff` block using reset and the `_d` nets. Plus, `always_ff` blocks do not allow for procedural assignment, so `always_comb` blocks are always better for combinational logic. 90 | 91 | Another popular naming strategy is to use `_next` and `_reg` instead of `_d` and `_q`. This is a personal preference, but it is crucial to match the coding style already introduced by the developers of the project. If it's your project, pick your favorite, and stick to it! 92 | 93 | Note: the following are infamously buggy in synthesis tools: 94 | 95 | * FF initial values (`initial data = 0;` or `logic data = 0;`). Instead, use a reset value for all FFs. 96 | * Non-clock/reset logic in `always_ff`. 
If you are following the lowRISC naming style with `_d` and `_q`, this should never happen. 97 | 98 | To infer a flip-flop, you should structure your code like this: 99 | 100 | ```systemVerilog 101 | logic [WIDTH-1:0] data_d, data_q; 102 | 103 | always_comb begin : data_set 104 | data_d = input_i; 105 | end 106 | 107 | always_ff @(posedge clk_i) begin : data_ff 108 | if (rst_i) begin 109 | data_q <= '0; 110 | end else begin 111 | data_q <= data_d; 112 | end 113 | end 114 | ``` 115 | 116 | ## Memory 117 | 118 | Memories in designs are common, so FPGAs often have built-in block RAMs (BRAMs) that you can use. Unfortunately, synthesis tools are usually really bad at inferring memories from a design, and will often incorrectly infer an array of DFFs. If you want a BRAM, then you should either use target-specific BRAM primitives ([SB_RAM40_4K](https://www.latticesemi.com/-/media/LatticeSemi/Documents/ApplicationNotes/MO/MemoryUsageGuideforiCE40Devices.ashx?document_id=47775), [RAMB36E1](https://docs.xilinx.com/r/en-US/ug953-vivado-7series-libraries/RAMB36E1), etc.) or write clear behavioral code that the tool developers allow for memory inference. 119 | 120 | For a tool to have the greatest success of inferring a memory, you should structure your code like this: 121 | 122 | ```systemVerilog 123 | // instantiation 124 | logic [DATA_WIDTH-1:0] MEM [0:NR_ENTRIES-1]; 125 | 126 | // read port 127 | assign rdata_o = MEM[raddr_i]; 128 | 129 | // write port 130 | always_ff @(posedge clk_i) begin : mem_write 131 | if (we_i) 132 | MEM[waddr_i] <= wdata_i; 133 | end 134 | ``` 135 | 136 | ## Clock and Reset 137 | 138 | Clock and reset nets hold highly sensitive global signals, and are better optimized when they are only used in `always_ff` blocks and not `always_comb` blocks. 139 | 140 | When synthesizing your design, you often want to manually tell your synthesis software which nets are clocks. 
(Vivado example: [ucsbieee/mapache64 `"clk_constraints.xdc"`](https://github.com/ucsbieee/mapache64/blob/6ab8816c592a68c5168a956eed243ba345927583/hardware-level/rtl/top/synth/boards/cmod_a7/clk_constraints.xdc).) 141 | 142 | ## Simulation Tools 143 | 144 | There are several Verilog Simulators to choose from, and each have pros and cons. Here is a quick summary of a few important ones: 145 | 146 | ### Open Source Simulation Tools 147 | 148 | * Icarus Verilog 149 | * Pros: Easy to use, good Verilog-2005 support 150 | * Cons: Poor SystemVerilog support, slow for large designs, minimal error messages 151 | * Verilator 152 | * Pros: Fast, gives incredibly detailed warnings, great SystemVerilog/Verilog support 153 | * Cons: [Does not support unknown (X) values](https://github.com/verilator/verilator/issues/3645) 154 | 155 | ### Proprietary Simulation Tools 156 | 157 | (All good, but all expensive) 158 | 159 | * Siemens ModelSim 160 | * Synopsys VCS 161 | * Cadence NCSim 162 | 163 | ## Synthesis Tools 164 | 165 | ### Open Source Synthesis Tools 166 | 167 | The only good open-source synthesis software is Yosys. Yosys is a buggy mess that has laughable SystemVerilog support, and is infamous for absolutely scrambling designs without giving any warnings. Therefore, it is CRUCIAL that you ensure your code is well-linted and follows all best-practices before using Yosys. 168 | 169 | ### Proprietary Synthesis Tools 170 | 171 | The proprietary tool will depend on what your FPGA supports. Though [Vivado](https://www.xilinx.com/products/design-tools/vivado.html) is well-liked and offers free synthesis for most Xilinx FPGAs. 172 | 173 | ## OSS CAD Suite 174 | 175 | If you want a fully open-source design flow, you will need to install nearly a dozen tools that each do different things. 
You will need [Icarus](https://github.com/steveicarus/iverilog), [Verilator](https://github.com/verilator/verilator), [GTKWave](https://github.com/gtkwave/gtkwave), [Yosys](https://github.com/YosysHQ/yosys), [Surelog](https://github.com/chipsalliance/Surelog), [sv2v](https://github.com/zachjs/sv2v), [nextpnr](https://github.com/YosysHQ/nextpnr), [IceStorm](https://github.com/YosysHQ/icestorm), [openFPGALoader](https://github.com/trabucayre/openFPGALoader), and more. Most of these tools do not provide updated binaries and expect you to compile them yourself. However, as of 2021 this overwhelming scavenger hunt has been made exponentially easier! 176 | 177 | [OSS CAD Suite](https://github.com/YosysHQ/oss-cad-suite-build) is a project that releases updated binaries of all common open-source digital design tools in one TAR file. 178 | 179 | This is how to curl a release from OSS: 180 | 181 | *Note: check the [latest release](https://github.com/YosysHQ/oss-cad-suite-build/releases/latest) and edit the filenames in the script accordingly.* 182 | 183 | ```bash 184 | cd ~/Downloads 185 | curl -JOL https://github.com/YosysHQ/oss-cad-suite-build/releases/download/YYYY-MM-DD/oss-cad-suite-linux-x64-YYYYMMDD.tgz 186 | tar -xzvf oss-cad-suite-linux-x64-YYYYMMDD.tgz -C ~/Utils/ 187 | ``` 188 | 189 | Be sure to add the OSS CAD Suite `"bin"` directory to `PATH`. 190 | -------------------------------------------------------------------------------- /guides/your-own-repo.md: -------------------------------------------------------------------------------- 1 | 2 | # Storing Class Materials in Your Own GitHub Repository 3 | 4 | This guide offers the method of code organization and collaboration that we recommend. 5 | 6 | ## Git/GitHub Setup Steps 7 | 8 | 1. The labs are recommended to be completed in groups of 3, so we recommend [creating a GitHub organization](https://github.com/account/organizations/new?plan=free) for your group. 9 | 10 | 2. 
Create a new *public or private*[^1] GitHub repository and clone it to your machine. You can either make a new repository for each lab, or use the same repository for all labs. 11 | 12 | 3. If you made a private repository, give the instructors read-access to your repository. Their GitHub usernames are "dstrukov" and "sifferman". 13 | 14 | 4. Add [labs-with-cva6](https://github.com/sifferman/labs-with-cva6) as a submodule with `git submodule add git@github.com:sifferman/labs-with-cva6.git`. 15 | 16 | 5. If you ever need to change the commit of your [labs-with-cva6](https://github.com/sifferman/labs-with-cva6) submodule, you will need to do the following: 17 | 18 | 1. `cd labs-with-cva6` 19 | 2. `git pull origin main` to grab the latest commit, or `git checkout <commit-hash>` to grab a specific commit 20 | 3. `cd ..` 21 | 4. `git submodule update --init --recursive` 22 | 5. `git add labs-with-cva6` 23 | 6. `git commit -m "<commit message>"` 24 | 7. `git push origin main` 25 | 26 | 6. Add your source files to your repository where desired. You may optionally add a script to automatically overwrite CVA6's source files with your own source files. 27 | 28 | ## Footnotes 29 | 30 | [^1]: Disclaimer: You are welcome to make any code you write yourself publicly available, although your submission will be verified with a strong similarity report checker. Learning from online resources is encouraged, but blatant plagiarism will not be tolerated. 
31 | -------------------------------------------------------------------------------- /internal/notes.md: -------------------------------------------------------------------------------- 1 | 2 | # Changes in Setup 3 | 4 | * `"cva6/ci/build-riscv-gcc.sh"`: riscv-gnu-toolchain now uses latest version 5 | * `"cva6/ci/install-spike.sh"`: no longer re-clones riscv/riscv-isa-sim 6 | * `"cva6/Makefile"`: change to `$(if $(DEBUG), --trace --trace-structs,)` 7 | * `"cva6/core/cva6.sv"`: add `initial begin $dumpfile( "cva6.vcd" ); $dumpvars; end` 8 | 9 | ```bash 10 | ../scripts/mk-install-dirs.sh /home/ethan/GitHub/cva6_wrapper/tools/include 11 | for dir in fesvr ; \ 12 | do \ 13 | ../scripts/mk-install-dirs.sh /home/ethan/GitHub/cva6_wrapper/tools/include/$dir; \ 14 | /bin/install -c -m 644 config.h /home/ethan/GitHub/cva6_wrapper/tools/include/$dir; \ 15 | done 16 | mkdir /home/ethan/GitHub/cva6_wrapper/tools/include/fesvr 17 | ../scripts/mk-install-dirs.sh /home/ethan/GitHub/cva6_wrapper/tools/include 18 | for file in fesvr/byteorder.h fesvr/elf.h fesvr/elfloader.h fesvr/htif.h fesvr/dtm.h fesvr/memif.h fesvr/syscall.h fesvr/context.h fesvr/htif_pthread.h fesvr/htif_hexwriter.h fesvr/option_parser.h fesvr/term.h fesvr/device.h fesvr/rfb.h fesvr/tsi.h riscv/mmio_plugin.h; \ 19 | do \ 20 | ../scripts/mk-install-dirs.sh /home/ethan/GitHub/cva6_wrapper/tools/include/`dirname $file`; \ 21 | /bin/install -c -m 644 ../$file /home/ethan/GitHub/cva6_wrapper/tools/include/`dirname $file`; \ 22 | done 23 | ``` 24 | -------------------------------------------------------------------------------- /labs/bp/global_predictor.drawio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 
59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /labs/bp/global_predictor.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
BHT
BHT
Gshare
or
Gselect
Gshare...
Predict index
Predict in...
Prediction
Prediction
Update index
Update ind...
Resolution
Resolution
Update PC
Update PC
GHR
GHR
Predict PC
Predict PC
Text is not SVG - cannot display
-------------------------------------------------------------------------------- /labs/bp/saturation_counter.drawio: -------------------------------------------------------------------------------- 1 | 5VrdkpowGH0aLzvDn6xernZ32860s1N32pnedFKIkBqIE6Jon76JJAgEuq5VUbgyOUm+mHO+QxJ0YE+jzRMFy/Az8SEeWIa/GdjvB5ZlDo0x/xDINkNGhpMBAUW+7LQHZugPlKAh0RXyYVLqyAjBDC3LoEfiGHqshAFKSVruNie4POsSBFADZh7AOvod+SyUqxgae/wDREGoZjYN2RIB1VkCSQh8khYg+2FgTykhLCtFmynEgjzFSzbusaE1/2IUxuyQAeDH1+Tbs/PJcn//XAQv5tPHaPJORlkDvJILll+WbRUDPAonm1cmaYgYnC2BJ1pSrjfHQhZhXjN5ESTLTIE52kA+6WSOMJ4STCjHYhKLEHI6SBncNK7DzNnhaQVJBBnd8i5pgX/JaVigXmFAKh7kI/ek8ILk5Q0cWTfEkRrgSjLa4sy+Pc5sq2XOnNvjzBm1zJl5CGmxfy92Al7zMEgS5JW58lZ0LTjaVbLR0Nd2hbc8noY1FCiMQgwYWpfD1/EiZ3gmiE+cM66YlNupVSE2ISvqQTmmuBlUwuQPiIY4DNAAMi3OTqN8zf8h21CT7UXTjacl526SMEoWsJK7AKMgFoJyVSCtz++CxCLJEd/U7+WwCPk+brIRJavY3yWEcRqXjHWTuDUZYp/NJG7PTGI5p3FJviW05JK7PrkkF781m4x6ZhPHOI1NnFc2pXPbZNwnmzhmyzZR6vbGJtWzknukTUb/DnNml1j6Vf9Ld10yLnPttO2ZQ94hdMkz1ZPTsZ6pnuQubRr9PUaHTZOLfy2u6dvlvnqQOtY11YPdpV2j3+477Jr8QHYtrtFv+yIdXCzo/kV5KVDExwHe6i0xEanEwALG3VWtIppK2dZE0++ehqlLk0Kw6K9kd1emmX7vNGuM1qRZx9Wy7etSSwUuqlXjsObHYsf1csaX0otX9796Z2eO/X8H7Ie/ -------------------------------------------------------------------------------- /labs/bp/saturation_counter.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
T
T
T
T
T
T
N
N
N
N
N
N
00
strongly
not taken
00...
01
weakly
not taken
01...
10
weakly
taken
10...
11
strongly
taken
11...
Text is not SVG - cannot display
-------------------------------------------------------------------------------- /labs/branch-prediction.md: -------------------------------------------------------------------------------- 1 | 2 | # Branch Prediction 3 | 4 | In this lab, you will need to modify the existing [branch predictor](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/bht.sv). See the [Your Own Repository Guide](../guides/your-own-repo.md) for our recommended way to organize and collaborate on labs. 5 | 6 | ## Pre-Lab Questions 7 | 8 | 1. What is the purpose of a branch predictor? Why does a single-cycle core not need branch prediction? 9 | 2. Define, compare, and contrast the following: 10 | * Static branch prediction vs. Dynamic branch prediction 11 | * One-level branch prediction vs. Two-level branch prediction 12 | * Local branch prediction vs. Global branch prediction 13 | 3. Define BHT, BTB and RAS. What are they used for? 14 | 4. Look at [frontend.sv](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/frontend.sv). What are the 4 types of instructions that the branch predictor handles, and how are they handled? 15 | 5. How is a branch resolution handled? 16 | 6. What kind of dynamic branch predictor does CVA6 use? 17 | 7. Provide a GitHub permalink to where in `ariane_pkg` the branch predictor structs are defined. 18 | 8. When can more than 1 instruction be fetched per cycle? 19 | 20 | ## Part 1 - CVA6 Predictor 21 | 22 | Add functionality to [frontend.sv](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/frontend.sv) that records the branch predictor hit-rate. 23 | 24 | To do this, you can create an `always_ff @(posedge clk)` block that counts how many times a branch has been resolved, and how many of those resolutions were mispredicts. 
([Branch resolve net](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/frontend.sv#L30); [branch resolve type](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/include/ariane_pkg.sv#L338-L345).) The hit-rate can be calculated with 1-num_mispredicts/num_valid_resolutions. You can then record the hit-rate to a file on every clock cycle; this way, you are certain to get the final hit-rate when the simulation terminates. 25 | 26 | ### Part 1 Questions 27 | 28 | 1. Highlight your changes to `"frontend.sv"` that records the hit-rate. 29 | 2. What are the final hit-rate percentages of each of the [bp benchmarks](https://github.com/sifferman/labs-with-cva6/tree/main/programs/bp)? 30 | 3. Compare the performance of the [bp benchmarks](https://github.com/sifferman/labs-with-cva6/tree/main/programs/bp) after choosing 3 new values for [`NR_ENTRIES`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/frontend.sv#L419). Display the 4 hit-rates in a table and explain how and why each program changes its hit-rate as BHT size changes. *Note: When changing `NR_ENTRIES`, be sure to change `NR_ENTRIES` in the `bht` instantiation in `"frontend.sv"`, not the `bht` declaration in `"bht.sv"`. 
Also your `NR_ENTRIES` values should be on the order of 16 to have interesting results.* 31 | 32 | ### Example of How to Write to a File in Verilog/SystemVerilog 33 | 34 | ```systemverilog 35 | integer f; 36 | integer counter; 37 | initial begin 38 | f = $fopen("bp.txt","w"); 39 | end 40 | always @(posedge clk) begin 41 | $fwrite(f, "%f\n", $itor(counter)); 42 | end 43 | ``` 44 | 45 | ## Part 2 - Global Predictor 46 | 47 | In this part, you will modify the [bht.sv](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/bht.sv) and turn it into a [global two-level adaptive branch predictor](https://en.wikipedia.org/wiki/Branch_predictor#Global_branch_prediction). First, read through the [Global Branch Predictor Specifications](#global-branch-predictor-specifications) section. 48 | 49 | A few notes on your implementation: 50 | 51 | * Choose your own values for `n` and `m` 52 | * You can use whatever algorithm you want to calculate the BHT index 53 | * You can use Gshare or Gselect 54 | * You may also choose to use [XORShift](https://en.wikipedia.org/wiki/Xorshift), a simple random-number-generation algorithm, to create a hash that will index into your BHT 55 | * Be sure that your BHT size is decided by the parameter `NR_ENTRIES` 56 | * Be sure to remove all unused lines of code leftover from the initial implementation 57 | * Be sure to comment your design clearly 58 | 59 | ### Part 2 Questions 60 | 61 | 1. Share your modified `"bht.sv"` that implements the global two-level adaptive branch predictor. 62 | 2. What specifications did you decide on for your predictor? What is your BHT index generation algorithm? How wide is your GHR? Which address bits do you use for your address? 63 | 3. Briefly explain your reasoning behind the BHT index generation algorithm you chose. 64 | 4. 
Compare the performance of the [bp benchmarks](https://github.com/sifferman/labs-with-cva6/tree/main/programs/bp) after choosing 3 new values for [`NR_ENTRIES`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/frontend/frontend.sv#L419). Display the 4 hit-rates in a table and explain how and why each program changes its hit-rate as BHT size changes. 65 | 66 | ## Global Branch Predictor Specifications 67 | 68 | *This section describes the specifications required to build a global two-level adaptive branch predictor.* 69 | 70 | For global branch predictors, a global history record (GHR) must be kept. A GHR keeps a record of the past `n` branches using a FIFO method. To maintain the GHR, when a branch has been resolved, the branch result must be shifted into the GHR while simultaneously dropping the `n`th result. 71 | 72 | Similar to a one-level branch predictor, a two-level branch predictor contains a branch history table (BHT) where its entries are (*often*) two-bit saturation counters. However, one-level and two-level predictors differ in how the BHT index is calculated. 73 | 74 | For a global two-level adaptive branch predictor, the BHT index is calculated using the current GHR value and using `m` bits from the resolved branch's program counter. The GHR and PC can either be concatenated together (called Gselect) or xor'ed together (called Gshare). Other BHT index calculation algorithms exist but have little effect on the predictor performance. 75 | 76 | ### 2-Bit Saturation Counter 77 | 78 | ![2-Bit Saturation Counter](bp/saturation_counter.svg) 79 | 80 | ### Global Predictor 81 | 82 | ![Global Predictor](bp/global_predictor.svg) 83 | 84 | ## Extra Credit 85 | 86 | Branches come in many patterns in real code, and it is not uncommon to find that different styles of branch prediction have value in different situations. 
Extend your predictor to contain a "Tournament" of CVA6's predictor and your global predictor, with a sensible mechanism for updating trust based on successful or failed predictions. 87 | 88 | ## Code Submission 89 | 90 | Submit to the Gradescope Autograder your modified `"bht.sv"` that implements a global two-level adaptive branch predictor. The autograder will verify the hit-rate for several different programs, but your implementation will be verified manually. 91 | -------------------------------------------------------------------------------- /labs/caching.md: -------------------------------------------------------------------------------- 1 | 2 | # Caching 3 | 4 | In this lab, you will add a victim cache to the [CVA6 I-Cache](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/cache_subsystem/cva6_icache.sv). 5 | 6 | ## Pre-Lab Questions 7 | 8 | 1. What is the purpose of a cache? 9 | 2. What is the purpose of a L1, L2, and L3 cache? 10 | 3. For the following questions, assume you have a CPU operating at 3.6 GHz. 11 | 1. How many CPU clock cycles are needed to read from a DDR5-4800 CL40? (Assume a total latency of 16.67ns) 12 | 2. How many CPU clock cycles are needed to read from an SSD over NVMe? (Assume a total latency of 10μs) 13 | 3. How many CPU clock cycles are needed to read from an SSD over SATA? (Assume a total latency of 70μs) 14 | 4. How many CPU clock cycles are needed to read from an HDD over SATA? (Assume a total latency of 10ms) 15 | 4. Why are caches designed using SRAM? What are the pros and cons of the 3 most-common volatile storage elements: flip-flops, SRAM, and DRAM? 16 | 5. Provided is a circuit diagram of an SRAM cell and SRAM array. 
(BL - Bit Line, WL - Word Line, Q - data) Use them to give a 1-sentence response for each of the following questions: 17 | [![SRAM Cell 6T](./caching/figures/SRAM_Cell_6T.svg)](https://en.wikipedia.org/wiki/Static_random-access_memory) 18 | [![SRAM Array](./caching/figures/SRAM_Array.png)](http://www.barth-dev.de/knowledge-corner/digital-design/memory-array-architectures/) 19 | 1. How is a bit read from an SRAM cell? 20 | 2. How is a bit written to an SRAM cell? 21 | 3. How is a word read from an SRAM array? 22 | 4. How is a word written to an SRAM array? 23 | 6. What is the purpose of a Victim Cache? When is it written to and read from? 24 | 25 | ## Part 1 26 | 27 | In this part, you will finish an implementation of a Victim Cache. 28 | 29 | The implementation should be a fully-associative cache with LRU replacement policy. It should have support for any positive integer cache size, meaning that the LRU algorithm will change a bit depending on the specified size. For a cache size of 1, there is no LRU logic necessary because only one way can be replaced. For a cache size of 2, there should be a single bit specifying which way was least recently accessed, and therefore which way should be replaced. For a cache size >2, there should be a doubly-linked-list (DLL) that orders each way from LRU to MRU; every read/write should bump the corresponding way to the MRU of the DLL, and every write should replace the LRU of the DLL. 30 | 31 | Note that since it is a fully-associative cache, you should not infer a BRAM, but should instead infer an array of registers. 32 | 33 | The module you need to finish is [`"ucsbece154b_victim_cache.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/starter/ucsbece154b_victim_cache.sv), found in [`"labs/caching/part2/starter"`](https://github.com/sifferman/labs-with-cva6/tree/main/labs/caching/part2/starter). Your job will be to fix all the lines labeled `// TODO`. 
You can simulate your changes with ModelSim using `make sim TOOL=modelsim` (or Verilator 5 using `make sim TOOL=verilator` assuming that you have it set up). A [sample testbench](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/starter/tb/victim_cache_tb.sv) is provided that you may edit as desired. 34 | 35 | ## Part 2 36 | 37 | In this part, you will change the CVA6 filelist to add your victim cache to the I-Cache. Additionally, you will write a simple assembly program that simulates CVA6 with the victim-cache. 38 | 39 | ### Updates to I-Cache 40 | 41 | CVA6's I-Cache is implemented here: [`"cva6/core/cache_subsystem/cva6_icache.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/cache_subsystem/cva6_icache.sv). It is highly parameterizable, allowing you to change the number of entries, the number of ways, and more. A modified version of this implementation is provided to you here, [`"ucsbece154b_icache.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/ucsbece154b_icache.sv), which calls the victim cache that you created in the previous part. 42 | 43 | Try to read through the files and answer the questions below. 44 | 45 | ### Verilog/SystemVerilog Generate 46 | 47 | The starter code for [`"ucsbece154b_victim_cache.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/starter/ucsbece154b_victim_cache.sv) includes a [`generate` construct](https://www.chipverify.com/verilog/verilog-generate-block). A `generate` construct provides the ability for a module to be built based on parameters. These statements are used when an operation or module instance needs to be conditionally included or repeated. 
48 | 49 | Here are some examples inside CVA6 of using `generate` blocks: 50 | 51 | * [`generate for` example](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/alu.sv#L42-L50) 52 | * [`generate if` example](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/cva6.sv#L328-L338) 53 | 54 | ### Option `-f <file>` 55 | 56 | (Nearly) all Verilog/SystemVerilog tools have the command-line option `-f <file>`, which reads the specified file as additional command-line arguments. ([Verilator `-f` documentation](https://veripool.org/guide/latest/exe_verilator.html#cmdoption-0)). This is extremely useful and common for providing a list of RTL files, because any files specified will be treated as source files to be compiled. CVA6's [Makefile](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/Makefile#L542) calls `-f` on [`"cva6/core/Flist.cva6"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/Flist.cva6). 57 | 58 | Modify [`"cva6/core/Flist.cva6"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/Flist.cva6) by removing the file [`"cva6/cache_subsystem/cva6_icache.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/cache_subsystem/cva6_icache.sv), and adding the files [`"ucsbece154b_icache.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/ucsbece154b_icache.sv) and [`"ucsbece154b_victim_cache.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/starter/ucsbece154b_victim_cache.sv). (Be sure that you get the paths correct; feel free to move files as needed.) 59 | 60 | ### Simulating the Victim Cache 61 | 62 | Provided is a list of hints to help you write a program that meets the specifications described in [Part 2](#part-2-questions) question 4: 63 | 64 | * The simulation should finish in under a minute. 
65 | * You should not modify the cache size. 66 | * The victim cache will only be written to when all the ways of a cache index are full. 67 | * You should jump between multiple instructions with PCs that give the same cache index value. 68 | * You can align instructions to a PC with a power of 2 by prefacing the instruction with the directive `.align <n>`. Use this to generate an instruction with a specific PC. Be sure to speed up the simulation by jumping past the `nop` instructions that `.align` inserted. 69 | * You can view the instructions and PCs of an ELF file with the following command: `riscv64-unknown-elf-objdump -d <program>.elf` 70 | * Refer to the [Trace Log](#cva6-trace-log) before looking at the waves to see more quickly whether your program is working. 71 | 72 | ### CVA6 Trace Log 73 | 74 | CVA6 simulations create a log file: `"cva6/trace_hart_00.dasm"`. For every instruction that the simulation ran, it shows the cycle number, VPC, privilege mode, and instruction. It will be a very useful reference for this lab. 75 | 76 | Notes: 77 | 78 | * "Hart" means hardware thread, which is the same thing as a core. 79 | * Sometimes the core randomly enters Debug mode. (Observe `TOP.ariane_testharness.i_ariane.i_cva6.debug_mode`). As long as the core returns to normal execution, you can ignore this. If the simulation never exits, then your code has a bug. 80 | 81 | Additional resource: [RISC-V Instruction Encoder/Decoder](https://luplab.gitlab.io/rvcodecjs/). 82 | 83 | ### Part 2 Questions 84 | 85 | 1. Using the original CVA6 icache, [`"cva6/cache_subsystem/cva6_icache.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/cache_subsystem/cva6_icache.sv), answer the following questions: 86 | 1. How is the table index calculated? 87 | 2. How is the tag calculated? 88 | 3. Provide a permalink to the logic that causes the core to stall, assuming a miss has occurred and the main memory request hasn't been fulfilled yet. (In `"frontend.sv"`.) 
89 | 2. Using the modified icache, [`"ucsbece154b_icache.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/caching/part2/ucsbece154b_icache.sv), answer the following questions: 90 | 1. When is the victim cache written to? 91 | 2. What occurs in the `VICTIM_HIT` state? 92 | 3. What occurs in the `VICTIM_MISS` state? 93 | 3. Show the changes you made to [`"cva6/core/Flist.cva6"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/Flist.cva6). 94 | 4. Provide a program that demonstrates the following behaviors of the I$ and victim cache, and provide waveform screenshots of each event. 95 | 1. An I$ miss. 96 | 2. An I$ hit. What value+tag was read? 97 | 3. A write to the victim cache. What value+tag was written? 98 | 4. A victim cache hit. What value+tag was read? 99 | -------------------------------------------------------------------------------- /labs/caching/figures/SRAM_Array.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sifferman/labs-with-cva6/ab590c6b8dc0cf1032f51438c7aad7a2012f4a73/labs/caching/figures/SRAM_Array.png -------------------------------------------------------------------------------- /labs/caching/figures/SRAM_Cell_6T.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 20 | 22 | 29 | 36 | 43 | 50 | 57 | 64 | 71 | 78 | 85 | 92 | 99 | 106 | 113 | 120 | 121 | 144 | 149 | 150 | 152 | 153 | 155 | image/svg+xml 156 | 158 | 159 | 160 | 161 | 166 | 177 | 188 | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 221 | 225 | 229 | 233 | 237 | 242 | 252 | 262 | 272 | 276 | 280 | 284 | 289 | 293 | 297 | 301 | 306 | 316 | 326 | 336 | 339 | 343 | 347 | 348 | 353 | 358 | 363 | 373 | 383 | 388 | 393 | 398 | 408 | 418 | 421 | 425 | 429 | 430 | 440 | 450 | 460 | 470 | 475 | 482 | 489 | 496 | 500 | V 511 | DD 522 | 523 | 527 | M 538 | 6 549 | 550 | 553 | M 564 | 5 575 | 576 | 580 | M 591 | 2 602 | 603 | 
607 | M 618 | 4 629 | 630 | 634 | M 645 | 3 656 | 657 | 661 | M 672 | 1 683 | 684 | WL 695 | BL 706 | BL 717 | 721 | Q 732 | Q 743 | 747 | 748 | 749 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | fusesoc.conf 3 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # File: Makefile 3 | # Description: Makefile for running the victim cache testbench. 4 | # 5 | CORE_FILE = victim_cache.core 6 | VENDOR = ucsbece154b 7 | LIBRARY = caching 8 | CORE = victim_cache 9 | VERSION = 1.0.0 10 | PROJECT = ${VENDOR}:${LIBRARY}:${CORE}:${VERSION} 11 | 12 | TESTS = $(shell find tests -type f) 13 | 14 | # ======================================================= # 15 | 16 | .PHONY: init build sim lint_1way lint_2way lint_4way lint_16way synth clean 17 | 18 | # ======================================================= # 19 | # Targets 20 | 21 | sim: fusesoc.conf 22 | fusesoc run --target tb --tool ${TOOL} ${PROJECT} 23 | 24 | init: fusesoc.conf 25 | 26 | lint_1way: fusesoc.conf 27 | fusesoc run --target $@ ${PROJECT} 28 | lint_2way: fusesoc.conf 29 | fusesoc run --target $@ ${PROJECT} 30 | lint_4way: fusesoc.conf 31 | fusesoc run --target $@ ${PROJECT} 32 | lint_16way: fusesoc.conf 33 | fusesoc run --target $@ ${PROJECT} 34 | 35 | synth: 36 | cd synth && make 37 | 38 | clean: 39 | rm -rf build fusesoc.conf 40 | 41 | # ======================================================= # 42 | # Files 43 | 44 | fusesoc.conf: 45 | pip3 install --user -r requirements.txt 46 | rm -f fusesoc.conf 47 | fusesoc library add ${VENDOR}_${LIBRARY}_${CORE}_${VERSION} . 
--sync-type=local 48 | 49 | # ======================================================= # 50 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/README.md: -------------------------------------------------------------------------------- 1 | # Victim Cache Starter 2 | 3 | ## To-Do 4 | 5 | Finish `"ucsbece154b_victim_cache.sv"` and `"tb/victim_cache_tb.sv"`. 6 | 7 | ## Usage 8 | 9 | ```bash 10 | make sim TOOL=modelsim 11 | gtkwave build/ucsbece154b_caching_victim_cache_1.0.0/tb-verilator/dump.fst 12 | make lint_1way 13 | make lint_2way 14 | make lint_4way 15 | make lint_16way 16 | make synth 17 | ``` 18 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/requirements.txt: -------------------------------------------------------------------------------- 1 | fusesoc 2 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/synth/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: clean stats timing 3 | 4 | build/synth.json: 5 | @mkdir -p build 6 | cd build && yosys -l yosys.log -p 'tcl ../yosys.tcl' 7 | 8 | build/synth.asc: build/synth.json 9 | @mkdir -p build 10 | cd build && nextpnr-ice40 -l next.log --hx8k --package ct256 --pcf-allow-unconstrained --json synth.json --asc synth.asc 11 | 12 | build/synth.stat: build/synth.asc 13 | @mkdir -p build 14 | cd build && icebox_stat synth.asc > synth.stat 15 | 16 | build/synth.tim: build/synth.asc 17 | @mkdir -p build 18 | cd build && icetime -r synth.tim synth.asc 19 | 20 | stats: build/synth.stat 21 | 22 | timing: build/synth.tim 23 | 24 | clean: 25 | rm -rf build 26 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/synth/yosys.tcl: -------------------------------------------------------------------------------- 1 | 2 | set top 
ucsbece154b_victim_cache 3 | set name ucsbece154b_caching_victim_cache_1.0.0 4 | 5 | yosys -import 6 | plugin -i systemverilog 7 | 8 | yosys -import 9 | 10 | read_systemverilog -noinfo -nonote {../../unread.sv} 11 | read_systemverilog -PNR_ENTRIES=4 -PADDR_WIDTH=16 -PLINE_WIDTH=16 -noinfo -nonote {../../ucsbece154b_victim_cache.sv} 12 | 13 | synth_ice40 -top $top 14 | opt 15 | write_json synth.json 16 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/tb/assert.svh: -------------------------------------------------------------------------------- 1 | /* 2 | * File: assert.svh 3 | * Description: Macro to help with handling errors in testbenches. 4 | */ 5 | 6 | `ifndef __ASSERT_SVH 7 | `define __ASSERT_SVH 8 | 9 | `define ASSERT(CONDITION, MESSAGE) if ((CONDITION)==1'b1); else begin $display($sformatf("Error: %s", $sformatf MESSAGE)); ERROR = 1; end 10 | 11 | `endif 12 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/tb/victim_cache_tb.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * File: victim_cache_tb.sv 3 | * Description: Example testbench to test a victim cache. 
4 | */ 5 | 6 | `include "assert.svh" 7 | 8 | module victim_cache_tb (); 9 | 10 | parameter int unsigned ADDR_WIDTH = 56; 11 | parameter int unsigned LINE_WIDTH = 128; 12 | 13 | logic clk_i = 0; 14 | logic rst_ni; 15 | logic flush_i; 16 | logic en_i; 17 | logic [ADDR_WIDTH-1:0] raddr_i; 18 | logic [LINE_WIDTH-1:0] rdata_o; 19 | logic hit_o; 20 | logic we_i; 21 | logic [ADDR_WIDTH-1:0] waddr_i; 22 | logic [LINE_WIDTH-1:0] wdata_i; 23 | 24 | always #10 clk_i = ~clk_i; // free-running clock, 20 time units per period 25 | ucsbece154b_victim_cache #( 26 | .ADDR_WIDTH(ADDR_WIDTH), 27 | .LINE_WIDTH(LINE_WIDTH), 28 | .NR_ENTRIES(2) // modify as needed 29 | ) victim_cache (.*); 30 | 31 | logic ERROR = 0; // set to 1 by the ASSERT macro whenever a check fails 32 | initial begin 33 | $dumpfile( "dump.fst" ); 34 | $dumpvars; 35 | $display( "Begin simulation." ); 36 | //\\ =========================== \\// 37 | 38 | 39 | 40 | // reset/initialize (rst_ni is active-low, so it is driven to 0 here) 41 | rst_ni = 1'b0; 42 | flush_i = 1'b1; 43 | en_i = 1'b1; 44 | 45 | raddr_i = '0; 46 | 47 | we_i = '0; 48 | waddr_i = '0; 49 | wdata_i = '0; 50 | 51 | @(negedge clk_i); 52 | @(negedge clk_i); 53 | 54 | // release reset and flush, then write beef to 1000 55 | rst_ni = 1'b1; 56 | flush_i = 1'b0; 57 | 58 | we_i = 1'b1; 59 | waddr_i = (ADDR_WIDTH)'(32'h1000); 60 | wdata_i = (LINE_WIDTH)'(32'hbeef); 61 | 62 | @(negedge clk_i); 63 | 64 | // assert read beef at 1000 65 | raddr_i = (ADDR_WIDTH)'(32'h1000); 66 | 67 | we_i = 1'b0; 68 | 69 | #1; // give the read outputs time to settle before checking them 70 | `ASSERT(hit_o==1'b1, ("Write then read did not give hit")); 71 | `ASSERT(rdata_o==(LINE_WIDTH)'(32'hbeef), ("Write then read gave wrong data")); 72 | 73 | @(negedge clk_i); 74 | 75 | // write bead to 2000 76 | we_i = 1'b1; 77 | waddr_i = (ADDR_WIDTH)'(32'h2000); 78 | wdata_i = (LINE_WIDTH)'(32'hbead); 79 | 80 | @(negedge clk_i); 81 | 82 | we_i = 1'b0; 83 | 84 | // assert read beef at 1000 85 | raddr_i = (ADDR_WIDTH)'(32'h1000); 86 | 87 | #1; 88 | `ASSERT(hit_o==1'b1, ("2 writes then read did not give hit")); 89 | `ASSERT(rdata_o==(LINE_WIDTH)'(32'hbeef), ("2 writes then read gave wrong data")); 90 | 91 | // assert read bead at 2000 
92 | raddr_i = (ADDR_WIDTH)'(32'h2000); 93 | 94 | #1; 95 | `ASSERT(hit_o==1'b1, ("2 writes then read did not give hit")); 96 | `ASSERT(rdata_o==(LINE_WIDTH)'(32'hbead), ("2 writes then read gave wrong data")); 97 | 98 | 99 | 100 | //\\ =========================== \\// 101 | $display("End simulation."); 102 | if (ERROR) $fatal(); 103 | $stop; 104 | end 105 | 106 | endmodule 107 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/ucsbece154b_victim_cache.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ucsbece154b_victim_cache.sv 3 | * Description: Starter file for a victim cache. 4 | * Directions: 5 | * The implementation should be a fully-associative cache with LRU replacement policy. It should 6 | * have support for any positive integer cache size, meaning that the LRU algorithm will change a 7 | * bit depending on the specified size. For a cache size of 1, there is no LRU logic necessary 8 | * because only one way can be replaced. For a cache size of 2, there should be a single bit 9 | * specifying which way was least recently accessed, and therefore which way should be replaced. 10 | * For a cache size >2, there should be a doubly-linked-list (DLL) that orders each way from LRU 11 | * to MRU; every read/write should bump the corresponding way to the MRU of the DLL, and every 12 | * write should replace the LRU of the DLL. 
13 | */ 14 | 15 | module ucsbece154b_victim_cache #( 16 | parameter int unsigned ADDR_WIDTH = 56, 17 | parameter int unsigned LINE_WIDTH = 128, 18 | parameter int unsigned NR_ENTRIES = 4 19 | ) ( 20 | input logic clk_i, // clock; registers update on the rising edge 21 | input logic rst_ni, // active-low reset; entries are invalidated while 0 22 | input logic flush_i, // entries are invalidated while 1 23 | input logic en_i, // cache enable; entries are also invalidated while 0 24 | 25 | input logic [ADDR_WIDTH-1:0] raddr_i, // read (lookup) address 26 | output logic [LINE_WIDTH-1:0] rdata_o, // read data port 27 | output logic hit_o, // read hit flag 28 | 29 | input logic we_i, // write enable, qualified by en_i 30 | input logic [ADDR_WIDTH-1:0] waddr_i, // write address 31 | input logic [LINE_WIDTH-1:0] wdata_i // write data 32 | ); 33 | 34 | localparam OFFSET_WIDTH = 1; // TODO (in terms of ADDR_WIDTH and LINE_WIDTH) 35 | localparam TAG_SIZE = 1; // TODO (in terms of ADDR_WIDTH and LINE_WIDTH) 36 | 37 | logic [TAG_SIZE-1:0] rtag, wtag; 38 | assign rtag = raddr_i[OFFSET_WIDTH +: TAG_SIZE]; // "indexed part-select" operator 39 | assign wtag = waddr_i[OFFSET_WIDTH +: TAG_SIZE]; 40 | unread tag_unread (.d_i((|raddr_i[OFFSET_WIDTH-1:0])|(|waddr_i[OFFSET_WIDTH-1:0]))); // feed the offset bits, which nothing else reads, into the dummy unread sink 41 | 42 | integer i = 0; // loop index shared by the for-loops in the always_comb blocks below 43 | unread i_unread (.d_i(|i)); 44 | 45 | 46 | 47 | 48 | if (NR_ENTRIES==1) begin : one_register 49 | // 1-way fully associative cache 50 | // no LRU needed 51 | // 52 | 53 | 54 | 55 | 56 | struct packed { 57 | logic [LINE_WIDTH-1:0] data; 58 | logic [TAG_SIZE-1:0] tag; 59 | logic valid; 60 | } MEM_d, MEM_q; 61 | 62 | assign hit_o = 0; // TODO 63 | assign rdata_o = 0; // TODO 64 | 65 | always_comb begin 66 | MEM_d = MEM_q; 67 | if (en_i && we_i) begin 68 | MEM_d.data = '0; // TODO 69 | MEM_d.tag = '0; // TODO 70 | MEM_d.valid = '0; // TODO 71 | end 72 | end 73 | always_ff @(posedge clk_i) begin 74 | if (!rst_ni || flush_i || !en_i) begin 75 | MEM_q <= '0; 76 | end else begin 77 | MEM_q <= MEM_d; 78 | end 79 | end 80 | 81 | 82 | 83 | 84 | // 85 | end else if (NR_ENTRIES==2) begin : lru_bit 86 | // 2-way fully associative cache 87 | // LRU is 1 bit to show which way should be replaced on a write 88 | // 89 | 90 | 91 | 92 | // cache memory 93 | struct packed { 94 | logic [LINE_WIDTH-1:0] 
data; 95 | logic [TAG_SIZE-1:0] tag; 96 | logic valid; 97 | } MEM_d[2], MEM_q[2]; 98 | 99 | // lru register: index of the way that the write port overwrites 100 | logic lru_d, lru_q; 101 | 102 | always_comb begin 103 | // combinational nets 104 | rdata_o = 'x; 105 | hit_o = 1'b0; 106 | // registers 107 | lru_d = lru_q; 108 | MEM_d = MEM_q; 109 | 110 | // assign read port 111 | for (i = 0; i < 2; i++) begin 112 | if (en_i && MEM_q[i].valid && (rtag==MEM_q[i].tag)) begin 113 | hit_o = 0; // TODO 114 | rdata_o = 0; // TODO 115 | lru_d = 0; // TODO 116 | end 117 | end 118 | // handle write port (indexes with lru_d, so it sees any update made by the read port above) 119 | if (en_i && we_i) begin 120 | MEM_d[lru_d].data = 0; // TODO 121 | MEM_d[lru_d].tag = 0; // TODO 122 | MEM_d[lru_d].valid = 0; // TODO 123 | lru_d = 0; // TODO 124 | end 125 | end 126 | always_ff @(posedge clk_i) begin 127 | if ((!rst_ni) || flush_i || (!en_i)) begin // only the valid bits and lru are cleared; data and tag are left as they were 128 | MEM_q[0].valid <= '0; 129 | MEM_q[1].valid <= '0; 130 | lru_q <= '0; 131 | end else begin 132 | MEM_q <= MEM_d; 133 | lru_q <= lru_d; 134 | end 135 | end 136 | 137 | 138 | 139 | 140 | // 141 | end else begin : lru_linked_list 142 | // n-way fully associative cache 143 | // LRU implemented as linked list 144 | // 145 | 146 | 147 | 148 | // DLL Structure // 149 | // MRU - ... - way.mru - way - way.lru - ... 
- LRU // 150 | 151 | typedef logic [$clog2(NR_ENTRIES)-1:0] way_index_t; 152 | 153 | struct packed { 154 | logic [TAG_SIZE-1:0] tag; 155 | way_index_t lru; // neighbor toward the LRU end (less recently used) 156 | way_index_t mru; // neighbor toward the MRU end (more recently used) 157 | logic valid; 158 | } dll_d[NR_ENTRIES], dll_q[NR_ENTRIES]; 159 | 160 | // registers holding the way index at the LRU end and at the MRU end of the list 161 | way_index_t lru_d, lru_q, mru_d, mru_q; 162 | 163 | // index to bump 164 | way_index_t read_index, write_index; 165 | 166 | 167 | // separate the data from the dll to help with optimization 168 | logic [LINE_WIDTH-1:0] data_d[NR_ENTRIES], data_q[NR_ENTRIES]; 169 | 170 | 171 | always_comb begin 172 | // combinational nets 173 | rdata_o = 'x; 174 | hit_o = 1'b0; 175 | read_index = 'x; 176 | write_index = 'x; 177 | // registers 178 | lru_d = lru_q; 179 | mru_d = mru_q; 180 | data_d = data_q; 181 | dll_d = dll_q; 182 | 183 | // assign read port (stops at the first valid way whose tag matches) 184 | for (i = 0; i < NR_ENTRIES; i++) begin 185 | if (en_i && dll_d[i].valid && (rtag==dll_d[i].tag)) begin 186 | hit_o = 1'b1; 187 | read_index = way_index_t'(i); 188 | break; 189 | end 190 | end 191 | if (hit_o) begin 192 | // read data 193 | rdata_o = '0; // TODO 194 | 195 | // bump read_index to mru 196 | // TODO 197 | end 198 | // handle write port (the way at the LRU end is the one replaced) 199 | if (en_i && we_i) begin 200 | write_index = lru_d; 201 | 202 | // write data 203 | // TODO 204 | 205 | // bump write_index to mru 206 | // TODO 207 | end 208 | // handle reset/flush/disable: invalidate every way and relink the list as way 0 (LRU) ... way NR_ENTRIES-1 (MRU) 209 | if (!rst_ni || flush_i || !en_i) begin 210 | for (i = 0; i < NR_ENTRIES; i++) begin 211 | dll_d[i].valid = 1'b0; 212 | dll_d[i].lru = way_index_t'(i-1); 213 | dll_d[i].mru = way_index_t'(i+1); 214 | end 215 | lru_d = '0; 216 | mru_d = way_index_t'(NR_ENTRIES-1); 217 | end 218 | end 219 | always_ff @(posedge clk_i) begin // reset is folded into the _d values above, so the registers load unconditionally 220 | data_q <= data_d; 221 | dll_q <= dll_d; 222 | lru_q <= lru_d; 223 | mru_q <= mru_d; 224 | end 225 | 226 | 227 | 228 | 229 | // 230 | end 231 | 232 | endmodule 233 | -------------------------------------------------------------------------------- 
/labs/caching/part2/starter/unread.sv: -------------------------------------------------------------------------------- 1 | // Copyright 2018 ETH Zurich and University of Bologna. 2 | // Copyright and related rights are licensed under the Solderpad Hardware 3 | // License, Version 0.51 (the "License"); you may not use this file except in 4 | // compliance with the License. You may obtain a copy of the License at 5 | // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law 6 | // or agreed to in writing, software, hardware and materials distributed under 7 | // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 8 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 9 | // specific language governing permissions and limitations under the License. 10 | // 11 | // Author: Florian Zaruba, ETH Zurich 12 | // Date: 29.10.2018 13 | // Description: Dummy circuit to mitigate Open Pin warnings 14 | 15 | /* verilator lint_off UNUSED */ 16 | module unread ( 17 | input logic d_i 18 | ); 19 | 20 | endmodule 21 | /* verilator lint_on UNUSED */ 22 | -------------------------------------------------------------------------------- /labs/caching/part2/starter/victim_cache.core: -------------------------------------------------------------------------------- 1 | CAPI=2: 2 | name: ucsbece154b:caching:victim_cache:1.0.0 3 | description: UCSB ECE154B Victim Cache 4 | 5 | filesets: 6 | # default 7 | rtl: 8 | files: 9 | - unread.sv 10 | - ucsbece154b_victim_cache.sv 11 | file_type: systemVerilogSource 12 | 13 | # tb 14 | tb: 15 | files: 16 | - tb/assert.svh: {is_include_file: true} 17 | - tb/victim_cache_tb.sv 18 | file_type: systemVerilogSource 19 | 20 | 21 | targets: 22 | default: &default 23 | filesets: 24 | - rtl 25 | 26 | tb: # fusesoc run --target tb ucsbece154b:caching:victim_cache 27 | <<: *default 28 | description: Simulate the design 29 | toplevel: victim_cache_tb 30 | default_tool: verilator 31 | filesets_append: 
32 | - tb 33 | tools: 34 | verilator: 35 | verilator_options: 36 | - --binary 37 | - -j 0 38 | - --trace-fst 39 | - --trace-structs 40 | - --x-assign unique 41 | - --x-initial unique 42 | - -Wno-fatal 43 | run_options: 44 | - +verilator+rand+reset+2 45 | 46 | 47 | lint: &lint 48 | <<: *default 49 | description: Lint the design 50 | toplevel: ucsbece154b_victim_cache 51 | default_tool: verilator 52 | tools: 53 | verilator: 54 | mode: lint-only 55 | verilator_options: 56 | - -Wall 57 | - +1364-2005ext+v 58 | - +1800-2012ext+sv 59 | 60 | 61 | lint_1way: # fusesoc run --target lint_1way ucsbece154b:caching:victim_cache 62 | <<: *lint 63 | parameters: 64 | - NR_ENTRIES=1 65 | lint_2way: # fusesoc run --target lint_2way ucsbece154b:caching:victim_cache 66 | <<: *lint 67 | parameters: 68 | - NR_ENTRIES=2 69 | lint_4way: # fusesoc run --target lint_4way ucsbece154b:caching:victim_cache 70 | <<: *lint 71 | parameters: 72 | - NR_ENTRIES=4 73 | lint_16way: # fusesoc run --target lint_16way ucsbece154b:caching:victim_cache 74 | <<: *lint 75 | parameters: 76 | - NR_ENTRIES=16 77 | 78 | 79 | parameters: 80 | NR_ENTRIES: 81 | datatype: int 82 | description: Set victim cache size 83 | paramtype: vlogparam 84 | -------------------------------------------------------------------------------- /labs/caching/part2/ucsbece154b_icache.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ucsbece154b_icache.sv 3 | * Description: Originally "cva6_icache.sv", modified so that it has a victim cache. 4 | */ 5 | 6 | // Copyright 2018 ETH Zurich and University of Bologna. 7 | // Copyright and related rights are licensed under the Solderpad Hardware 8 | // License, Version 0.51 (the "License"); you may not use this file except in 9 | // compliance with the License. You may obtain a copy of the License at 10 | // http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law 11 | // or agreed to in writing, software, hardware and materials distributed under 12 | // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 13 | // CONDITIONS OF ANY KIND, either express or implied. See the License for the 14 | // specific language governing permissions and limitations under the License. 15 | // 16 | // Author: Michael Schaffner , ETH Zurich 17 | // Date: 15.08.2018 18 | // Description: Instruction cache that is compatible with openpiton. 19 | // 20 | // Some notes: 21 | // 22 | // 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped 23 | // to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width 24 | // of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be 25 | // consumed unconditionally. 26 | // 27 | // 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored) 28 | // 29 | // 3) NC accesses to I/O space are expected to return 32bit from memory. 
30 | // 31 | 32 | 33 | module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #( 34 | parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 0, // ID to be used for read transactions 35 | parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig // contains cacheable regions 36 | ) ( 37 | input logic clk_i, 38 | input logic rst_ni, 39 | 40 | input logic flush_i, // flush the icache, flush and kill have to be asserted together 41 | input logic en_i, // enable icache 42 | output logic miss_o, // to performance counter 43 | // address translation requests 44 | input icache_areq_i_t areq_i, 45 | output icache_areq_o_t areq_o, 46 | // data requests 47 | input icache_dreq_i_t dreq_i, 48 | output icache_dreq_o_t dreq_o, 49 | // refill port 50 | input logic mem_rtrn_vld_i, 51 | input icache_rtrn_t mem_rtrn_i, 52 | output logic mem_data_req_o, 53 | input logic mem_data_ack_i, 54 | output icache_req_t mem_data_o 55 | ); 56 | 57 | // signals 58 | logic cache_en_d, cache_en_q; // cache is enabled 59 | logic [riscv::VLEN-1:0] vaddr_d, vaddr_q; 60 | logic paddr_is_nc; // asserted if physical address is non-cacheable 61 | logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare 62 | logic cache_rden; // triggers cache lookup 63 | logic cache_wren; // triggers write to cacheline 64 | logic cmp_en_d, cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal. 
65 | logic flush_d, flush_q; // used to register and signal pending flushes 66 | 67 | // replacement strategy 68 | logic update_lfsr; // shift the LFSR 69 | logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered 70 | logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement 71 | logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace 72 | logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) 73 | logic all_ways_valid; // we need to switch repl strategy since all are valid 74 | 75 | // invalidations / flushing 76 | logic inv_en; // incoming invalidations 77 | logic inv_d, inv_q; // invalidation in progress 78 | logic flush_en, flush_done; // used to flush cache entries 79 | logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries 80 | 81 | // mem arrays 82 | logic cl_we; // write enable to memory array 83 | logic [ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array 84 | logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array 85 | logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line 86 | logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag 87 | logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem 88 | logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache 89 | logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache 90 | logic [ICACHE_LINE_WIDTH-1:0] cl_wdata; // value written to cache line 91 | logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline 92 | logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline 93 | logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs 94 | logic vld_we; // valid bits 
write enable 95 | logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write 96 | logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs 97 | logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit 98 | 99 | // cpmtroller FSM 100 | typedef enum logic[3:0] {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS, VICTIM_HIT, VICTIM_MISS} state_e; 101 | state_e state_d, state_q; 102 | 103 | // victim cache 104 | logic victim_cache_we; 105 | logic [ICACHE_LINE_WIDTH-1:0] victim_cache_rdata_d, victim_cache_rdata_q; 106 | logic victim_cache_hit; 107 | logic [riscv::PLEN-1:0] victim_cache_waddr; 108 | 109 | /////////////////////////////////////////////////////// 110 | // address -> cl_index mapping, interface plumbing 111 | /////////////////////////////////////////////////////// 112 | 113 | // extract tag from physical address, check if NC 114 | assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q; 115 | 116 | // noncacheable if request goes to I/O space, or if cache is disabled 117 | assign paddr_is_nc = (~cache_en_q) | (~ariane_pkg::is_inside_cacheable_regions(ArianeCfg, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}})); 118 | 119 | // pass exception through 120 | assign dreq_o.ex = areq_i.fetch_exception; 121 | 122 | // latch this in case we have to stall later on 123 | // make sure this is 32bit aligned 124 | assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q; 125 | assign areq_o.fetch_vaddr = {vaddr_q>>2, 2'b0}; 126 | 127 | // split virtual address into index and offset to address cache arrays 128 | assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH]; 129 | 130 | 131 | if (ArianeCfg.AxiCompliant) begin : gen_axi_offset 132 | // if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory 133 | assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? 
{dreq_i.vaddr>>2, 2'b0} : 134 | ( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case 135 | cl_offset_q; 136 | // request word address instead of cl address in case of NC access 137 | assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit 138 | {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl 139 | end else begin : gen_piton_offset 140 | // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. 141 | // since the piton cache system replicates the data, we can always index the full CL 142 | assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} : 143 | cl_offset_q; 144 | 145 | // request word address instead of cl address in case of NC access 146 | assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit 147 | {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl 148 | end 149 | 150 | 151 | assign mem_data_o.tid = RdTxId; 152 | 153 | assign mem_data_o.nc = paddr_is_nc; 154 | // way that is being replaced 155 | assign mem_data_o.way = repl_way; 156 | assign dreq_o.vaddr = vaddr_q; 157 | 158 | // invalidations take two cycles 159 | assign inv_d = inv_en; 160 | 161 | /////////////////////////////////////////////////////// 162 | // main control logic 163 | /////////////////////////////////////////////////////// 164 | logic addr_ni; 165 | assign addr_ni = is_inside_nonidempotent_regions(ArianeCfg, areq_i.fetch_paddr); 166 | always_comb begin : p_fsm 167 | // default assignment 168 | state_d = state_q; 169 | cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush 170 | flush_en = 1'b0; 171 | cmp_en_d = 1'b0; 172 | cache_rden = 1'b0; 173 | cache_wren = 1'b0; 174 | 
inv_en = 1'b0; 175 | flush_d = flush_q | flush_i; // register incoming flush 176 | victim_cache_we = 1'b0; 177 | 178 | // interfaces 179 | dreq_o.ready = 1'b0; 180 | areq_o.fetch_req = 1'b0; 181 | dreq_o.valid = 1'b0; 182 | mem_data_req_o = 1'b0; 183 | // performance counter 184 | miss_o = 1'b0; 185 | 186 | // handle invalidations unconditionally 187 | // note: invald are mutually exclusive with 188 | // ifills, since both arrive over the same IF 189 | // however, we need to make sure below that we 190 | // do not trigger a cache readout at the same time... 191 | if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin 192 | inv_en = 1'b1; 193 | end 194 | 195 | unique case (state_q) 196 | ////////////////////////////////// 197 | // this clears all valid bits 198 | FLUSH: begin 199 | flush_en = 1'b1; 200 | if (flush_done) begin 201 | state_d = IDLE; 202 | flush_d = 1'b0; 203 | // if the cache was not enabled set this 204 | cache_en_d = en_i; 205 | end 206 | end 207 | ////////////////////////////////// 208 | // wait for an incoming request 209 | IDLE: begin 210 | // only enable tag comparison if cache is enabled 211 | cmp_en_d = cache_en_q; 212 | 213 | // handle pending flushes, or perform cache clear upon enable 214 | if (flush_d || (en_i && !cache_en_q)) begin 215 | state_d = FLUSH; 216 | // wait for incoming requests 217 | end else begin 218 | // mem requests are for sure invals here 219 | if (!mem_rtrn_vld_i) begin 220 | dreq_o.ready = 1'b1; 221 | // we have a new request 222 | if (dreq_i.req) begin 223 | cache_rden = 1'b1; 224 | state_d = READ; 225 | end 226 | end 227 | if (dreq_i.kill_s1) begin 228 | state_d = IDLE; 229 | end 230 | end 231 | end 232 | ////////////////////////////////// 233 | // check whether we have a hit 234 | // in case the cache is disabled, 235 | // or in case the address is NC, we 236 | // reuse the miss mechanism to handle 237 | // the request 238 | READ: begin 239 | areq_o.fetch_req = '1; 240 | // only enable tag comparison if 
cache is enabled 241 | cmp_en_d = cache_en_q; 242 | // readout speculatively 243 | cache_rden = cache_en_q; 244 | 245 | if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni) ) begin 246 | // check if we have to flush 247 | if (flush_d) begin 248 | state_d = IDLE; 249 | // we have a hit or an exception output valid result 250 | end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin 251 | dreq_o.valid = ~dreq_i.kill_s2;// just don't output in this case 252 | state_d = IDLE; 253 | 254 | // we can accept another request 255 | // and stay here, but only if no inval is coming in 256 | // note: we are not expecting ifill return packets here... 257 | if (!mem_rtrn_vld_i) begin 258 | dreq_o.ready = 1'b1; 259 | if (dreq_i.req) begin 260 | state_d = READ; 261 | end 262 | end 263 | // if a request is being killed at this stage, 264 | // we have to bail out and wait for the address translation to complete 265 | if (dreq_i.kill_s1) begin 266 | state_d = IDLE; 267 | end 268 | end else if (victim_cache_hit) begin 269 | state_d = VICTIM_HIT; 270 | victim_cache_we = 1'b1; // replace with `cl_rdata[repl_way]` 271 | // save 272 | // we have a miss / NC transaction 273 | end else if (dreq_i.kill_s2) begin 274 | state_d = IDLE; 275 | end else if (!inv_q) begin 276 | // only write to victim cache when icache replacement will occur 277 | if (all_ways_valid) begin 278 | state_d = VICTIM_MISS; 279 | victim_cache_we = 1'b1; // replace with `cl_rdata[repl_way]` 280 | end else begin 281 | cmp_en_d = 1'b0; 282 | // only count this as a miss if the cache is enabled, and 283 | // the address is cacheable 284 | // send out ifill request 285 | mem_data_req_o = 1'b1; 286 | if (mem_data_ack_i) begin 287 | miss_o = ~paddr_is_nc; 288 | state_d = MISS; 289 | end 290 | end 291 | end 292 | // bail out if this request is being killed (and we missed on the TLB) 293 | end else if (dreq_i.kill_s2 || flush_d) begin 294 | state_d = KILL_ATRANS; 295 | end 296 | end 297 | 
////////////////////////////////// 298 | // wait until the memory transaction 299 | // returns. do not write to memory 300 | // if the nc bit is set. 301 | MISS: begin 302 | // note: this is mutually exclusive with ICACHE_INV_REQ, 303 | // so we do not have to check for invals here 304 | if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin 305 | state_d = IDLE; 306 | // only return data if request is not being killed 307 | if (!(dreq_i.kill_s2 || flush_d)) begin 308 | dreq_o.valid = 1'b1; 309 | // only write to cache if this address is cacheable 310 | cache_wren = ~paddr_is_nc; 311 | end 312 | // bail out if this request is being killed 313 | end else if (dreq_i.kill_s2 || flush_d) begin 314 | state_d = KILL_MISS; 315 | end 316 | end 317 | ////////////////////////////////// 318 | // killed address translation, 319 | // wait until paddr is valid, and go 320 | // back to idle 321 | KILL_ATRANS: begin 322 | areq_o.fetch_req = '1; 323 | if (areq_i.fetch_valid) begin 324 | state_d = IDLE; 325 | end 326 | end 327 | ////////////////////////////////// 328 | // killed miss, 329 | // wait until memory responds and 330 | // go back to idle 331 | KILL_MISS: begin 332 | if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin 333 | state_d = IDLE; 334 | end 335 | end 336 | ////////////////////////////////// 337 | // victim cache hit after icache sram miss 338 | // value is being written to victim cache 339 | // then we need to write to icache sram 340 | VICTIM_HIT: begin 341 | cache_wren = 1'b1; 342 | state_d = IDLE; 343 | end 344 | ////////////////////////////////// 345 | // victim cache miss after icache sram miss 346 | // value is being written to victim cache 347 | // then we need need to do memory request write to icache sram 348 | VICTIM_MISS: begin 349 | cmp_en_d = 1'b0; 350 | // only count this as a miss if the cache is enabled, and 351 | // the address is cacheable 352 | // send out ifill request 353 | mem_data_req_o = 1'b1; 354 | if 
(mem_data_ack_i) begin 355 | miss_o = ~paddr_is_nc; 356 | state_d = MISS; 357 | end 358 | end 359 | default: begin 360 | // we should never get here 361 | state_d = FLUSH; 362 | end 363 | endcase // state_q 364 | end 365 | 366 | ucsbece154b_victim_cache #( 367 | .ADDR_WIDTH(riscv::PLEN), 368 | .LINE_WIDTH(ICACHE_LINE_WIDTH), 369 | .NR_ENTRIES(1) 370 | ) victim_cache ( 371 | .clk_i, 372 | .rst_ni, 373 | .flush_i, 374 | .en_i, 375 | .raddr_i(areq_i.fetch_paddr), 376 | .rdata_o(victim_cache_rdata_d), 377 | .hit_o(victim_cache_hit), 378 | .we_i(victim_cache_we), 379 | .waddr_i(victim_cache_waddr), 380 | .wdata_i(cl_rdata[repl_way]) 381 | ); 382 | 383 | /////////////////////////////////////////////////////// 384 | // valid bit invalidation and replacement strategy 385 | /////////////////////////////////////////////////////// 386 | 387 | // note: it cannot happen that we get an invalidation + a cl replacement 388 | // in the same cycle as these requests arrive via the same interface 389 | // flushes take precedence over invalidations (it is ok if we ignore 390 | // the inval since the cache is cleared anyway) 391 | 392 | assign flush_cnt_d = (flush_done) ? '0 : 393 | (flush_en) ? flush_cnt_q + 1 : 394 | flush_cnt_q; 395 | 396 | assign flush_done = (flush_cnt_q==(ICACHE_NUM_WORDS-1)); 397 | 398 | // invalidation/clearing address 399 | // flushing takes precedence over invals 400 | assign vld_addr = (flush_en) ? flush_cnt_q : 401 | (inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] : 402 | cl_index; 403 | 404 | assign vld_req = (flush_en || cache_rden) ? '1 : 405 | (mem_rtrn_i.inv.all && inv_en) ? '1 : 406 | (mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) : 407 | repl_way_oh_q; 408 | 409 | assign vld_wdata = (cache_wren) ? 
'1 : '0; 410 | 411 | assign vld_we = (cache_wren | inv_en | flush_en); 412 | // assign vld_req = (vld_we | cache_rden); 413 | 414 | 415 | // chose random replacement if all are valid 416 | assign update_lfsr = cache_wren & all_ways_valid; 417 | assign repl_way = (all_ways_valid) ? rnd_way : inv_way; 418 | assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q; 419 | 420 | // enable signals for memory arrays 421 | assign cl_req = (cache_rden) ? '1 : 422 | (cache_wren) ? repl_way_oh_q : 423 | '0; 424 | assign cl_we = cache_wren; 425 | 426 | // value written to cache 427 | assign cl_wdata = (state_q==VICTIM_HIT) ? victim_cache_rdata_q : mem_rtrn_i.data; 428 | assign victim_cache_waddr = {cl_tag_rdata[repl_way], cl_index, ICACHE_OFFSET_WIDTH'('0)}; 429 | 430 | 431 | // find invalid cache line 432 | lzc #( 433 | .WIDTH ( ICACHE_SET_ASSOC ) 434 | ) i_lzc ( 435 | .in_i ( ~vld_rdata ), 436 | .cnt_o ( inv_way ), 437 | .empty_o ( all_ways_valid ) 438 | ); 439 | 440 | // generate random cacheline index 441 | lfsr #( 442 | .LfsrWidth ( 8 ), 443 | .OutWidth ( $clog2(ariane_pkg::ICACHE_SET_ASSOC)) 444 | ) i_lfsr ( 445 | .clk_i ( clk_i ), 446 | .rst_ni ( rst_ni ), 447 | .en_i ( update_lfsr ), 448 | .out_o ( rnd_way ) 449 | ); 450 | 451 | 452 | /////////////////////////////////////////////////////// 453 | // tag comparison, hit generation 454 | /////////////////////////////////////////////////////// 455 | 456 | logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx; 457 | 458 | for (genvar i=0;i) 21 | 2. In the keyword `always_comb`, what does "comb" refer to? What is its Verilog equivalent? Provide a GitHub permalink to an instance of `always_comb` in CVA6. 22 | 3. In the keyword `always_ff`, what does "ff" refer to? What is its Verilog equivalent? Provide a GitHub permalink to an instance of `always_ff` in CVA6. 23 | 4. What is a SystemVerilog package, and how do you reference its contents in another file? 
Provide a GitHub link to `ariane_pkg.sv` and a permalink to an instance where `ariane_pkg` is imported and used in another file. 24 | 5. What is a struct, and how do you access struct members? Provide a GitHub link to a struct definition in CVA6 and a permalink to where a member of that struct is used. 25 | 6. What are block names? Provide a GitHub permalink to an instance of a block name in CVA6. 26 | 7. What is DPI and what is it used for? Provide a GitHub permalink to a Verilog file that calls a DPI function, and provide a GitHub permalink to where that function is implemented. 27 | 28 | ## RISC-V Questions 29 | 30 | 1. Provide a link to the latest RISC-V ISA Manual. 31 | 2. What are the 6 instruction formats of RISC-V? Give a one-to-three word description of each. 32 | 3. What is a compressed instruction and what are they used for? 33 | 34 | ## CVA6 Questions 35 | 36 | 1. Attach the block diagram of CVA6 provided in the [core's documentation](https://docs.openhwgroup.org/projects/cva6-user-manual/01_cva6_user/). 37 | 2. Skim the [CVA6 user manual](https://docs.openhwgroup.org/projects/cva6-user-manual/01_cva6_user/) and give a one sentence summary for each of the 6 pipeline stages. 38 | 3. Which stages are in the "frontend", and which are in the "backend"? 39 | 4. Expand the following acronyms: RISC-V, CVA6, IF, ID, EX, I\$, D\$, FIFO, TLB, ITLB, CSR, BHT, RAS, BTB, MMU, EPC, MTVEC, LSU, PTW, DTLB, ALU, FPU, OoO, WB, AXI, APU, DPI. 40 | 5. What is the difference between the `"./cva6/corev_apu"` and `"./cva6/core"` directories? 41 | 6. What is AXI and what is it primarily used for in CVA6? 42 | 43 | ## ELF Questions 44 | 45 | Note that you can view the instructions and PCs of an ELF file with the following command: `riscv64-unknown-elf-objdump -d .elf` 46 | 47 | 1. What is an ELF file and where are they used? (Not specific to CVA6) 48 | 2. What is the difference between segments and sections? 49 | 3. 
Compile [`"./programs/examples/asm.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/examples/asm.S), and (using your favorite hex viewer) give the offset into the ELF file at which the `add` instruction is located. Please also provide a screenshot. 50 | 4. Write a `.S` file that contains instructions covering all 6 of the instruction formats, a branch taken condition, and a compressed instruction. Compile it to an ELF file and find all the PCs for each of the 8 occurrences. Provide the `.S` file and the list of PCs. 51 | 52 | ## Simulation Questions 53 | 54 | Refer to the [Getting Started Guide](../guides/getting-started.md) if you need help setting up the required tools for simulation. 55 | 56 | All CVA6 net hierarchical paths should start with `TOP.ariane_testharness.i_ariane.i_cva6.`. Each module/struct should be separated with `.` until you reach the declaration of the net. To see a net hierarchical path in GTKWave, you can right-click an added signal and click "Alias Highlighted Signal". 57 | 58 | When providing screenshots of waveforms, please include all signals you decide are relevant to demonstrate the event. Improper justification will result in a lower score. 59 | 60 | 1. Give the net hierarchical path and GitHub permalink of the PC in the instruction decode stage. 61 | 2. Give the net hierarchical path and GitHub permalink of the ALU output. 62 | 3. Give the net hierarchical path and GitHub permalink of the register file write enable in the commit stage. 63 | 4. Simulate [`"./programs/examples/asm.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/examples/asm.S), and provide a waveform screenshot of the `add` instruction occurring in the ALU. Provide justification. 64 | 5. Simulate the `.S` file you wrote, and provide a waveform screenshot at: a taken branch, a store to memory, a register file write, and a decode of a compressed instruction. Provide justification. 
65 | -------------------------------------------------------------------------------- /labs/intro/git-example.txt: -------------------------------------------------------------------------------- 1 | 2 | Filename: git-example.txt 3 | Author: Ethan Sifferman 4 | 5 | This file is an exercise in understanding commits and GitHub URLs. 6 | -------------------------------------------------------------------------------- /labs/ooo.md: -------------------------------------------------------------------------------- 1 | 2 | # Out-of-Order 3 | 4 | In this lab, you will be asked several questions to verify your understanding of Out-of-Order. 5 | 6 | ## Prelab 7 | 8 | Read through the [CVA6 Execute Stage Documentation](https://docs.openhwgroup.org/projects/cva6-user-manual/03_cva6_design/ex_stage.html) and the [CVA6 Issue Stage Documentation](https://docs.openhwgroup.org/projects/cva6-user-manual/03_cva6_design/issue_stage.html), and use them to answer the following questions. 9 | 10 | 1. What is the purpose of Out-of-Order? 11 | 2. Give a brief explanation of Scoreboarding and Tomasulo's Algorithm. What are the pros and cons of each? Which OoO strategy does CVA6 use? (Extra: [Tomasulo's original paper](https://ieeexplore.ieee.org/document/5392028)) 12 | 3. CVA6's rename unit will not be enabled for this lab. However, provide pseudocode that would run faster assuming the rename unit was enabled. 13 | 4. CVA6 has 7 functional units in [`"ex_stage.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/ex_stage.sv): ALU, Branch Unit, LSU, Multiplier, CSR Buffer, [FPU](https://github.com/openhwgroup/cvfpu), and [CVXIF](https://github.com/openhwgroup/core-v-xif). For each of the 7 functional units, provide: 14 | 1. A brief explanation of its function. 15 | 2. Which instructions it handles. 16 | 3. How many cycles it takes to execute. (You don't have to do this question for the FPU and CVXIF). 17 | 5. 
Briefly describe when the following hazards can occur: 18 | 1. Read after Write (RAW) 19 | 2. Write after Write (WAW) 20 | 3. Write after Read (WAR) 21 | 6. Using the following diagram of the CVA6 backend, explain the path that an instruction must take through the issue and execute stage. Be sure to include the issue queue, transaction IDs, source operands, the destination register, `rd_clobber`, the scoreboard, and any other important logic in your explanation. 22 | 23 | [![Scoreboard](./ooo/figures/scoreboard.svg)](https://docs.openhwgroup.org/projects/cva6-user-manual/03_cva6_design/issue_stage.html) 24 | 25 | 7. After looking through the issue stage and scoreboard RTL, Provide a GitHub permalink to the following in CVA6: 26 | 1. The issue queue instantiation 27 | 2. The logic that specifies if a functional unit is ready to execute a new instruction 28 | 3. The logic that stalls the pipeline due to the execute stage being too full for the next instruction 29 | 4. The logic that determines which instruction(s) will be committed on the next cycle 30 | 31 | ## Lab 32 | 33 | Write a program that demonstrates the following situations: 34 | 35 | * Out-of-Order Execution 36 | * Read after Write hazard 37 | * Write after Write hazard 38 | * Write after Read hazard 39 | * A branch miss 40 | * The issue queue full 41 | 42 | Note: 43 | 44 | * A dependency hazard exists only if the instructions are run out-of-order when the dependency is removed. Verify this when writing your RAW, WAW, and WAR hazards. 45 | * To enable out-of-order execution, your program must use a mix of instructions from the 3 functional unit types: 46 | * No more than 1 fixed latency unit operation (`ALU`, `CTRL_FLOW`, `CSR`, `MULT`) can be run simultaneously. 47 | * No more than 1 floating point unit operation (`FPU`, `FPU_VEC`) can be run simultaneously. 48 | * No more than 1 load-store unit operation (`LOAD`, `STORE`) can be run simultaneously. 
49 | 50 | An example of how to run RISC-V floating point instructions (RVF) is provided here: [`"fpu_example.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/rvf/fpu_example.S) 51 | 52 | ## Lab Questions 53 | 54 | When providing screenshots of waveforms, please include all signals you decide are relevant to demonstrate the event. Improper justification will result in a lower score. 55 | 56 | 1. Share your program. Be sure each situation is clearly commented. 57 | 2. Provide a waveform screenshot and a brief explanation of **how the issue queue is affected** for each of the following situations: 58 | 1. Out-of-Order Execution 59 | 2. Read after Write hazard 60 | 3. Write after Write hazard 61 | 4. Write after Read hazard 62 | 5. A branch miss 63 | 6. The issue queue full 64 | -------------------------------------------------------------------------------- /labs/sv.md: -------------------------------------------------------------------------------- 1 | 2 | # Synthesizable SystemVerilog 3 | 4 | SystemVerilog is the most common hardware-description-language today. It is powerful for its good object-oriented design support that can be used both in simulation and synthesis. Writing synthesizable SystemVerilog has difficulties, so this lab will help you develop your skills. 5 | 6 | Resources 7 | 8 | * [Writing Synthesizable SystemVerilog](../guides/synthesis.md) 9 | * [DigitalJS Online](https://digitaljs.tilk.eu/) 10 | * ["Busting the Myth that SystemVerilog is only for Verification"](https://sutherland-hdl.com/papers/2013-SNUG-SV_Synthesizable-SystemVerilog_paper.pdf) 11 | 12 | ## Pre-Lab Questions 13 | 14 | 1. For the following questions, please provide a screenshot from [DigitalJS Online](https://digitaljs.tilk.eu/), a Verilog synthesis visualizer, to support your answers. 15 | 1. How is a Verilog/SystemVerilog `for` loop synthesized? 16 | 2. How is a Verilog/SystemVerilog `if` statement synthesized? 17 | 2. 
CVA6 uses a common design practice of appending `_d` and `_q` to the end of net names. What does each mean? How should each net type be assigned? Why can this practice be more useful than using a single net? 18 | 3. When synthesizing Verilog/SystemVerilog into a netlist, the synthesis tool will often "infer" where you have created a block-RAM and try to optimize the design accordingly. This is extremely helpful if your target FPGA or ASIC already has built-in block-RAMs, because using the built-in block-RAMs will be much more power and area efficient than creating an equivalent design using flip-flops. For example, you can see that [DigitalJS Online](https://digitaljs.tilk.eu/) converts the Verilog array into a "RAM" logic cell. 19 | 20 | ```systemverilog 21 | module ram ( 22 | input clk, 23 | input rst, 24 | // read port 25 | input [1:0] raddr_i, 26 | output [7:0] rdata_o, 27 | // write port 28 | input [1:0] waddr_i, 29 | input [7:0] wdata_i, 30 | input we_i 31 | ); 32 | 33 | logic [7:0] RAM [0:3]; 34 | 35 | assign rdata_o = RAM[raddr_i]; 36 | 37 | always_ff @(posedge clk) begin 38 | if (we_i) RAM[waddr_i] <= wdata_i; 39 | end 40 | 41 | endmodule 42 | ``` 43 | 44 | 1. Add a `for` loop to this design so that when `rst` goes high, all the RAM cells' values are set to `'0` on the next clock cycle. Provide your modified code and a screenshot of what [DigitalJS Online](https://digitaljs.tilk.eu/) synthesized. 45 | 2. Why is this reset strategy not scalable to larger RAM sizes? Theorize a better strategy to reset the RAM over multiple cycles. What are the pros and cons of this new reset strategy versus the first strategy? 46 | 47 | ## Lab 48 | 49 | In this part, you will implement a synchronous FIFO. 50 | 51 | FIFOs (first-in-first-out queues) are incredibly popular in pipelined architecture designs. 
Usually, if two hardware units with different clock frequencies/latencies need to communicate, the faster unit must stall and wait for the slower unit to be ready to prevent data loss. Fortunately, a FIFO can be added between the two units to buffer the requests so that the faster unit will stall only when the FIFO is full, and the slower unit can grab available data as soon as it is ready. This strategy can provide incredible speedup when a lot of different types of hardware units need to communicate. (CVA6 uses several FIFOs in its design.) 52 | 53 | You should implement your FIFO with a cyclical buffer. You should have one block RAM that is `DATA_WIDTH` bits wide and `NR_ENTRIES` entries deep, with pointers to your FIFO `head` and `tail`. Here are some specifics: 54 | 55 | * When you pop an element, you should increment the `head` pointer. 56 | * When you push an element, you should write `data_i` to the tail of the buffer, and increment the `tail` pointer. 57 | * If the `head` or `tail` pointer reaches `NR_ENTRIES`, it should reset to `0`. 58 | * You can push and pop on the same cycle, but if the FIFO is empty, you should only push. 59 | * If the FIFO is empty, `valid_o` should output `0`, and any pop requests should fail. 60 | * If the FIFO is full, `full_o` should output `1`, and any push request should fail unless a pop request is also active. 61 | * As long as the FIFO is not empty, `data_o` should give the data at the head of the buffer. 62 | * Both pushing and popping should occur on `posedge clk_i`, and reset should occur synchronously. 63 | 64 | The module you need to finish is [`"ucsbece154b_fifo.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/sv/starter/ucsbece154b_fifo.sv), found in [`"labs/sv/starter"`](https://github.com/sifferman/labs-with-cva6/tree/main/labs/sv/starter). You can simulate your changes with ModelSim using `make tb TOOL=modelsim` (or Verilator 5 using `make tb TOOL=verilator` assuming that you have it set up). 
A [sample testbench](https://github.com/sifferman/labs-with-cva6/blob/main/labs/sv/starter/tb/fifo_tb.sv) is provided that you may edit as desired. You will also be graded on whether your design is synthesizable. You can run `make synth` to verify that it synthesizes with Yosys+Surelog correctly. 65 | 66 | Now that you have seen a lot of CVA6's code, **you must mimic the coding practices/styles of CVA6**. This means using `_d` and `_q` nets for all your flip-flops, and using `always_comb` to set your `_d` nets, and using `always_ff` to set your `_q` nets. See ["Writing Synthesizable SystemVerilog" - Flip-Flops](https://github.com/sifferman/labs-with-cva6/blob/main/guides/synthesis.md#flip-flops). 67 | 68 | Note that for your buffer to infer a block ram, you cannot separate it into `_d` and `_q` nets. This is because if your `always_ff` block has `_q <= _d;`, your array will be inferred as an array of registers. To be inferred as a block ram, you must do `if () [] <= ;` instead. See ["Writing Synthesizable SystemVerilog" - Memory](https://github.com/sifferman/labs-with-cva6/blob/main/guides/synthesis.md#memory). 69 | -------------------------------------------------------------------------------- /labs/sv/starter/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | fusesoc.conf 3 | -------------------------------------------------------------------------------- /labs/sv/starter/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # File: Makefile 3 | # Description: Makefile for running the fifo testbench. 
4 | # 5 | 6 | CORE_FILE = ucsbece154b_fifo.core 7 | VENDOR = ucsbece154b 8 | LIBRARY = sv 9 | CORE = fifo 10 | VERSION = 1.0.0 11 | PROJECT = ${VENDOR}:${LIBRARY}:${CORE}:${VERSION} 12 | 13 | TESTS = $(shell find tests -type f) 14 | 15 | # ======================================================= # 16 | # Every command target is phony so make never confuses it with a file or directory of the same name (e.g. `tb` vs. the tb/ directory). 17 | .PHONY: init build sim view lint synth clean tb tb_resized_small tb_resized_big lint_resized_small lint_resized_big 18 | 19 | # ======================================================= # 20 | # Targets 21 | 22 | tb: fusesoc.conf 23 | fusesoc run --target $@ --tool ${TOOL} ${PROJECT} 24 | tb_resized_small: fusesoc.conf 25 | fusesoc run --target $@ --tool ${TOOL} ${PROJECT} 26 | tb_resized_big: fusesoc.conf 27 | fusesoc run --target $@ --tool ${TOOL} ${PROJECT} 28 | 29 | init: fusesoc.conf 30 | 31 | lint: fusesoc.conf 32 | fusesoc run --target $@ ${PROJECT} 33 | lint_resized_small: fusesoc.conf 34 | fusesoc run --target $@ ${PROJECT} 35 | lint_resized_big: fusesoc.conf 36 | fusesoc run --target $@ ${PROJECT} 37 | # Recurse through $(MAKE) so command-line flags and the jobserver reach the sub-make. 38 | synth: 39 | cd synth && $(MAKE) 40 | 41 | clean: 42 | rm -rf build fusesoc.conf 43 | 44 | # ======================================================= # 45 | # Files 46 | 47 | fusesoc.conf: ${CORE_FILE} 48 | @pip3 install --user -r requirements.txt > /dev/null 49 | rm -f fusesoc.conf 50 | fusesoc library add ${VENDOR}_${LIBRARY}_${CORE}_${VERSION} . --sync-type=local 51 | 52 | # ======================================================= # 53 | -------------------------------------------------------------------------------- /labs/sv/starter/README.md: -------------------------------------------------------------------------------- 1 | 2 | # FIFO Starter Code 3 | 4 | ## Guide to Run ECI ModelSim 5 | 6 | 1. Edit [`"ucsbece154b_fifo.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/sv/starter/ucsbece154b_fifo.sv) and [`"tb/fifo_tb.sv"`](https://github.com/sifferman/labs-with-cva6/blob/main/labs/sv/starter/tb/fifo_tb.sv) 7 | 2. Run `make tb TOOL=modelsim` 8 | 3. 
Run `cd build/ucsbece154b_sv_fifo_1.0.0/tb-modelsim/` 9 | 4. Run `make run-gui` to compile your design and open ModelSim 10 | 5. Run simulations as normal. 11 | -------------------------------------------------------------------------------- /labs/sv/starter/lint.vlt: -------------------------------------------------------------------------------- 1 | `verilator_config 2 | lint_off -rule WIDTH 3 | -------------------------------------------------------------------------------- /labs/sv/starter/requirements.txt: -------------------------------------------------------------------------------- 1 | fusesoc 2 | -------------------------------------------------------------------------------- /labs/sv/starter/synth/Makefile: -------------------------------------------------------------------------------- 1 | #Auto generated by Edalize 2 | 3 | all: clean stats timing 4 | 5 | build/synth.json: 6 | @mkdir -p build 7 | cd build && yosys -l yosys.log -p 'tcl ../yosys.tcl' 8 | 9 | build/synth.asc: build/synth.json 10 | @mkdir -p build 11 | cd build && nextpnr-ice40 -l next.log --hx8k --package ct256 --pcf-allow-unconstrained --json synth.json --asc synth.asc 12 | 13 | build/synth.stat: build/synth.asc 14 | @mkdir -p build 15 | cd build && icebox_stat synth.asc > synth.stat 16 | 17 | build/synth.tim: build/synth.asc 18 | @mkdir -p build 19 | cd build && icetime -r synth.tim synth.asc 20 | 21 | stats: build/synth.stat 22 | 23 | timing: build/synth.tim 24 | 25 | clean: 26 | rm -rf build 27 | -------------------------------------------------------------------------------- /labs/sv/starter/synth/yosys.tcl: -------------------------------------------------------------------------------- 1 | 2 | set top ucsbece154b_fifo 3 | set name ucsbece154b_sv_fifo_1.0.0 4 | 5 | yosys -import 6 | plugin -i systemverilog 7 | 8 | yosys -import 9 | 10 | read_systemverilog -PDATA_WIDTH=64 -PNR_ENTRIES=1024 -noinfo -nonote {../../ucsbece154b_fifo.sv} 11 | 12 | synth_ice40 -top $top 13 | opt 14 | 
write_json synth.json 15 | -------------------------------------------------------------------------------- /labs/sv/starter/tb/assert.svh: -------------------------------------------------------------------------------- 1 | /* 2 | * File: assert.svh 3 | * Description: Macro to help with handling errors in testbenches. 4 | */ 5 | 6 | `ifndef __ASSERT_SVH 7 | `define __ASSERT_SVH 8 | 9 | `define ASSERT(CONDITION, MESSAGE) if ((CONDITION)==1'b1); else begin $display($sformatf("Error: %s", $sformatf MESSAGE)); ERROR = 1; end 10 | 11 | `endif 12 | -------------------------------------------------------------------------------- /labs/sv/starter/tb/fifo_tb.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * File: fifo_tb.sv 3 | * Description: Example testbench for fifo. 4 | */ 5 | 6 | `include "assert.svh" 7 | 8 | module fifo_tb #( 9 | parameter int unsigned DATA_WIDTH = 32, 10 | parameter int unsigned NR_ENTRIES = 4 11 | ) (); 12 | 13 | logic clk_i = 0; 14 | logic rst_i; 15 | 16 | logic [DATA_WIDTH-1:0] data_o; 17 | logic pop_i; 18 | 19 | logic [DATA_WIDTH-1:0] data_i; 20 | logic push_i; 21 | 22 | logic full_o; 23 | logic valid_o; 24 | 25 | always #10 clk_i = ~clk_i; 26 | ucsbece154b_fifo #( 27 | .DATA_WIDTH(DATA_WIDTH), 28 | .NR_ENTRIES(NR_ENTRIES) // modify as needed 29 | ) fifo (.*); 30 | 31 | int i; 32 | logic ERROR = 0; 33 | initial begin 34 | $dumpvars; 35 | $display( "Begin simulation." 
); 36 | //\\ =========================== \\// 37 | 38 | 39 | 40 | // reset/initialize 41 | rst_i = 1'b1; 42 | data_i = '0; 43 | pop_i = 1'b0; 44 | push_i = 1'b0; 45 | 46 | @(negedge clk_i); 47 | 48 | `ASSERT(valid_o==1'b0, ("Not reporting as invalid after reset")); 49 | 50 | rst_i = 1'b0; 51 | 52 | push_i = 1'b1; 53 | 54 | for (i = 1; i <= (NR_ENTRIES+1); i++) begin 55 | if (i==(NR_ENTRIES+1)) // if trying to push SIZE+1th item 56 | `ASSERT(full_o==1'b1, ("Not reporting as full after filling")); 57 | data_i = {DATA_WIDTH{4'(i)}}; 58 | @(negedge clk_i); 59 | `ASSERT(valid_o==1'b1, ("Not reporting as valid after pushing")); 60 | end 61 | 62 | 63 | 64 | //\\ =========================== \\// 65 | $display("End simulation."); 66 | $stop; 67 | end 68 | 69 | endmodule 70 | -------------------------------------------------------------------------------- /labs/sv/starter/ucsbece154b_fifo.core: -------------------------------------------------------------------------------- 1 | CAPI=2: 2 | name: ucsbece154b:sv:fifo:1.0.0 3 | description: UCSB ECE154B FIFO 4 | 5 | filesets: 6 | # default 7 | rtl: 8 | files: 9 | - ucsbece154b_fifo.sv 10 | - lint.vlt: {file_type: vlt} 11 | file_type: systemVerilogSource 12 | 13 | # tb 14 | tb: 15 | files: 16 | - tb/assert.svh: {is_include_file: true} 17 | - tb/fifo_tb.sv 18 | file_type: systemVerilogSource 19 | 20 | 21 | targets: 22 | default: &default 23 | filesets: 24 | - rtl 25 | 26 | tb: &tb # fusesoc run --target tb ucsbece154b:sv:fifo 27 | <<: *default 28 | description: Simulate the design 29 | toplevel: fifo_tb 30 | default_tool: verilator 31 | filesets_append: 32 | - tb 33 | tools: 34 | verilator: 35 | verilator_options: 36 | - --binary 37 | - -j 0 38 | - --trace-fst 39 | - --trace-structs 40 | - --x-assign unique 41 | - --x-initial unique 42 | - --unroll-count 10000 43 | - --unroll-stmts 1000000 44 | run_options: 45 | - +verilator+rand+reset+2 46 | 47 | tb_resized_small: # fusesoc run --target tb_resized_small ucsbece154b:sv:fifo 48 | 
<<: *tb 49 | parameters: 50 | - DATA_WIDTH=13 51 | - NR_ENTRIES=2 52 | 53 | tb_resized_big: # fusesoc run --target tb_resized_big ucsbece154b:sv:fifo 54 | <<: *tb 55 | parameters: 56 | - DATA_WIDTH=51 57 | - NR_ENTRIES=13 58 | 59 | lint: &lint 60 | <<: *default 61 | description: Lint the design 62 | toplevel: ucsbece154b_fifo 63 | default_tool: verilator 64 | tools: 65 | verilator: 66 | mode: lint-only 67 | verilator_options: 68 | - -Wall 69 | - +1364-2005ext+v 70 | - +1800-2017ext+sv 71 | - --unroll-count 10000 72 | - --unroll-stmts 1000000 73 | 74 | 75 | lint_resized_small: # fusesoc run --target lint_resized_small ucsbece154b:sv:fifo 76 | <<: *lint 77 | parameters: 78 | - DATA_WIDTH=13 79 | - NR_ENTRIES=2 80 | lint_resized_big: # fusesoc run --target lint_resized_big ucsbece154b:sv:fifo 81 | <<: *lint 82 | parameters: 83 | - DATA_WIDTH=51 84 | - NR_ENTRIES=13 85 | 86 | 87 | 88 | parameters: 89 | DATA_WIDTH: 90 | datatype: int 91 | description: Set fifo data width 92 | paramtype: vlogparam 93 | NR_ENTRIES: 94 | datatype: int 95 | description: Set fifo size 96 | paramtype: vlogparam 97 | -------------------------------------------------------------------------------- /labs/sv/starter/ucsbece154b_fifo.sv: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ucsbece154b_fifo.sv 3 | * Description: Starter file for fifo. 
4 | */ 5 | 6 | module ucsbece154b_fifo #( 7 | parameter int unsigned DATA_WIDTH = 32, 8 | parameter int unsigned NR_ENTRIES = 4 9 | ) ( 10 | input logic clk_i, 11 | input logic rst_i, 12 | 13 | output logic [DATA_WIDTH-1:0] data_o, 14 | input logic pop_i, 15 | 16 | input logic [DATA_WIDTH-1:0] data_i, 17 | input logic push_i, 18 | 19 | output logic full_o, 20 | output logic valid_o 21 | ); 22 | 23 | // TODO 24 | 25 | endmodule 26 | -------------------------------------------------------------------------------- /labs/vm.md: -------------------------------------------------------------------------------- 1 | 2 | # Virtual Memory 3 | 4 | In addition to the lectures, please use the following resources to help you with this lab: 5 | 6 | * [RISC-V Privileged Architecture Manual](https://github.com/riscv/riscv-isa-manual) 7 | * ["RISC-V Bytes: Privilege Levels" by Daniel Mangum](https://danielmangum.com/posts/risc-v-bytes-privilege-levels/) 8 | 9 | ## Privilege 10 | 11 | RISC-V has functionality for "privilege modes". Depending on the privilege mode a program is in, certain functionalities or memory addresses can be enabled or disabled. This is a summary of the privilege modes: 12 | 13 | * Machine-mode: Highest privilege, all memory addresses are enabled. Used by bootloader. 14 | * Supervisor-mode: All addresses except user addresses are enabled. Used by OS. 15 | * User-mode: Lowest privilege, only user addresses can be accessed. Used by programs. 16 | 17 | [![Privilege Levels](./vm/priv_levels.png)](https://danielmangum.com/posts/risc-v-bytes-privilege-levels/) 18 | 19 | A core will always start in M-mode, which does not support virtual memory. To enable virtual memory, OS must lower the privilege to either S-mode or U-mode. This is demonstrated in [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S) and [`"programs/vm/privilege.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/privilege.S). 
20 | 21 | ## Provided OS Explanation 22 | 23 | You have been provided a simple Bootloader and OS: [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S). It is an example of how to set up and enable a page table, and lower the privilege to U-mode. 24 | 25 | These are the steps it takes in more detail: 26 | 27 | 1. **Run bootloader in M-mode** 28 | 2. Set up the page table 29 | 3. Configure PMP 30 | 4. **Jump to the OS in S-mode** 31 | 5. Enable virtual memory using the page table 32 | 6. Add the user program to the page table 33 | 7. Load the user program to user memory 34 | 8. **Jump to the program in U-mode** 35 | 9. [`"rvfi_tracer.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/corev_apu/tb/rvfi_tracer.sv#L74-L77) hits a breakpoint at `ecall` and exits the simulation 36 | 37 | Note, out of simplicity's sake, there are a few important OS features that have not been fully implemented, such as: 38 | 39 | * The code does not implement trap handlers for `ecall` instructions. 40 | * The kernel and each user process should have their own page table. 41 | * User processes are usually loaded to addresses starting at 0x0. 42 | 43 | In this lab, you will be implementing basic trap handlers. 44 | 45 | ## Prelab 46 | 47 | You will need the [RISC-V Privileged Architecture Manual](https://github.com/riscv/riscv-isa-manual) to answer some of these questions. 48 | 49 | 1. What is the purpose of virtual memory? 50 | 2. Define the following: MMU, PTW, TLB. 51 | 3. What is the benefit of a multi-layer page table? 52 | 4. For Sv39, give 53 | 1. The number of bits in a VA 54 | 2. The number of bits in a PA 55 | 3. The number of layers a PT can be 56 | 4. The size of a page in bytes 57 | 5. The size of a PTE in bytes 58 | 5. Complete the following page table entry questions. 59 | 1. Provide a diagram of a Sv39 PTE. 60 | 2. List and define the 10 bottom bits of a Sv39 page table entry. 61 | 3. 
In [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S), each PTE's bottom 8 bits are set to either `0x1`, `0xef`, or `0xff`; explain the purposes of each of these three values in the context of [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S). 62 | 6. Draw a diagram of the hierarchical page table created in [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S) (unmodified). 63 | * Show/describe the contents of every valid PTE in each PT. 64 | * Denote pointers from a PTE to another layer with an arrow to the corresponding PT. 65 | * Show the contents of every valid physical frame in physical memory. 66 | 7. In [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S) and [`"programs/vm/privilege.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/privilege.S), several control/status registers are written. For each of the registers, provide a screenshot of the bit diagram, and a definition of each of any fields that the provided programs use. (For example, [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/os.S) only uses the `SUM` from `sstatus`, so `SUM` is the only field you need to give a definition of for `sstatus`). 67 | 1. `mstatus` 68 | 2. `sstatus` 69 | 3. `mepc` 70 | 4. `sepc` 71 | 5. `mtvec` 72 | 6. `stvec` 73 | 7. `satp` 74 | 8. `medeleg` 75 | 76 | ## Lab 77 | 78 | The current simulation runs just one user process, then stops the simulation once the `ecall` is run in the user program. However, we want to modify it to run 2 user processes sequentially, then stop the simulation if an `ecall` is run in M-mode. To achieve this, we need to modify the CVA6 testbench, and modify the provided OS code. 
79 | 80 | ### CVA6 Testbench Modification 81 | 82 | The CVA6 testbench is currently configured that any `ecall` instruction will stop the simulation. Edit this in [`"rvfi_tracer.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/corev_apu/tb/rvfi_tracer.sv#L74) so that it only exits the simulation on an `ecall` in M-mode. (Hint: [`"rvfi_pkg.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/corev_apu/tb/rvfi_pkg.sv), [`"riscv_pkg.sv"`](https://github.com/openhwgroup/cva6/blob/b44a696bbead23dafb068037eff00a90689d4faf/core/include/riscv_pkg.sv#L56)) 83 | 84 | ### OS Modification 85 | 86 | 0. *Same initial setup...* 87 | 1. Create `m_trap` and `s_trap` trap handlers that are assigned to `mtvec` and `stvec` 88 | 2. Set `medeleg` 89 | 3. Create a counter that specifies which user program should be run (initialized to 1) 90 | 4. Load user program 1 to memory and configure the page table accordingly 91 | 5. Run user program 1 that has an `ecall` instruction 92 | 6. Return to the `s_trap` trap handler 93 | 7. Have `s_trap` increment the user program counter, then jump back to the OS 94 | 8. Load user program 2 to a different VA and PA than user program 1, and configure the page table accordingly 95 | 9. Run user program 2 that has an `ecall` instruction 96 | 10. Return to the `s_trap` trap handler 97 | 11. Have `s_trap` increment the user program counter, then jump back to the OS 98 | 12. On user program counter > 2, the OS runs `ecall` 99 | 13. Return to the `m_trap` trap handler 100 | 14. Have `m_trap` run `ecall` to exit the simulation 101 | 102 | *Note: reference [`"programs/vm/privilege.S"`](https://github.com/sifferman/labs-with-cva6/blob/main/programs/vm/privilege.S) to help you set up your trap handlers.* 103 | 104 | ### CVA6 Trace Log 105 | 106 | CVA6 simulations create a log file: `"cva6/trace_hart_00.dasm"`. 
For every instruction that the simulation ran, it shows the cycle number, VPC, privilege mode, and instruction. It will be a very useful reference for this lab. 107 | 108 | Notes: 109 | 110 | * *"Hart" means hardware thread, which is the same thing as a core.* 111 | * *Simulation should take no longer than 1 minute.* 112 | * *Sometimes the core randomly enters Debug mode. (Observe `TOP.ariane_testharness.i_ariane.i_cva6.debug_mode`). As long as the core returns to normal execution, you can ignore this. If the simulation never exits, then your code has a bug.* 113 | 114 | Additional resource: [RISC-V Instruction Encoder/Decoder](https://luplab.gitlab.io/rvcodecjs/). 115 | 116 | ### Numerical Labels 117 | 118 | Note that [`"programs/vm/os.S"`](https://github.com/sifferman/labs-with-cva6/blob/c5e49d3c7b3dd98ead3ae45898a29cbb437cf101/programs/vm/os.S#L95) demonstrates numerical labels with this line of code: `blt t0, t1, 1b;`. 119 | 120 | Numeric labels are used for local references. References to local labels are suffixed with `f` for a forward reference or `b` for a backwards reference ([reference](https://michaeljclark.github.io/asm.html)). They are most useful when creating loops or conditionals that don't need to be given a name, and don't need to be called by another file. 121 | 122 | ### Lab Questions 123 | 124 | 1. Show your modifications to `"rvfi_tracer.sv"`. 125 | 2. Show your modifications to `"os.S"`. 126 | 3. Draw a diagram of the hierarchical page table you created in your modified `"os.S"`. 127 | * Show/describe the contents of every valid PTE in each PT. 128 | * Denote pointers from a PTE to another layer with an arrow to the corresponding PT. 129 | * Show the contents of every valid physical frame in physical memory. 130 | 4. Provide your `"trace_hart_00.dasm"` file, and highlight the following behaviors: 131 | 1. Enter `bootloader` in M-mode 132 | 2. Enter `OS` in S-mode 133 | 3. 
Enter user program 1 in U-mode; also provide its virtual and physical address 134 | 4. Enter `s_trap` in S-mode 135 | 5. Re-enter `OS` in S-mode 136 | 6. Enter user program 2 in U-mode; also provide its virtual and physical address 137 | 7. Re-enter `s_trap` in S-mode 138 | 8. Re-enter `OS` in S-mode 139 | 9. Enter `m_trap` in M-mode 140 | 10. Exit 141 | 5. Provide a screenshot of a waveform demonstrating how the MMU translates the user program's virtual address to its physical address. *Note: The net hierarchical path to the MMU is `TOP.ariane_testharness.i_ariane.i_cva6.ex_stage_i.lsu_i.gen_mmu_sv39.i_cva6_mmu`.* 142 | -------------------------------------------------------------------------------- /labs/vm/priv_levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sifferman/labs-with-cva6/ab590c6b8dc0cf1032f51438c7aad7a2012f4a73/labs/vm/priv_levels.png -------------------------------------------------------------------------------- /programs/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # File: Makefile 3 | # Description: Makefile for compiling for CVA6. 4 | # 5 | 6 | AUX = -T aux/link.ld 7 | BARE = -static -nostdlib -nostartfiles -march=rv64imafdc -O0 8 | 9 | all: clean examples/asm.elf bp/loop.elf bp/div.elf bp/spaghetti.elf vm/privilege.elf vm/os.elf rvf/fpu_example.elf 10 | 11 | # Create .elf from .S 12 | %.elf: %.S 13 | riscv64-unknown-elf-gcc ${BARE} $< ${AUX} -o $@ 14 | 15 | clean: 16 | rm -rf *.elf **/*.elf 17 | -------------------------------------------------------------------------------- /programs/aux/link.ld: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2018 ETH Zurich and University of Bologna. 
3 | Copyright and related rights are licensed under the Solderpad Hardware 4 | License, Version 0.51 (the "License"); you may not use this file except in 5 | compliance with the License. You may obtain a copy of the License at 6 | http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law 7 | or agreed to in writing, software, hardware and materials distributed under 8 | this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 9 | CONDITIONS OF ANY KIND, either express or implied. See the License for the 10 | specific language governing permissions and limitations under the License. 11 | 12 | Author: Michael Schaffner , ETH Zurich 13 | Date: 26.11.2018 14 | Description: Basic linker script for bare metal programs running on Ariane (copied from RISCV tests). 15 | */ 16 | 17 | OUTPUT_ARCH( "riscv" ) 18 | ENTRY(_start) 19 | 20 | SECTIONS 21 | { 22 | . = 0x80000000; 23 | 24 | /* text: test code section */ 25 | .text.init : { 26 | *(.text.init) 27 | } 28 | 29 | .text : { 30 | *(.text) 31 | } 32 | 33 | /* data segment */ 34 | .data : { 35 | *(.data) 36 | } 37 | 38 | .sdata : { 39 | __global_pointer$ = . 
+ 0x800; 40 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 41 | *(.sdata .sdata.* .gnu.linkonce.s.*) 42 | } 43 | 44 | /* bss segment */ 45 | .sbss : { 46 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 47 | *(.scommon) 48 | } 49 | 50 | .bss : { 51 | *(.bss) 52 | } 53 | 54 | /* thread-local data segment */ 55 | .tdata : { 56 | _tls_data = .; 57 | *(.tdata.begin) 58 | *(.tdata) 59 | *(.tdata.end) 60 | } 61 | 62 | .tbss : { 63 | *(.tbss) 64 | *(.tbss.end) 65 | } 66 | 67 | /* End of uninitialized data segment */ 68 | _end : { 69 | _end = .; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /programs/bp/div.S: -------------------------------------------------------------------------------- 1 | /* 2 | * File: div.S 3 | * Description: Divisor check to benchmark the branch predictor. 4 | */ 5 | 6 | .text 7 | # Text segment 8 | .globl _start; 9 | _start: # entry point 10 | li t0, 20; # big_loop init 11 | big_loop: 12 | addi t0, t0, -1; # big_loop counter 13 | beqz t0, big_loop_end; # big_loop exit condition 14 | 15 | 16 | # div_loop: 17 | # t0: reserved for big_loop 18 | # a0: divisor, goes from 10-1 19 | # t1: dividend, goes from 20-10 20 | # a1: count how many times dividend/divisor evenly 21 | li a0, 11 22 | div_loop: 23 | addi a0, a0, -1; # div_loop counter 24 | beqz a0, div_loop_end; # div_loop exit condition 25 | 26 | 27 | li a1, 0; # counter 28 | 29 | 30 | 31 | check_20: 32 | li t1, 20; 33 | remw t2, t1, a0; 34 | bnez t2, check_19; 35 | addi a1, a1, 1; 36 | check_19: 37 | li t1, 19; 38 | remw t2, t1, a0; 39 | bnez t2, check_18; 40 | addi a1, a1, 1; 41 | check_18: 42 | li t1, 18; 43 | remw t2, t1, a0; 44 | bnez t2, check_17; 45 | addi a1, a1, 1; 46 | check_17: 47 | li t1, 17; 48 | remw t2, t1, a0; 49 | bnez t2, check_16; 50 | addi a1, a1, 1; 51 | check_16: 52 | li t1, 16; 53 | remw t2, t1, a0; 54 | bnez t2, check_15; 55 | addi a1, a1, 1; 56 | check_15: 57 | li t1, 15; 58 | remw t2, t1, a0; 59 
| bnez t2, check_14; 60 | addi a1, a1, 1; 61 | check_14: 62 | li t1, 14; 63 | remw t2, t1, a0; 64 | bnez t2, check_13; 65 | addi a1, a1, 1; 66 | check_13: 67 | li t1, 13; 68 | remw t2, t1, a0; 69 | bnez t2, check_12; 70 | addi a1, a1, 1; 71 | check_12: 72 | li t1, 12; 73 | remw t2, t1, a0; 74 | bnez t2, check_11; 75 | addi a1, a1, 1; 76 | check_11: 77 | li t1, 11; 78 | remw t2, t1, a0; 79 | bnez t2, check_10; 80 | addi a1, a1, 1; 81 | check_10: 82 | li t1, 10; 83 | remw t2, t1, a0; 84 | bnez t2, not_10; 85 | addi a1, a1, 1; 86 | not_10: 87 | 88 | # a1 now holds how many times dividend/divisor evenly 89 | 90 | j div_loop; # loop div_loop 91 | div_loop_end: 92 | 93 | j big_loop; # loop big_loop 94 | big_loop_end: 95 | 96 | 97 | # exit 98 | li a7, 93; 99 | ecall; 100 | 101 | 102 | .section ".tohost","aw",@progbits 103 | .globl tohost 104 | .globl fromhost 105 | 106 | .align 6 107 | tohost: .dword 0 108 | .align 6 109 | fromhost: .dword 0 110 | -------------------------------------------------------------------------------- /programs/bp/loop.S: -------------------------------------------------------------------------------- 1 | /* 2 | * File: loop.S 3 | * Description: Nested for loop to benchmark the branch predictor. 
4 | */ 5 | 6 | .text 7 | # Text segment 8 | .globl _start; 9 | _start: # entry point 10 | 11 | 12 | # loop 13 | #define LOOP_SIZE 5 14 | li a0, 0; # output init 15 | 16 | li t0, LOOP_SIZE; # loop0 init 17 | loop0: 18 | addi t0, t0, -1; # loop0 counter 19 | beqz t0, loop0_end; # loop0 exit condition 20 | 21 | li t1, LOOP_SIZE; # loop1 init 22 | loop1: 23 | addi t1, t1, -1; # loop1 counter 24 | beqz t1, loop1_end; # loop1 exit condition 25 | 26 | li t2, LOOP_SIZE; # loop2 init 27 | loop2: 28 | addi t2, t2, -1; # loop2 counter 29 | beqz t2, loop2_end; # loop2 exit condition 30 | 31 | addi a0, a0, 1; # increment output 32 | 33 | j loop2 # loop loop2 34 | loop2_end: 35 | 36 | j loop1 # loop loop1 37 | loop1_end: 38 | 39 | j loop0 # loop loop0 40 | loop0_end: 41 | 42 | 43 | # exit 44 | li a7, 93; 45 | ecall; 46 | 47 | 48 | .section ".tohost","aw",@progbits 49 | .globl tohost 50 | .globl fromhost 51 | 52 | .align 6 53 | tohost: .dword 0 54 | .align 6 55 | fromhost: .dword 0 56 | -------------------------------------------------------------------------------- /programs/bp/spaghetti.S: -------------------------------------------------------------------------------- 1 | /* 2 | * File: spaghetti.S 3 | * Description: Spaghetti code to benchmark the branch predictor. 
4 | */ 5 | 6 | .text 7 | # Text segment 8 | .globl _start; 9 | _start: # entry point 10 | li t0, 30; # big_loop init 11 | big_loop: 12 | addi t0, t0, -1; # big_loop counter 13 | beqz t0, big_loop_end; # big_loop exit condition 14 | 15 | 16 | 17 | spaghetti_0: 18 | li t1, 1; 19 | li t2, 2; 20 | label1_0: bgt t1, t2, label3_0; 21 | label2_0: ble t1, t2, label4_0; 22 | label6_0: bne t1, t2, label5_0; 23 | label3_0: bgtu t1, t2, label5_0; 24 | label4_0: bleu t1, t2, label6_0; 25 | label5_0: beq t1, t2, label7_0; 26 | label7_0: blt t1, t2, label9_0; 27 | label8_0: bge t1, t2, labelA_0; 28 | label9_0: bltu t1, t2, spaghetti_1; 29 | labelA_0: bgeu t1, t2, spaghetti_1; 30 | 31 | spaghetti_1: 32 | li t1, 2; 33 | li t2, 1; 34 | label1_1: bgt t1, t2, label3_1; 35 | label2_1: ble t1, t2, label4_1; 36 | label7_1: blt t1, t2, label5_1; 37 | label3_1: bgtu t1, t2, label5_1; 38 | label4_1: bleu t1, t2, label6_1; 39 | label5_1: beq t1, t2, label7_1; 40 | label6_1: bne t1, t2, label8_1; 41 | label8_1: bge t1, t2, labelA_1; 42 | label9_1: bltu t1, t2, label8_1; 43 | labelA_1: bgeu t1, t2, spaghetti_2; 44 | 45 | spaghetti_2: 46 | li t1, 0; 47 | li t2, 0; 48 | label1_2: bgt t1, t2, label3_2; 49 | label2_2: ble t1, t2, label4_2; 50 | label8_2: bge t1, t2, label5_2; 51 | label3_2: bgtu t1, t2, label5_2; 52 | label4_2: bleu t1, t2, label6_2; 53 | label5_2: beq t1, t2, label7_2; 54 | label6_2: bne t1, t2, label8_2; 55 | label7_2: blt t1, t2, label9_2; 56 | label9_2: bltu t1, t2, done; 57 | labelA_2: bgeu t1, t2, done; 58 | 59 | done: 60 | 61 | 62 | j big_loop; # loop big_loop 63 | big_loop_end: 64 | 65 | 66 | # exit 67 | li a7, 93; 68 | ecall; 69 | 70 | 71 | .section ".tohost","aw",@progbits 72 | .globl tohost 73 | .globl fromhost 74 | 75 | .align 6 76 | tohost: .dword 0 77 | .align 6 78 | fromhost: .dword 0 79 | -------------------------------------------------------------------------------- /programs/examples/asm.S: 
-------------------------------------------------------------------------------- 1 | /* 2 | * File: asm.S 3 | * Description: Example of how to format a RISC-V assembly file to be read by CVA6. 4 | */ 5 | 6 | .text 7 | # Text segment 8 | .globl _start; 9 | _start: # entry point 10 | 11 | li t0, 2022; 12 | li t1, 2023; 13 | add t2, t0, t1; 14 | 15 | li a0, 0; # set exit value to 0 16 | li a7, 93; # set syscall to `exit` 17 | ecall; 18 | 19 | .data 20 | # Data segment 21 | 22 | arr: .asciz "Hello, world!" 23 | 24 | 25 | .section ".tohost","aw",@progbits 26 | .globl tohost 27 | .globl fromhost 28 | 29 | .align 6 30 | tohost: .dword 0 31 | .align 6 32 | fromhost: .dword 0 33 | -------------------------------------------------------------------------------- /programs/rvf/fpu_example.S: -------------------------------------------------------------------------------- 1 | /* 2 | * File: fpu_example.S 3 | * Description: Example of how to enable the FPU. 4 | */ 5 | 6 | .text 7 | # Text segment 8 | .globl _start; 9 | _start: # entry point 10 | 11 | 12 | # enable FPU 13 | li t0, (3 << 13); 14 | csrs mstatus, t0; 15 | 16 | # load floats into f0 and f1 17 | la t0, WORD_F0; 18 | flw f0, 0(t0); 19 | la t0, WORD_F1; 20 | flw f1, 0(t0); 21 | 22 | # single precision RVF instructions 23 | fadd.s f2, f0, f1; 24 | fmul.s f3, f0, f1; 25 | fsqrt.s f4, f0; 26 | fmin.s f5, f0, f1; 27 | 28 | # exit 29 | li a0, 0; # set exit value to 0 30 | li a7, 93; # set syscall to `exit` 31 | ecall; 32 | 33 | 34 | .data 35 | # Data segment 36 | WORD_F0: .word 0x431a0000 # 154.0 37 | WORD_F1: .word 0x41700000 # 15.0 38 | 39 | 40 | .section ".tohost","aw",@progbits 41 | .globl tohost 42 | .globl fromhost 43 | 44 | .align 6 45 | tohost: .dword 0 46 | .align 6 47 | fromhost: .dword 0 48 | -------------------------------------------------------------------------------- /programs/vm/os.S: -------------------------------------------------------------------------------- 1 | /* 2 | * File: os.S 3 | * Description: 
Example of how to enable virtual memory and load and run a user program. 4 | */ 5 | 6 | #define USER_PA (0x0000000080005000ULL) 7 | #define USER_VA (0x0000000080001000ULL) 8 | 9 | #define KERNEL_PA (0x0000000080000000ULL) 10 | #define PT2_PA (0x0000000080002000ULL) 11 | #define PT1_PA (0x0000000080003000ULL) 12 | #define PT0_PA (0x0000000080004000ULL) 13 | 14 | # Page Number Getters 15 | #define PN(PA) (PA >> 12) 16 | #define PN0(PA) (PA >> 12 & 0x1ff) 17 | #define PN1(PA) (PA >> 21 & 0x1ff) 18 | #define PN2(PA) (PA >> 30 & 0x1ff) 19 | 20 | .text 21 | .global _start; 22 | 23 | _start: 24 | .align 2 25 | bootloader: 26 | 27 | # Init PT2 28 | li t2, PT2_PA + 8*PN2(PT1_PA); 29 | li t3, (PN(PT1_PA) << 10) | 0x1; # pointer to PT1 30 | sw t3, 0(t2); 31 | 32 | # Init PT1 33 | li t2, PT1_PA + 8*PN1(PT0_PA); 34 | li t3, (PN(PT0_PA) << 10) | 0x1; # pointer to PT0 35 | sw t3, 0(t2); 36 | 37 | # Init PT0 38 | # Set KERNEL_PA->KERNEL_PA 39 | li t2, PT0_PA + 8*PN0(KERNEL_PA); 40 | li t3, (PN(KERNEL_PA) << 10) | 0xef; 41 | sw t3, 0(t2); 42 | # Set PT2_PA->PT2_PA 43 | li t2, PT0_PA + 8*PN0(PT2_PA); 44 | li t3, (PN(PT2_PA) << 10) | 0xef; 45 | sw t3, 0(t2); 46 | # Set PT1_PA->PT1_PA 47 | li t2, PT0_PA + 8*PN0(PT1_PA); 48 | li t3, (PN(PT1_PA) << 10) | 0xef; 49 | sw t3, 0(t2); 50 | # Set PT0_PA->PT0_PA 51 | li t2, PT0_PA + 8*PN0(PT0_PA); 52 | li t3, (PN(PT0_PA) << 10) | 0xef; 53 | sw t3, 0(t2); 54 | 55 | 56 | # cfig PMP 57 | # entry0: addr 0x0_0000_0000~0x2_ffff_ffff, l=0 a=1 x=1 w=1 r=1 58 | li x1, 0x0f; 59 | csrw pmpcfg0, x1; 60 | li x1, 0xc0000000; 61 | csrw pmpaddr0, x1; 62 | 63 | # Get into supervisor mode 64 | li t4, (1 << 11) | (1 << 5); 65 | csrw mstatus, t4; 66 | la t5, OS; 67 | csrw mepc, t5; 68 | mret; 69 | 70 | 71 | .align 2 72 | OS: 73 | # Enable virtual memory 74 | li a0, (8 << 60) | PN(PT2_PA); 75 | csrw satp, a0; 76 | sfence.vma; # Refetch the SATP register 77 | 78 | # Set USER_VA->USER_PA 79 | li t2, PT0_PA + 8*PN0(USER_VA); 80 | li t3, (PN(USER_PA) << 10) | 0xff; 81 
| sw t3, 0(t2); 82 | 83 | # Enable writing to User PTE in S-Mode 84 | li t0, (1 << 18); 85 | csrs sstatus, t0; 86 | 87 | # Load program to user space 88 | la t0, user_program; # Load the address to start copying into t0 89 | la t1, user_program_end # Load the address to stop copying into t1 90 | li t3, USER_VA; # Load the destination address into t3 91 | 1: 92 | lw t4, 0(t0); # Load a word from the user program 93 | sw t4, 0(t3); # Store the word to the destination 94 | addi t0, t0, 4; # Increment the user program address 95 | addi t3, t3, 4; # Increment the destination address 96 | blt t0, t1, 1b; # Repeat until the end of the user program is reached 97 | 98 | # Disable writing to User PTE in S-Mode 99 | li t0, (1 << 18); 100 | csrc sstatus, t0; 101 | 102 | # Get into user mode 103 | li t5, USER_VA; 104 | csrw sepc, t5; 105 | sret; 106 | 107 | 108 | 109 | .data 110 | 111 | .align 2 112 | user_program: 113 | # store 0x154b at USER_VA+0xf00 114 | li t0, 0x154b; 115 | li t1, USER_VA+0xf00; 116 | sw t0, 0(t1); 117 | # load the stored value back into t2 118 | lw t2, 0(t1); 119 | # exit 120 | li a0, 0; 121 | li a7, 93; 122 | ecall; 123 | user_program_end: 124 | 125 | .section ".tohost","aw",@progbits 126 | .globl tohost 127 | .globl fromhost 128 | 129 | .align 6 130 | tohost: .dword 0 131 | .align 6 132 | fromhost: .dword 0 133 | -------------------------------------------------------------------------------- /programs/vm/privilege.S: -------------------------------------------------------------------------------- 1 | /* 2 | * File: privilege.S 3 | * Description: Example of how to switch between different privilege modes. 
4 | * Reference: "RISC-V Bytes: Privilege Levels" by Daniel Mangum 5 | */ 6 | 7 | .section .text 8 | .globl _start 9 | 10 | _start: 11 | .align 2 12 | machine: 13 | li t0, 0x0f; 14 | csrw pmpcfg0, t0; 15 | li t0, 0xc0000000; 16 | csrw pmpaddr0, t0; 17 | la t1, m_trap; 18 | csrw mtvec, t1; 19 | la t2, supervisor; 20 | csrw mepc, t2; 21 | li t3, (1 << 11); 22 | csrw mstatus, t3; 23 | li t4, 0x100; 24 | csrs medeleg, t4; 25 | mret; # to supervisor 26 | 27 | .align 2 28 | m_trap: 29 | nop; 30 | ecall; # exit simulation 31 | 32 | .align 2 33 | supervisor: 34 | la t0, user; 35 | csrw sepc, t0; 36 | la t1, s_trap; 37 | csrw stvec, t1; 38 | sret; # to user 39 | 40 | .align 2 41 | s_trap: 42 | nop; 43 | ecall; # to m_trap 44 | 45 | .align 2 46 | user: 47 | nop; 48 | ecall; # to s_trap 49 | 50 | 51 | .data 52 | 53 | .section ".tohost","aw",@progbits 54 | .globl tohost 55 | .globl fromhost 56 | 57 | .align 6 58 | tohost: .dword 0 59 | .align 6 60 | fromhost: .dword 0 61 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | # 2 | # File: setup.sh 3 | # Description: Script to set up environment variables for CVA6 simulation and build flow. 4 | # Usage: Using bash, source this file while being in the root folder of "labs-with-cva6". 5 | # 6 | 7 | # Adapted from 8 | # https://github.com/PrincetonUniversity/openpiton/blob/3cc7bf4d3d1ee2f8e18c33eda6c136a57222806b/piton/ariane_setup.sh 9 | 10 | # tool install directory 11 | export RISCV=$RISCV_ROOT 12 | 13 | # init cva6 submodule 14 | git submodule update --init --recursive 15 | 16 | # PATHS 17 | export CVA6_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/cva6" && pwd) 18 | export LIBRARY_PATH=$RISCV/lib 19 | export LD_LIBRARY_PATH=$RISCV/lib 20 | export C_INCLUDE_PATH=$RISCV/include 21 | export CPLUS_INCLUDE_PATH=$RISCV/include 22 | --------------------------------------------------------------------------------