├── .gitmodules ├── hardware ├── src │ └── iob_cache.sdc ├── simulation │ ├── uut.gtkw.license │ ├── src │ │ └── iob_v_tb.vh │ ├── cache_coverage.waiver │ ├── sim_build.mk │ ├── uut.gtkw │ └── iob_cache_sim_wrapper │ │ └── iob_cache_sim_wrapper.py ├── fpga │ ├── fpga_build.mk │ └── vivado │ │ ├── iob_aes_ku040_db_g │ │ └── iob_cache_dev.sdc │ │ └── iob_cache_tool.sdc ├── lint │ ├── lint_build.mk │ └── verilator_waiver.vlt └── modules │ ├── iob_cache_memory │ ├── hardware │ │ └── src │ │ │ ├── iob_cache_onehot_to_bin.v │ │ │ ├── iob_cache_gen_sp_ram.v │ │ │ ├── iob_cache_replacement_policy.v │ │ │ └── iob_cache_memory.v │ └── iob_cache_memory.py │ ├── iob_cache_back_end_iob │ ├── hardware │ │ └── src │ │ │ ├── iob_cache_back_end_iob.v │ │ │ ├── iob_cache_read_channel_iob.v │ │ │ └── iob_cache_write_channel_iob.v │ └── iob_cache_back_end_iob.py │ ├── iob_cache_control │ ├── iob_cache_control.py │ └── hardware │ │ ├── axi │ │ └── iob_cache_control_axi.v │ │ └── iob │ │ └── iob_cache_control_iob.v │ ├── iob_cache_back_end_axi │ ├── hardware │ │ └── src │ │ │ ├── iob_cache_back_end_axi.v │ │ │ ├── iob_cache_read_channel_axi.v │ │ │ └── iob_cache_write_channel_axi.v │ └── iob_cache_back_end_axi.py │ └── iob_cache_front_end │ └── iob_cache_front_end.py ├── CITATION.cff.license ├── document ├── figures │ ├── bd.odg │ ├── inst.odg │ ├── symb.odg │ ├── tbbd.odg │ ├── bd.odg.license │ ├── inst.odg.license │ ├── symb.odg.license │ └── tbbd.odg.license ├── tsrc │ ├── name.tex │ ├── pb_title.tex │ ├── revhist_desc.tex │ ├── ug_title.tex │ ├── benefits.tex │ ├── deliverables.tex │ ├── sim_desc.tex │ ├── features.tex │ ├── intro.tex │ ├── pb.tex │ ├── inst_desc.tex │ └── if.tex └── document.mk ├── software ├── sw_build.mk └── src │ ├── iob_cache_csrs_pc_emul.c │ └── iob_core_tb.c ├── TODO ├── CITATION.cff ├── LICENSES └── MIT.txt ├── LICENSE ├── .github └── workflows │ ├── release-artifacts.yml │ └── ci.yml ├── default.nix ├── README.md ├── Makefile └── iob_cache.py /.gitmodules: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hardware/src/iob_cache.sdc: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /CITATION.cff.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: 2024 IObundle 2 | 3 | SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /document/figures/bd.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IObundle/iob-cache/HEAD/document/figures/bd.odg -------------------------------------------------------------------------------- /document/figures/inst.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IObundle/iob-cache/HEAD/document/figures/inst.odg -------------------------------------------------------------------------------- /document/figures/symb.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IObundle/iob-cache/HEAD/document/figures/symb.odg -------------------------------------------------------------------------------- /document/figures/tbbd.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IObundle/iob-cache/HEAD/document/figures/tbbd.odg -------------------------------------------------------------------------------- /document/figures/bd.odg.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: 2024 IObundle 2 | 3 | SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- 
/document/figures/inst.odg.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: 2024 IObundle 2 | 3 | SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /document/figures/symb.odg.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: 2024 IObundle 2 | 3 | SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /document/figures/tbbd.odg.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: 2024 IObundle 2 | 3 | SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /hardware/simulation/uut.gtkw.license: -------------------------------------------------------------------------------- 1 | SPDX-FileCopyrightText: 2024 IObundle 2 | 3 | SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /document/tsrc/name.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | IOb-Cache 6 | -------------------------------------------------------------------------------- /software/sw_build.mk: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | UTARGETS=tb 6 | TB_INCLUDES=-I./src 7 | -------------------------------------------------------------------------------- /document/tsrc/pb_title.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \title{IOb-Cache, a Configurable Cache} 6 | 
-------------------------------------------------------------------------------- /hardware/fpga/fpga_build.mk: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | FPGA_TOP:=iob_cache 6 | IS_FPGA:=0 7 | QUARTUS_SEED=10 8 | 9 | -------------------------------------------------------------------------------- /document/tsrc/revhist_desc.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \input{\NAME_version} & May/30/2022 & JTS & Document released. \\ \hline 6 | -------------------------------------------------------------------------------- /document/document.mk: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | #tests 6 | TEST_LIST:=pb.pdf doc-clean ug.pdf doc-clean 7 | 8 | .PHONY: $(TEST_LIST) 9 | -------------------------------------------------------------------------------- /hardware/lint/lint_build.mk: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | VFLAGS+=--top-module $(LINT_TOP) 6 | VFLAGS+=--waiver-output cache_waivers.vlt 7 | -------------------------------------------------------------------------------- /hardware/fpga/vivado/iob_aes_ku040_db_g/iob_cache_dev.sdc: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | set clk_period 10.0 6 | set input_delay [expr $clk_period * 0.25 + 0.9] 7 | -------------------------------------------------------------------------------- /hardware/simulation/src/iob_v_tb.vh: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2025 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `include "iob_uut_conf.vh" 6 | // FE_ADDR_W + 1 (USE_CTRL) 7 | `define IOB_CSRS_ADDR_W (`IOB_UUT_FE_ADDR_W+1) 8 | -------------------------------------------------------------------------------- /document/tsrc/ug_title.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \title{% 6 | \Huge IOb-Cache \\ 7 | \vspace*{3cm} 8 | \Large A Configurable Cache 9 | } 10 | 11 | \header{IOb-Cache, a Configurable Cache} 12 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 1. Check invalidate while read request completes (see iob-soc ext_mem old fix) 6 | 2. Move doc building to build directory 7 | 3. Fix testbenches 8 | 4. 
Finish doc and respond to issues 9 | -------------------------------------------------------------------------------- /hardware/fpga/vivado/iob_cache_tool.sdc: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | create_clock -name "clk" -add -period 10.0 [get_ports clk] 6 | set_property CFGBVS VCCO [current_design] 7 | set_property HD.CLK_SRC BUFGCTRL_X0Y0 [get_ports clk] 8 | -------------------------------------------------------------------------------- /hardware/simulation/cache_coverage.waiver: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2025 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | // IOb-Cache verilator coverage waivers 6 | 7 | // waiver structure: 8 | // waive filename:line[:line] [reason] 9 | // waive module.v:19 "Coverage waiver description." 10 | -------------------------------------------------------------------------------- /document/tsrc/benefits.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \begin{itemize} 6 | \itemsep-0.5em 7 | \item Easy to integrate hardware and bare-metal software 8 | \item Low usage of FPGA resources or silicon area 9 | \item Operating frequency suitable for low-cost FPGAs and less recent ASIC nodes 10 | \item Low power consumption 11 | \end{itemize} 12 | -------------------------------------------------------------------------------- /hardware/simulation/sim_build.mk: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | # Custom Coverage Analysis 6 | CUSTOM_COVERAGE_FLAGS=cov_annotated 7 | CUSTOM_COVERAGE_FLAGS+=-E iob_uut.v 8 | CUSTOM_COVERAGE_FLAGS+=-E iob_ram_sp_be.v 9 | 
CUSTOM_COVERAGE_FLAGS+=-E iob_reg_ca.v 10 | CUSTOM_COVERAGE_FLAGS+=--waive cache_coverage.waiver 11 | CUSTOM_COVERAGE_FLAGS+=--waived-tag 12 | CUSTOM_COVERAGE_FLAGS+=-o cache_coverage.rpt 13 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below. If you are an author, submit a pull request to add your name" 3 | authors: 4 | - family-names: "Roque" 5 | given-names: "Joao" 6 | - family-names: "T. de Sousa" 7 | given-names: "Jose" 8 | orcid: "https://orcid.org/0000-0001-7525-7546" 9 | title: "IOb-Cache" 10 | version: 0.1.0 11 | doi: 10.3390/a14080218 12 | date-released: 2020-07-20 13 | url: "https://github.com/github-linguist/linguist" 14 | -------------------------------------------------------------------------------- /document/tsrc/deliverables.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \begin{itemize} 6 | \itemsep-0.5em 7 | \item Verilog source code and testbench 8 | \item Verilator testbench 9 | \item FPGA implementation scripts 10 | \item Bare-metal software driver 11 | \item Documentation Latex sources 12 | \item Example System on Chip using IOb-Cache 13 | \end{itemize} 14 | -------------------------------------------------------------------------------- /document/tsrc/sim_desc.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | The above paragraph describes a desirable simulation setup, but IOb-Cache's 6 | simulation environment still lacks a modular simulation structure. Currently, 7 | only a set of primary non-pipelined write followed by read tests is 8 | implemented. 
However, IOb-Cache has been thoroughly verified in-system, with two 9 | cache levels. Various open-source RISC-V processors have proven that IOb-Cache 10 | works well: PicoRV32, SSRV, VexRISCV, and DarkRV. 11 | \clearpage 12 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_memory/hardware/src/iob_cache_onehot_to_bin.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | /*---------------------------*/ 8 | /* One-Hot to Binary Encoder */ 9 | /*---------------------------*/ 10 | 11 | // One-hot to binary encoder (if input is (0)0 or (0)1, the output is 0) 12 | module iob_cache_onehot_to_bin #( 13 | parameter BIN_W = 2 14 | ) ( 15 | input [2**BIN_W-1:1] onehot_i, 16 | output reg [ BIN_W-1:0] bin_o 17 | ); 18 | 19 | reg [BIN_W-1:0] bin_cnt; 20 | integer i; 21 | 22 | always @(onehot_i) begin : onehot_to_binary_encoder 23 | bin_cnt = 0; 24 | for (i = 1; i < 2 ** BIN_W; i = i + 1) if (onehot_i[i]) bin_cnt = bin_cnt | i[BIN_W-1:0]; 25 | bin_o = bin_cnt; 26 | end 27 | 28 | endmodule 29 | -------------------------------------------------------------------------------- /software/src/iob_cache_csrs_pc_emul.c: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2024 IObundle 3 | * 4 | * SPDX-License-Identifier: MIT 5 | */ 6 | 7 | /* PC Emulation of CACHE peripheral */ 8 | #include "iob_cache_csrs.h" 9 | 10 | static int base; 11 | void IOB_CACHE_INIT_BASEADDR(uint32_t addr) { base = addr; } 12 | 13 | // Core Setters 14 | void IOB_CACHE_SET_RST_CNTRS(uint8_t value) { return; } 15 | 16 | void IOB_CACHE_SET_INVALIDATE(uint8_t value) { return; } 17 | 18 | // Core Getters 19 | uint8_t IOB_CACHE_GET_WTB_EMPTY() { return 1; } 20 | 21 | uint8_t IOB_CACHE_GET_WTB_FULL() { return 0; } 22 | 23 | uint32_t 
IOB_CACHE_GET_RW_HIT() { return 0; } 24 | 25 | uint32_t IOB_CACHE_GET_RW_MISS() { return 0; } 26 | 27 | uint32_t IOB_CACHE_GET_READ_HIT() { return 0; } 28 | 29 | uint32_t IOB_CACHE_GET_READ_MISS() { return 0; } 30 | 31 | uint32_t IOB_CACHE_GET_WRITE_HIT() { return 0; } 32 | 33 | uint32_t IOB_CACHE_GET_WRITE_MISS() { return 0; } 34 | 35 | uint16_t IOB_CACHE_GET_VERSION() { return 0x0010; } 36 | -------------------------------------------------------------------------------- /document/tsrc/features.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \begin{itemize} 6 | \itemsep-0.5em 7 | \item Pipelined operation allowing consecutive one-cycle reads and writes 8 | \item IObundle's Native Pipelined Interface (NPI) on the processor side (front-end) 9 | \item NPI or AXI4 interface on the memory side (back-end) 10 | \item Configurable address and data widths on the front-end and back-end interfaces for supporting a variety of different systems 11 | \item Configurable number of lines and words per line 12 | \item Configurable K-Way Set-Associativity ($k \geq 1$) 13 | \item Configurable line replacement policy: LRU, MRU-based PLRU, and tree-based PLRU. 
14 | \item Configurable Write-Through Not-Allocate and Write-Back Allocate policies 15 | \item Configurable Write-Through buffer depth 16 | \item Optional control address space for cache invalidation, accessing the write through buffer status and read/write hit/miss counters 17 | \end{itemize} 18 | -------------------------------------------------------------------------------- /LICENSES/MIT.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jose T. 
de Sousa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /document/tsrc/intro.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | IOb-Cache is an open-source configurable pipelined memory cache. The 6 | processor-side interface (front-end) uses IObundle's Native Pipelined Interface 7 | (NPI). The memory-side interface (back-end) can also be configured to use NPI or 8 | the widely used AXI4 interface. The address and data widths of the front-end and 9 | back-end are configurable to support multiple user cores and memories. IOb-Cache 10 | is a K-Way Set-Associative cache, where K can vary from 1 (directly mapped) to 8 11 | or more ways, provided the operating frequency after synthesis is 12 | acceptable. 
IOb-Cache supports the two most common write policies: Write-Through 13 | Not-Allocate and Write-Back Allocate. 14 | 15 | IOb-Cache was developed in the scope of João Roque's master's thesis in 16 | Electrical and Computer Engineering at the Instituto Superior Técnico of the 17 | University of Lisbon. The Verilog code works well in IObundle's IOb-SoC system 18 | (https://github.com/IObundle/iob-soc) both in simulation and FPGA. To be used in 19 | an ASIC, it would need to be lint-cleaned and verified more thoroughly by RTL 20 | simulation, ideally achieving 100\% code coverage. 21 | -------------------------------------------------------------------------------- /.github/workflows/release-artifacts.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | name: Release-Artifacts 6 | 7 | on: 8 | release: 9 | types: [created, edited, prereleased, published, released] 10 | # Allow manual workflow runs 11 | workflow_dispatch: 12 | 13 | # Set default shell as interactive (source ~/.bashrc) 14 | defaults: 15 | run: 16 | shell: bash -ieo pipefail {0} 17 | 18 | 19 | # Run only one instance of this workflow at a time 20 | # cancel-in-progress: stop running workflow and run latest instead 21 | concurrency: 22 | group: ${{ github.workflow }}-${{ github.ref }} 23 | cancel-in-progress: true 24 | 25 | jobs: 26 | build-and-upload-release-artifacts: 27 | runs-on: self-hosted 28 | timeout-minutes: 5 29 | steps: 30 | - name: Checkout repository 31 | uses: actions/checkout@v4 32 | with: 33 | submodules: 'recursive' 34 | - name: Clean untracked files and directories (like old submodules) 35 | run: git clean -ffdx 36 | - name: Build release artifacts 37 | run: make release-artifacts 38 | - name: Upload to release 39 | uses: softprops/action-gh-release@v2 40 | with: 41 | files: | 42 | *.tar.gz 43 | 
-------------------------------------------------------------------------------- /hardware/modules/iob_cache_memory/hardware/src/iob_cache_gen_sp_ram.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | /*---------------------------------*/ 8 | /* Byte-width generable iob-sp-ram */ 9 | /*---------------------------------*/ 10 | 11 | // For cycle that generates byte-width (single enable) single-port SRAM 12 | // older synthesis tool may require this approach 13 | 14 | module iob_cache_gen_sp_ram #( 15 | parameter DATA_W = 32, 16 | parameter ADDR_W = 10 17 | ) ( 18 | input clk_i, 19 | input en_i, 20 | input [DATA_W/8-1:0] we_i, 21 | input [ ADDR_W-1:0] addr_i, 22 | output [ DATA_W-1:0] data_o, 23 | input [ DATA_W-1:0] data_i 24 | ); 25 | 26 | genvar i; 27 | generate 28 | for (i = 0; i < (DATA_W / 8); i = i + 1) begin : g_ram 29 | iob_ram_sp #( 30 | .DATA_W(8), 31 | .ADDR_W(ADDR_W) 32 | ) iob_cache_mem ( 33 | .clk_i (clk_i), 34 | .en_i (en_i), 35 | .we_i (we_i[i]), 36 | .addr_i(addr_i), 37 | .d_o (data_o[8*i+:8]), 38 | .d_i (data_i[8*i+:8]) 39 | ); 40 | end 41 | endgenerate 42 | 43 | endmodule 44 | -------------------------------------------------------------------------------- /document/tsrc/pb.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \documentclass[twocolumn]{pb} 6 | 7 | %Params 8 | %\def\XILINX{1} 9 | %\def\INTEL{1} 10 | 11 | \usepackage{color,soul} 12 | \usepackage{float} 13 | \usepackage{makecell} 14 | \usepackage{lipsum} 15 | \usepackage[table,xcdraw]{xcolor} 16 | \usepackage{calc} 17 | \usepackage{eso-pic} 18 | \usepackage[binary-units=true]{siunitx} 19 | \graphicspath{{./figures/}} 20 | 21 | \input{pb_title} 22 | \category{Product Brief} 23 | %\confidential{} 24 | 25 | 
\newcommand\BackgroundPic{% 26 | \put(0,0){% 27 | \parbox[b][\paperheight]{\paperwidth}{% 28 | \vfill 29 | \centering 30 | \includegraphics[width=\paperwidth,height=\paperheight,% 31 | keepaspectratio]{bg.pdf}% 32 | \vfill 33 | }}} 34 | 35 | \input{color} 36 | 37 | \begin{document} 38 | \AddToShipoutPicture*{\BackgroundPic} 39 | 40 | \section*{Overview} 41 | \input{intro} 42 | 43 | \section*{Features} 44 | \input{features} 45 | 46 | \newpage 47 | \section*{Block Diagram} 48 | \input{bd} 49 | 50 | \section*{Deliverables} 51 | \input{deliverables} 52 | 53 | \section*{Benefits} 54 | \input{benefits} 55 | 56 | 57 | \ifdefined\RESULTS 58 | \section*{Implementation Results} 59 | \input{fpga_results} 60 | \input{asic_results} 61 | \fi 62 | 63 | \end{document} 64 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | name: CI 6 | 7 | # Set default shell as interactive (source ~/.bashrc) 8 | defaults: 9 | run: 10 | shell: bash -ieo pipefail {0} 11 | 12 | # Run only one instance of this workflow at a time 13 | # cancel-in-progress: stop running workflow and run latest instead 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | on: 19 | push: 20 | branches: '*' 21 | pull_request: 22 | branches: '*' 23 | # Allow manual workflow runs 24 | workflow_dispatch: 25 | 26 | jobs: 27 | 28 | cache: 29 | runs-on: self-hosted 30 | timeout-minutes: 30 31 | if: ${{ !cancelled() }} 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | with: 36 | submodules: 'recursive' 37 | - name: run simulation test 38 | run: make sim-test 39 | - name: run fpga test 40 | run: make fpga-test 41 | - name: run synthesis test (yosys) 42 | run: make syn-test 43 | 44 | reuse: 45 | runs-on: self-hosted 46 | timeout-minutes: 3 47 
| if: ${{ !cancelled() }} 48 | needs: [ cache ] 49 | 50 | steps: 51 | - uses: actions/checkout@v4 52 | with: 53 | submodules: 'recursive' 54 | - name: Clean untracked files and directories (like old submodules) 55 | run: git clean -ffdx 56 | - name: Check compliance with REUSE specification 57 | run: nix-shell --run "reuse lint" 58 | -------------------------------------------------------------------------------- /document/tsrc/inst_desc.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | The figure shows a three-level memory hierarchy comprising L1 and L2 caches and 6 | a memory module. The Host drives the L1 cache through its front-end NPI 7 | interface (the user is free to develop other interfaces). The L1 and L2 caches 8 | are connected using another NPI interface since IOb-Cache's front-end interface 9 | only supports this. 10 | 11 | The {\tt wtb\_empty\_in} and {\tt wtb\_empty\_out} signals form a chain from the 12 | L1's front-end to the L2's back-end. As explained in the description of these 13 | signals, this chain ensures that the user's core knows that all write-through 14 | buffers across the cache hierarchy are empty. Note that the L1's {\tt 15 | wtb\_empty\_out} signal is floating because the Host uses the cache controller 16 | to query the write-through buffer status. The L2's {\tt wtb\_empty\_in} is tied 17 | high as L2 is the last cache in the hierarchy, and there are no more 18 | write-through buffers to its right-hand side. 19 | 20 | The {\tt invalidate\_in} and {\tt invalidate\_out} signals form another chain that 21 | ensures that the data in the whole cache hierarchy is invalidated, as explained 22 | in these signals' descriptions. Note that the L1's {\tt invalidate\_in} signal is 23 | tied low as L1 is invalidated via the cache controller by writing to the 24 | respective address. 
The L2's {\tt invalidate\_out} signal is floating because L2 25 | is the last cache in the hierarchy, and there are no more caches to invalidate. 26 | 27 | Finally, L2 is connected to a memory module, and one can choose between NPI or 28 | AXI4 interfaces. In practice, most memory modules have a standard interface such 29 | as AXI4, which is the most common choice, although one may choose NPI in less 30 | usual simulation or FPGA prototyping scenarios. 31 | 32 | -------------------------------------------------------------------------------- /document/tsrc/if.tex: -------------------------------------------------------------------------------- 1 | % SPDX-FileCopyrightText: 2024 IObundle 2 | % 3 | % SPDX-License-Identifier: MIT 4 | 5 | \begin{longtable}{|l|c|c|p{10.5cm}|} 6 | \caption{General Interface Signals}\label{gen_tab:is}\\ \hline 7 | \rowcolor{iob-green} 8 | \multicolumn{1}{|c}{\bf Name} & \multicolumn{1}{|c|}{\bf Direction} & \multicolumn{1}{c|}{\bf Width} & \multicolumn{1}{c|}{\bf Description} \\ \hline \hline 9 | \input gen_if_tab 10 | \end{longtable} 11 | 12 | \begin{longtable}{|l|c|c|p{6.4cm}|} 13 | \caption{Front-End Interface Signals}\label{tab:if_fe}\\ \hline 14 | \rowcolor{iob-green} 15 | \multicolumn{1}{|c|}{\bf Name} & \multicolumn{1}{c|}{\bf Direction} & \multicolumn{1}{c|}{\bf Width} & \multicolumn{1}{c|}{\bf Description} \\ \hline \hline 16 | \input fe_if_tab 17 | \end{longtable} 18 | 19 | \begin{longtable}{|l|c|c|p{10.5cm}|} 20 | \caption{Invalidate and Write-Through Buffer Empty Chain Interface Signals}\label{tab:if_ie}\\ \hline 21 | \rowcolor{iob-green} 22 | \multicolumn{1}{|c|}{\bf Name} & \multicolumn{1}{c|}{\bf Direction} & \multicolumn{1}{c|}{\bf Width} & \multicolumn{1}{c|}{\bf Description} \\ \hline \hline 23 | \input ie_if_tab 24 | \end{longtable} 25 | 26 | \begin{longtable}{|l|c|c|p{9.7cm}|} 27 | \caption{Native Back-End Interface Signals}\label{tab:if_be_native}\\ \hline 28 | \rowcolor{iob-green} 29 | \multicolumn{1}{|c|}{\bf Name} & 
\multicolumn{1}{c|}{\bf Direction} & \multicolumn{1}{c|}{\bf Width} & \multicolumn{1}{c|}{\bf Description} \\ \hline \hline 30 | \input be_if_tab 31 | \end{longtable} 32 | 33 | \begin{longtable}{|l|c|c|p{9.7cm}|} 34 | \caption{Back-End Interface Signals}\label{tab:if_be}\\ \hline 35 | \rowcolor{iob-green} 36 | \multicolumn{1}{|c|}{\bf Name} & \multicolumn{1}{c|}{\bf Direction} & \multicolumn{1}{c|}{\bf Width} & \multicolumn{1}{c|}{\bf Description} \\ \hline \hline 37 | \input axi_m_port_if_tab 38 | \end{longtable} 39 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | { pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/25.05.tar.gz") {} }: 6 | 7 | let 8 | py2hwsw_commit = "13d2f9d76e3af80c76293e6f5ad0db8593c3e405"; # Replace with the desired commit. 9 | py2hwsw_sha256 = "AjDDJZZgTEllKfImNtj5MyDP2A4eqVge6bP8fgo2STM="; # Replace with the actual SHA256 hash. 
10 | # Get local py2hwsw root from `PY2HWSW_ROOT` env variable 11 | py2hwswRoot = builtins.getEnv "PY2HWSW_ROOT"; 12 | 13 | # For debug 14 | force_py2_build = 0; 15 | 16 | py2hwsw = 17 | # If no root is provided, or there is a root but we want to force a rebuild 18 | if py2hwswRoot == "" || force_py2_build != 0 then 19 | pkgs.python3.pkgs.buildPythonPackage rec { 20 | pname = "py2hwsw"; 21 | version = py2hwsw_commit; 22 | src = 23 | if py2hwswRoot != "" then 24 | # Root provided, use local 25 | pkgs.lib.cleanSource py2hwswRoot 26 | else 27 | # No root provided, use GitHub 28 | (pkgs.fetchFromGitHub { 29 | owner = "IObundle"; 30 | repo = "py2hwsw"; 31 | rev = py2hwsw_commit; 32 | sha256 = py2hwsw_sha256; 33 | fetchSubmodules = true; 34 | }).overrideAttrs (_: { 35 | GIT_CONFIG_COUNT = 1; 36 | GIT_CONFIG_KEY_0 = "url.https://github.com/.insteadOf"; 37 | GIT_CONFIG_VALUE_0 = "git@github.com:"; 38 | }); 39 | # Add any necessary dependencies here. 40 | #propagatedBuildInputs = [ pkgs.python38Packages.someDependency ]; 41 | } 42 | else 43 | null; 44 | 45 | extra_pkgs = with pkgs; [ 46 | # Define other Nix packages for your project here 47 | ]; 48 | 49 | in 50 | 51 | # If no root is provided, or there is a root but we want to force a rebuild 52 | if py2hwswRoot == "" || force_py2_build != 0 then 53 | # Use newly built nix package 54 | import "${py2hwsw}/lib/python${builtins.substring 0 4 pkgs.python3.version}/site-packages/py2hwsw/lib/default.nix" { py2hwsw_pkg = py2hwsw; extra_pkgs = extra_pkgs; } 55 | else 56 | # Use local 57 | import "${py2hwswRoot}/py2hwsw/lib/default.nix" { py2hwsw_pkg = py2hwsw; extra_pkgs = extra_pkgs; } 58 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_back_end_iob/hardware/src/iob_cache_back_end_iob.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | 
`timescale 1ns / 1ps 6 | 7 | `include "iob_cache_back_end_iob_conf.vh" 8 | 9 | module iob_cache_back_end_iob #( 10 | `include "iob_cache_back_end_iob_params.vs" 11 | ) ( 12 | `include "iob_cache_back_end_iob_io.vs" 13 | ); 14 | 15 | wire [BE_ADDR_W-1:0] be_addr_read, be_addr_write; 16 | wire be_valid_read, be_valid_write; 17 | wire be_ack; 18 | wire be_wack; 19 | wire be_wack_r; 20 | 21 | assign iob_addr_o = (be_valid_read) ? be_addr_read : be_addr_write; 22 | assign iob_valid_o = be_valid_read | be_valid_write; 23 | assign be_ack = iob_rvalid_i | be_wack_r; 24 | assign be_wack = iob_ready_i & iob_valid_o & (|iob_wstrb_o); 25 | 26 | iob_reg_care #( 27 | .DATA_W (1), 28 | .RST_VAL(0) 29 | ) iob_reg_be_wack ( 30 | .clk_i (clk_i), 31 | .arst_i(arst_i), 32 | .cke_i (cke_i), 33 | .rst_i (1'b0), 34 | .en_i (1'b1), 35 | .data_i(be_wack), 36 | .data_o(be_wack_r) 37 | ); 38 | 39 | iob_cache_read_channel_iob #( 40 | .FE_ADDR_W (FE_ADDR_W), 41 | .FE_DATA_W (FE_DATA_W), 42 | .BE_ADDR_W (BE_ADDR_W), 43 | .BE_DATA_W (BE_DATA_W), 44 | .WORD_OFFSET_W(WORD_OFFSET_W) 45 | ) read_fsm ( 46 | .clk_i (clk_i), 47 | .reset_i (arst_i), 48 | .replace_valid_i(replace_valid_i), 49 | .replace_addr_i (replace_addr_i), 50 | .replace_o (replace_o), 51 | .read_valid_o (read_valid_o), 52 | .read_addr_o (read_addr_o), 53 | .read_rdata_o (read_rdata_o), 54 | .be_addr_o (be_addr_read), 55 | .be_valid_o (be_valid_read), 56 | .be_ack_i (be_ack), 57 | .be_rdata_i (iob_rdata_i) 58 | ); 59 | 60 | iob_cache_write_channel_iob #( 61 | .ADDR_W (FE_ADDR_W), 62 | .DATA_W (FE_DATA_W), 63 | .BE_ADDR_W (BE_ADDR_W), 64 | .BE_DATA_W (BE_DATA_W), 65 | .WRITE_POL (WRITE_POL), 66 | .WORD_OFFSET_W(WORD_OFFSET_W) 67 | ) write_fsm ( 68 | .clk_i (clk_i), 69 | .reset_i(arst_i), 70 | 71 | .valid_i(write_valid_i), 72 | .addr_i (write_addr_i), 73 | .wstrb_i(write_wstrb_i), 74 | .wdata_i(write_wdata_i), 75 | .ready_o(write_ready_o), 76 | 77 | .be_addr_o (be_addr_write), 78 | .be_valid_o(be_valid_write), 79 | .be_ack_i (be_ack), 
80 | .be_wdata_o(iob_wdata_o), 81 | .be_wstrb_o(iob_wstrb_o) 82 | ); 83 | 84 | endmodule 85 | -------------------------------------------------------------------------------- /hardware/simulation/uut.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.3.113 (w)1999-2022 BSI 3 | [*] Tue Nov 21 19:09:44 2023 4 | [*] 5 | [dumpfile] "(null)" 6 | [savefile] "/home/jsousa/sandbox/iob-soc/submodules/CACHE/hardware/simulation/uut.gtkw" 7 | [timestart] 21014700 8 | [size] 1910 1014 9 | [pos] -1 -1 10 | *-16.175915 21245000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 11 | [treeopen] iob_cache_tb. 12 | [treeopen] iob_cache_tb.uut. 13 | [treeopen] iob_cache_tb.uut.cache. 14 | [treeopen] iob_cache_tb.uut.cache.back_end. 15 | [treeopen] iob_cache_tb.uut.cache.cache_memory. 16 | [treeopen] iob_cache_tb.uut.cache.cache_memory.g_n_ways_block[0]. 17 | [treeopen] iob_cache_tb.uut.cache.cache_memory.g_n_ways_block[0].g_line2mem_block[0]. 18 | [treeopen] iob_cache_tb.uut.cache.cache_memory.g_n_ways_block[0].g_line2mem_block[0].g_BE_block[0]. 
19 | [sst_width] 380 20 | [signals_width] 262 21 | [sst_expanded] 1 22 | [sst_vpaned_height] 334 23 | @200 24 | -tb 25 | @28 26 | iob_cache_tb.uut.cache.cache_memory.clk_i 27 | @22 28 | iob_cache_tb.iob_addr_i[23:0] 29 | iob_cache_tb.iob_rdata_o[31:0] 30 | iob_cache_tb.iob_wdata_i[31:0] 31 | iob_cache_tb.iob_wstrb_i[3:0] 32 | iob_cache_tb.uut.cache.iob_addr_i[21:0] 33 | @200 34 | -sim wrapper 35 | @22 36 | iob_cache_tb.uut.iob_addr_i[21:0] 37 | iob_cache_tb.uut.iob_rdata_o[31:0] 38 | iob_cache_tb.uut.iob_wdata_i[31:0] 39 | iob_cache_tb.uut.iob_wstrb_i[3:0] 40 | @200 41 | -cache 42 | @22 43 | iob_cache_tb.uut.cache.iob_addr_i[21:0] 44 | iob_cache_tb.uut.cache.iob_rdata_o[31:0] 45 | iob_cache_tb.uut.cache.iob_wdata_i[31:0] 46 | iob_cache_tb.uut.cache.iob_wstrb_i[3:0] 47 | iob_cache_tb.uut.cache.be_rdata_i[31:0] 48 | @200 49 | -cache memory 50 | @28 51 | iob_cache_tb.uut.cache.cache_memory.ready_o 52 | iob_cache_tb.uut.cache.cache_memory.write_miss_o 53 | iob_cache_tb.uut.cache.cache_memory.read_miss_o 54 | iob_cache_tb.uut.cache.cache_memory.hit 55 | iob_cache_tb.uut.cache.cache_memory.req_i 56 | iob_cache_tb.uut.cache.cache_memory.write_access 57 | @22 58 | iob_cache_tb.uut.cache.cache_memory.write_wdata_o[31:0] 59 | iob_cache_tb.uut.cache.cache_memory.write_wstrb_o[3:0] 60 | @28 61 | iob_cache_tb.uut.cache.cache_memory.write_req_o 62 | iob_cache_tb.uut.cache.cache_memory.read_addr_i[2:0] 63 | @23 64 | iob_cache_tb.uut.cache.cache_memory.read_rdata_i[31:0] 65 | @200 66 | -front-end 67 | @28 68 | iob_cache_tb.uut.cache.front_end.ready_i 69 | @200 70 | -back end 71 | @22 72 | iob_cache_tb.uut.cache.back_end.be_rdata_i[31:0] 73 | @200 74 | -read_channel 75 | @22 76 | iob_cache_tb.uut.cache.back_end.read_fsm.be_rdata_i[31:0] 77 | @200 78 | -write_channel 79 | -axi_ram 80 | -iob_ram 81 | @22 82 | iob_cache_tb.uut.native_ram.addr_i[21:0] 83 | iob_cache_tb.uut.native_ram.d_i[31:0] 84 | iob_cache_tb.uut.native_ram.we_i[3:0] 85 | @28 86 | iob_cache_tb.uut.native_ram.clk_i 87 
| @22 88 | iob_cache_tb.uut.native_ram.d_o[31:0] 89 | iob_cache_tb.uut.native_ram.d_o_int[31:0] 90 | @28 91 | iob_cache_tb.uut.native_ram.en_i 92 | @420 93 | iob_cache_tb.uut.native_ram.i 94 | [pattern_trace] 1 95 | [pattern_trace] 0 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # IOb-cache 8 | 9 | IOb-cache is a high-performance, configurable open-source Verilog cache. If you use or like this repository, please cite the following article: 10 | Roque, J.V.; Lopes, J.D.; Véstias, M.P.; de Sousa, J.T. IOb-Cache: A High-Performance Configurable Open-Source Cache. Algorithms 2021, 14, 218. https://doi.org/10.3390/a14080218 11 | 12 | IOb-cache supports pipeline architectures, allowing one request per clock cycle (read and write). 13 | IOb-cache has both Native `IOb` (pipelined) and `AXI4` back-end interfaces. 14 | The Write Policy is configurable: either write-through/not-allocate or write-back/allocate. 15 | The configuration supports the number of ways, address width, cache's word size (front-end data width), the memory's word size (back-end data width), the number of lines and words per line, replacement policy (if set associative), and cache-control module (allows performance measurement, cache invalidation, and write-through buffer status). 16 | 17 | 18 | ## Setup using Py2hwsw 19 | 20 | IOb-Cache uses Py2hwsw to generate the hardware and software components. To 21 | install Py2hwsw, follow the instructions in the [Py2hwsw 22 | repository](https://github.com/IObundle/py2hwsw). The file iob_cache.py is 23 | IOb-Cache's Py2hwsw description, which you can update to your needs. 24 | 25 | Edit the Makefile file to set the back-end interface type (BE_IF) and width 26 | (BE_DATA_W) according to your needs at compile time. These variables can also be 27 | passed at the command line. 
You can also change the SIMULATOR variable used to 28 | select a specific simulator or the DOC variable used to choose a document type 29 | to generate. The Makefile provides the following targets for simulation, FPGA 30 | synthesis, and documentation generation. 31 | 32 | ## Quick setup 33 | 34 | Py2hwsw runs in a Nix shell. First, download and install 35 | [nix-shell](https://nixos.org/download.html#nix-install-linux). 36 | 37 | To generate the Verilog sources, call the 'setup' Makefile target: 38 | ``` 39 | make setup 40 | ``` 41 | The sources are generated in the `../iob_cache_Vx.y/hardware/src/` directory, where Vx.y is the current version of IOb-cache. 42 | The generated top-level module is either `iob_cache_iob.v` or `iob_cache_axi.v`, depending on the value of the BE_IF Makefile variable. 43 | 44 | To run the simulation, call the 'sim-run' Makefile target: 45 | ``` 46 | make sim-run 47 | ``` 48 | 49 | ## Pre-built files 50 | 51 | Two [FuseSoC](https://github.com/olofk/fusesoc)-compatible pre-built versions of IOb-cache, with AXI4 and IOb back-end interfaces, are available on the [repository's release page](https://github.com/IObundle/iob-cache/releases). 52 | The cache's Verilog sources are available in the `iob_cache_*/hardware/src/` directory of the compressed tar.gz files on the release page. 53 | 54 | To use these pre-built cores in FuseSoC, extract the compressed tar.gz files to a [FuseSoC library directory](https://fusesoc.readthedocs.io/en/stable/user/overview.html#discover-cores-the-package-manager). 
55 | 56 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_control/iob_cache_control.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | import os 6 | from pathlib import Path 7 | import shutil 8 | 9 | 10 | def setup(py_params: dict): 11 | 12 | be_if = py_params.get("be_if", "axi") 13 | 14 | # Create dictionary with attributes of cache 15 | attributes_dict = { 16 | "generate_hw": False, 17 | } 18 | # 19 | # Confs 20 | # 21 | attributes_dict["confs"] = [ 22 | { 23 | "name": "DATA_W", 24 | "descr": "Data width", 25 | "type": "P", 26 | "val": "32", 27 | "min": "NA", 28 | "max": "NA", 29 | }, 30 | { 31 | "name": "USE_CTRL_CNT", 32 | "descr": "Instantiates hit/miss counters for reads, writes or both (1), or not (0). This parameter is meaningful if the cache controller is present (USE_CTRL: 1), providing additional software accessible functions for these functions.", 33 | "type": "P", 34 | "val": "1", 35 | "min": "0", 36 | "max": "1", 37 | }, 38 | ] 39 | # 40 | # Ports 41 | # 42 | attributes_dict["ports"] = [ 43 | { 44 | "name": "clk_en_rst_s", 45 | "descr": "Clock, clock enable and reset", 46 | "signals": { 47 | "type": "iob_clk", 48 | "params": "c_a", 49 | }, 50 | }, 51 | { 52 | "name": "cache_ctrl_io", 53 | "descr": "", 54 | "signals": [ 55 | {"name": "valid_i", "width": 1}, 56 | {"name": "addr_i", "width": f"`IOB_CACHE_{be_if.upper()}_CSRS_ADDR_W"}, 57 | {"name": "wstrb_i", "width": "DATA_W/8"}, 58 | {"name": "wtbuf_full_i", "width": 1}, 59 | {"name": "wtbuf_empty_i", "width": 1}, 60 | {"name": "write_hit_i", "width": 1}, 61 | {"name": "write_miss_i", "width": 1}, 62 | {"name": "read_hit_i", "width": 1}, 63 | {"name": "read_miss_i", "width": 1}, 64 | {"name": "rdata_o", "width": "DATA_W", "isvar": True}, 65 | {"name": "ready_o", "width": 1, "isvar": True}, 66 | {"name": 
"invalidate_o", "width": 1, "isvar": True}, 67 | ], 68 | }, 69 | ] 70 | # 71 | # Wires 72 | # 73 | attributes_dict["wires"] = [] 74 | # 75 | # Subblocks 76 | # 77 | attributes_dict["subblocks"] = [] 78 | # 79 | # Snippets 80 | # 81 | attributes_dict["snippets"] = [] 82 | 83 | # Copy correct iob_cache_control according to cache backend interface 84 | # Backend interface type ["axi", "iob"] 85 | hw_src = os.path.dirname(os.path.realpath(__file__)) 86 | hw_src = f"{hw_src}/hardware/{be_if}/iob_cache_control_{be_if}.v" 87 | hw_dst = f"{py_params['build_dir']}/hardware/src/" 88 | Path(hw_dst).mkdir(parents=True, exist_ok=True) 89 | shutil.copy2(hw_src, f"{hw_dst}/iob_cache_control.v") 90 | 91 | return attributes_dict 92 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_back_end_axi/hardware/src/iob_cache_back_end_axi.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | `include "iob_cache_back_end_axi_conf.vh" 8 | 9 | module iob_cache_back_end_axi #( 10 | `include "iob_cache_back_end_axi_params.vs" 11 | ) ( 12 | `include "iob_cache_back_end_axi_io.vs" 13 | ); 14 | 15 | iob_cache_read_channel_axi #( 16 | .ADDR_W (FE_ADDR_W), 17 | .DATA_W (FE_DATA_W), 18 | .BE_ADDR_W (AXI_ADDR_W), 19 | .BE_DATA_W (AXI_DATA_W), 20 | .WORD_OFFSET_W(WORD_OFFSET_W), 21 | .AXI_ADDR_W (AXI_ADDR_W), 22 | .AXI_DATA_W (AXI_DATA_W), 23 | .AXI_ID_W (AXI_ID_W), 24 | .AXI_LEN_W (AXI_LEN_W), 25 | .AXI_ID (AXI_ID) 26 | ) read_fsm ( 27 | .replace_valid_i(replace_valid_i), 28 | .replace_addr_i (replace_addr_i), 29 | .replace_o (replace_o), 30 | .read_valid_o (read_valid_o), 31 | .read_addr_o (read_addr_o), 32 | .read_rdata_o (read_rdata_o), 33 | 34 | .axi_araddr_o (axi_araddr_o), 35 | .axi_arprot_o (), 36 | .axi_arvalid_o(axi_arvalid_o), 37 | .axi_arready_i(axi_arready_i), 38 | .axi_rdata_i 
(axi_rdata_i), 39 | .axi_rresp_i (axi_rresp_i), 40 | .axi_rvalid_i (axi_rvalid_i), 41 | .axi_rready_o (axi_rready_o), 42 | .axi_arid_o (axi_arid_o), 43 | .axi_arlen_o (axi_arlen_o), 44 | .axi_arsize_o (axi_arsize_o), 45 | .axi_arburst_o(axi_arburst_o), 46 | .axi_arlock_o (axi_arlock_o), 47 | .axi_arcache_o(axi_arcache_o), 48 | .axi_arqos_o (axi_arqos_o), 49 | .axi_rid_i (axi_rid_i), 50 | .axi_rlast_i (axi_rlast_i), 51 | 52 | .clk_i (clk_i), 53 | .reset_i(arst_i) 54 | ); 55 | 56 | iob_cache_write_channel_axi #( 57 | .ADDR_W (FE_ADDR_W), 58 | .DATA_W (FE_DATA_W), 59 | .BE_ADDR_W (AXI_ADDR_W), 60 | .BE_DATA_W (AXI_DATA_W), 61 | .WRITE_POL (WRITE_POL), 62 | .WORD_OFFSET_W(WORD_OFFSET_W), 63 | .AXI_ADDR_W (AXI_ADDR_W), 64 | .AXI_DATA_W (AXI_DATA_W), 65 | .AXI_ID_W (AXI_ID_W), 66 | .AXI_LEN_W (AXI_LEN_W), 67 | .AXI_ID (AXI_ID) 68 | ) write_fsm ( 69 | .valid_i(write_valid_i), 70 | .addr_i (write_addr_i), 71 | .wstrb_i(write_wstrb_i), 72 | .wdata_i(write_wdata_i), 73 | .ready_o(write_ready_o), 74 | 75 | .axi_awaddr_o (axi_awaddr_o), 76 | .axi_awprot_o (), 77 | .axi_awvalid_o(axi_awvalid_o), 78 | .axi_awready_i(axi_awready_i), 79 | .axi_wdata_o (axi_wdata_o), 80 | .axi_wstrb_o (axi_wstrb_o), 81 | .axi_wvalid_o (axi_wvalid_o), 82 | .axi_wready_i (axi_wready_i), 83 | .axi_bresp_i (axi_bresp_i), 84 | .axi_bvalid_i (axi_bvalid_i), 85 | .axi_bready_o (axi_bready_o), 86 | .axi_awid_o (axi_awid_o), 87 | .axi_awlen_o (axi_awlen_o), 88 | .axi_awsize_o (axi_awsize_o), 89 | .axi_awburst_o(axi_awburst_o), 90 | .axi_awlock_o (axi_awlock_o), 91 | .axi_awcache_o(axi_awcache_o), 92 | .axi_awqos_o (axi_awqos_o), 93 | .axi_wlast_o (axi_wlast_o), 94 | .axi_bid_i (axi_bid_i), 95 | 96 | .clk_i (clk_i), 97 | .reset_i(arst_i) 98 | ); 99 | 100 | endmodule 101 | -------------------------------------------------------------------------------- /hardware/lint/verilator_waiver.vlt: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2025 
IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | // DESCRIPTION: Verilator output: Waivers generated with --waiver-output 6 | 7 | `verilator_config 8 | 9 | // unused instance port 10 | lint_off -rule PINCONNECTEMPTY -file "**/*_cache_memory.v" -match "Instance pin connected by name with empty reference: 'level_o'*" 11 | 12 | // Extra bits to match line_rdata signal width 13 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_memory.v" -match "Bits of signal are not used: 'line_rdata_tmp'[511:32]*" 14 | 15 | // Signals used if USE_CTRL=1 16 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_front_end.v" -match "Bits of signal are not used: 'iob_addr_i'[1:0]*" 17 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_front_end.v" -match "Signal is not used: 'ctrl_rdata_i'*" 18 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_front_end.v" -match "Signal is not used: 'ctrl_ack_i'*" 19 | 20 | // signals used in some generate cases 21 | // but generate cases are split, so we can't just put signals inside one of them 22 | // otherwise they get out scoped 23 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_memory.v" -match "Signal is not driven, nor used: 'dirty'*" 24 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_memory.v" -match "Signal is not driven, nor used: 'dirty_reg'*" 25 | 26 | // 27 | // IOb 28 | // 29 | 30 | // Bits can be used depending of module parameters 31 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Bits of signal are not used: 'data_addr'[2:0]*" 32 | 33 | 34 | // Signals used for USE_CTRL = 1 35 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'ctrl_req'*" 36 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'ctrl_addr'*" 37 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'ctrl_wstrb'*" 38 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'wtbuf_full'*" 39 | lint_off -rule UNUSEDSIGNAL -file 
"**/*_cache_iob.v" -match "Signal is not used: 'write_hit'*" 40 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'write_miss'*" 41 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'read_hit'*" 42 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_iob.v" -match "Signal is not used: 'read_miss'*" 43 | 44 | // 45 | // AXI4 46 | // 47 | 48 | // Unused instance outputs 49 | lint_off -rule PINCONNECTEMPTY -file "**/*_cache_back_end_axi.v" -match "Instance pin connected by name with empty reference: 'axi_arprot_o'*" 50 | lint_off -rule PINCONNECTEMPTY -file "**/*_cache_back_end_axi.v" -match "Instance pin connected by name with empty reference: 'axi_awprot_o'*" 51 | 52 | // Bits can be used depending of module parameters 53 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Bits of signal are not used: 'data_addr'[2:0]*" 54 | 55 | // Signals used for USE_CTRL = 1 56 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'ctrl_req'*" 57 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'ctrl_addr'*" 58 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'ctrl_wstrb'*" 59 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'wtbuf_full'*" 60 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'write_hit'*" 61 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'write_miss'*" 62 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'read_hit'*" 63 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_axi.v" -match "Signal is not used: 'read_miss'*" 64 | 65 | // Signals kept for standard interface implementation 66 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_read_channel_axi.v" -match "Signal is not used: 'axi_rid_i'*" 67 | lint_off -rule UNUSEDSIGNAL -file "**/*_cache_write_channel_axi.v" 
-match "Signal is not used: 'axi_bid_i'*" 68 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | CORE := iob_cache 6 | 7 | SIMULATOR ?= verilator 8 | BOARD ?= iob_aes_ku040_db_g 9 | 10 | BE_IF ?= AXI4 11 | 12 | # Fill PY_PARAMS if not defined 13 | ifeq ($(PY_PARAMS),) 14 | ifneq ($(BE_IF),) 15 | PY_PARAMS:=$(PY_PARAMS):be_if=$(BE_IF) 16 | endif 17 | ifneq ($(BE_DATA_W),) 18 | PY_PARAMS:=$(PY_PARAMS):be_data_w=$(BE_DATA_W) 19 | endif 20 | # Remove first char (:) from PY_PARAMS 21 | PY_PARAMS:=$(shell echo $(PY_PARAMS) | cut -c2-) 22 | endif # ifndef PY_PARAMS 23 | 24 | BUILD_DIR ?= $(shell nix-shell --run "py2hwsw $(CORE) print_build_dir --py_params '$(PY_PARAMS)'") 25 | NAME ?= $(shell nix-shell --run "py2hwsw $(CORE) print_core_name --py_params '$(PY_PARAMS)'") 26 | VERSION ?= $(shell nix-shell --run "py2hwsw $(CORE) print_core_version --py_params '$(PY_PARAMS)'") 27 | 28 | 29 | DOC ?= ug 30 | 31 | all: sim-run 32 | 33 | setup: 34 | nix-shell --run "py2hwsw $(CORE) setup --no_verilog_lint --build_dir '$(BUILD_DIR)' --py_params '$(PY_PARAMS)' $(SETUP_ARGS)" 35 | 36 | sim-build: clean setup 37 | nix-shell --run "make -C $(BUILD_DIR) sim-build SIMULATOR=$(SIMULATOR)" 38 | 39 | sim-run: clean setup 40 | nix-shell --run "make -C $(BUILD_DIR) sim-run SIMULATOR=$(SIMULATOR)" 41 | 42 | sim-waves: 43 | nix-shell --run "make -C $(BUILD_DIR) sim-waves" 44 | 45 | sim-test: 46 | make sim-run SIMULATOR=icarus BE_IF=IOb 47 | make sim-run SIMULATOR=verilator BE_IF=IOb 48 | make sim-run SIMULATOR=icarus BE_IF=AXI4 49 | make sim-run SIMULATOR=verilator BE_IF=AXI4 50 | 51 | lint: clean setup 52 | nix-shell --run "make -C $(BUILD_DIR)/hardware/lint run" 53 | 54 | lint-test: 55 | make lint BE_IF=IOb 56 | make lint BE_IF=AXI4 57 | 58 | fpga-build: clean setup 59 | nix-shell 
--run "make -C $(BUILD_DIR) fpga-build FPGA_TOP=$(NAME) BOARD=$(BOARD)" 60 | 61 | fpga-test: 62 | make fpga-build BE_IF=IOb 63 | make fpga-build BE_IF=AXI4 64 | 65 | 66 | syn-build: clean setup 67 | nix-shell --run "make -C $(BUILD_DIR) syn-build" 68 | 69 | syn-test: 70 | make syn-build BE_IF=IOb 71 | make syn-build BE_IF=AXI4 72 | 73 | doc-build: clean setup 74 | nix-shell --run "make -C $(BUILD_DIR) doc-build DOC=$(DOC)" 75 | 76 | doc-view: $(BUILD_DIR)/document/$(DOC).pdf 77 | nix-shell --run "make -C $(BUILD_DIR) doc-view DOC=$(DOC)" 78 | 79 | $(BUILD_DIR)/document/$(DOC).pdf: doc-build 80 | 81 | .PHONY: all setup sim-build sim-run sim-waves sim-test fpga-build fpga-test doc-build doc-view 82 | 83 | clean: 84 | nix-shell --run "py2hwsw $(CORE) clean --build_dir '$(BUILD_DIR)'" 85 | @rm -rf ../*.summary ../*.rpt fusesoc_exports *.core 86 | @find . -name \*~ -delete 87 | 88 | .PHONY: clean 89 | 90 | fusesoc-export: clean setup 91 | nix-shell --run "py2hwsw $(CORE) export_fusesoc --build_dir '$(BUILD_DIR)' --py_params '$(PY_PARAMS)'" 92 | 93 | .PHONY: fusesoc-export 94 | 95 | CORE_NAME=$(shell nix-shell --run "py2hwsw $(CORE) print_core_name --py_params '$(PY_PARAMS)'") 96 | 97 | define MULTILINE_TEXT 98 | provider: 99 | name: url 100 | url: https://github.com/IObundle/iob-cache/releases/latest/download/$(CORE_NAME)_V$(VERSION).tar.gz 101 | filetype: tar 102 | endef 103 | 104 | # Generate independent fusesoc .core file. FuseSoC will obtain the Verilog sources from remote url with a pre-built build directory. 105 | export MULTILINE_TEXT 106 | fusesoc-core-file: fusesoc-export 107 | cp fusesoc_exports/$(CORE_NAME).core . 108 | # Append provider remote URL to .core file 109 | printf "\n%s\n" "$$MULTILINE_TEXT" >> $(CORE_NAME).core 110 | echo "Generated independent $(CORE_NAME).core file." 
111 | 112 | .PHONY: fusesoc-core-file 113 | 114 | # Release Artifacts 115 | 116 | release-artifacts: 117 | make fusesoc-export BE_IF=AXI4 118 | tar -czf $(CORE)_axi_V$(VERSION).tar.gz -C ./fusesoc_exports . 119 | make fusesoc-export BE_IF=IOb 120 | tar -czf $(CORE)_iob_V$(VERSION).tar.gz -C ./fusesoc_exports . 121 | 122 | .PHONY: release-artifacts 123 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_control/hardware/axi/iob_cache_control_axi.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | `include "iob_cache_control_conf.vh" 8 | `include "iob_cache_axi_csrs_conf.vh" 9 | `include "iob_cache_axi_csrs.vh" 10 | 11 | // Module responsible for performance measuring, information about the current 12 | // cache state, and other cache functions 13 | 14 | module iob_cache_control #( 15 | `include "iob_cache_control_params.vs" 16 | ) ( 17 | `include "iob_cache_control_io.vs" 18 | ); 19 | 20 | localparam WSTRB_W = DATA_W/8; 21 | localparam BYTE_SHIFT = $clog2(WSTRB_W); 22 | 23 | wire [`IOB_CACHE_AXI_CSRS_ADDR_W-1:0] addr_int; 24 | wire [($clog2(WSTRB_W)+1)-1:0] byte_offset; 25 | // TODO: add iob_prio_enc subblock 26 | iob_prio_enc #( 27 | .W (WSTRB_W), 28 | .MODE("LOW") 29 | ) prio_encoder0 ( 30 | .unencoded_i(wstrb_i), 31 | .encoded_o (byte_offset) 32 | ); 33 | 34 | // ignore address LSBs 35 | assign addr_int = {addr_i[`IOB_CACHE_AXI_CSRS_ADDR_W-1:BYTE_SHIFT], {BYTE_SHIFT{1'b0}}} + byte_offset; 36 | 37 | generate 38 | if (USE_CTRL_CNT) begin : g_ctrl_cnt 39 | reg [DATA_W-1:0] read_hit_cnt, read_miss_cnt, write_hit_cnt, write_miss_cnt; 40 | reg [DATA_W-1:0] hit_cnt, miss_cnt; 41 | reg reset_counters; 42 | 43 | always @(posedge clk_i, posedge arst_i) begin 44 | if (arst_i) begin 45 | read_hit_cnt <= {DATA_W{1'b0}}; 46 | read_miss_cnt <= {DATA_W{1'b0}}; 47 | 
write_hit_cnt <= {DATA_W{1'b0}}; 48 | write_miss_cnt <= {DATA_W{1'b0}}; 49 | hit_cnt <= {DATA_W{1'b0}}; 50 | miss_cnt <= {DATA_W{1'b0}}; 51 | end else begin 52 | if (reset_counters) begin 53 | read_hit_cnt <= {DATA_W{1'b0}}; 54 | read_miss_cnt <= {DATA_W{1'b0}}; 55 | write_hit_cnt <= {DATA_W{1'b0}}; 56 | write_miss_cnt <= {DATA_W{1'b0}}; 57 | hit_cnt <= {DATA_W{1'b0}}; 58 | miss_cnt <= {DATA_W{1'b0}}; 59 | end else if (read_hit_i) begin 60 | read_hit_cnt <= read_hit_cnt + 1'b1; 61 | end else if (write_hit_i) begin 62 | write_hit_cnt <= write_hit_cnt + 1'b1; 63 | end else if (read_miss_i) begin 64 | read_miss_cnt <= read_miss_cnt + 1'b1; 65 | read_hit_cnt <= read_hit_cnt - 1'b1; 66 | end else if (write_miss_i) begin 67 | write_miss_cnt <= write_miss_cnt + 1'b1; 68 | end else begin 69 | read_hit_cnt <= read_hit_cnt; 70 | read_miss_cnt <= read_miss_cnt; 71 | write_hit_cnt <= write_hit_cnt; 72 | write_miss_cnt <= write_miss_cnt; 73 | hit_cnt <= read_hit_cnt + write_hit_cnt; 74 | miss_cnt <= read_miss_cnt + write_miss_cnt; 75 | end 76 | end 77 | end 78 | 79 | always @(posedge clk_i) begin 80 | rdata_o <= {DATA_W{1'b0}}; 81 | invalidate_o <= 1'b0; 82 | reset_counters <= 1'b0; 83 | ready_o <= valid_i; // Sends acknowledge the next clock cycle after request (handshake) 84 | 85 | if (valid_i) begin 86 | if (wstrb_i == 0) begin // read operation 87 | if (addr_i == `IOB_CACHE_AXI_CSRS_RW_HIT_ADDR) rdata_o <= hit_cnt; 88 | else if (addr_i == `IOB_CACHE_AXI_CSRS_RW_MISS_ADDR) rdata_o <= miss_cnt; 89 | else if (addr_i == `IOB_CACHE_AXI_CSRS_READ_HIT_ADDR) rdata_o <= read_hit_cnt; 90 | else if (addr_i == `IOB_CACHE_AXI_CSRS_READ_MISS_ADDR) rdata_o <= read_miss_cnt; 91 | else if (addr_i == `IOB_CACHE_AXI_CSRS_WRITE_HIT_ADDR) rdata_o <= write_hit_cnt; 92 | else if (addr_i == `IOB_CACHE_AXI_CSRS_WRITE_MISS_ADDR) rdata_o <= write_miss_cnt; 93 | end else begin // write operation 94 | if (addr_int == `IOB_CACHE_AXI_CSRS_RST_CNTRS_ADDR) reset_counters <= 1'b1; 95 | else if (addr_int == 
`IOB_CACHE_AXI_CSRS_INVALIDATE_ADDR) invalidate_o <= 1'b1; 96 | end 97 | end 98 | end 99 | end else begin : g_no_ctrl_cnt 100 | always @(posedge clk_i) begin 101 | rdata_o <= {DATA_W{1'b0}}; 102 | invalidate_o <= 1'b0; 103 | ready_o <= valid_i; // Sends acknowledge the next clock cycle after request (handshake) 104 | if (valid_i) begin 105 | if (wstrb_i == 0) begin // read operation 106 | if (addr_i == `IOB_CACHE_AXI_CSRS_WTB_EMPTY_ADDR) rdata_o <= wtbuf_empty_i; 107 | else if (addr_i == `IOB_CACHE_AXI_CSRS_WTB_FULL_ADDR) rdata_o <= wtbuf_full_i; 108 | else if (addr_i == `IOB_CACHE_AXI_CSRS_VERSION_ADDR) 109 | rdata_o <= `IOB_CACHE_AXI_CSRS_VERSION; 110 | end else begin // write operation 111 | if (addr_int == `IOB_CACHE_AXI_CSRS_INVALIDATE_ADDR) invalidate_o <= 1'b1; 112 | end 113 | end 114 | end 115 | end 116 | 117 | endgenerate 118 | 119 | endmodule 120 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_control/hardware/iob/iob_cache_control_iob.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | `include "iob_cache_control_conf.vh" 8 | `include "iob_cache_iob_csrs_conf.vh" 9 | `include "iob_cache_iob_csrs.vh" 10 | 11 | // Module responsible for performance measuring, information about the current 12 | // cache state, and other cache functions 13 | 14 | module iob_cache_control #( 15 | `include "iob_cache_control_params.vs" 16 | ) ( 17 | `include "iob_cache_control_io.vs" 18 | ); 19 | 20 | localparam WSTRB_W = DATA_W/8; 21 | localparam BYTE_SHIFT = $clog2(WSTRB_W); 22 | 23 | wire [`IOB_CACHE_IOB_CSRS_ADDR_W-1:0] addr_int; 24 | wire [($clog2(WSTRB_W)+1)-1:0] byte_offset; 25 | // TODO: add iob_prio_enc subblock 26 | iob_prio_enc #( 27 | .W (WSTRB_W), 28 | .MODE("LOW") 29 | ) prio_encoder0 ( 30 | .unencoded_i(wstrb_i), 31 | .encoded_o (byte_offset) 32 
| ); 33 | 34 | // ignore address LSBs 35 | assign addr_int = {addr_i[`IOB_CACHE_IOB_CSRS_ADDR_W-1:BYTE_SHIFT], {BYTE_SHIFT{1'b0}}} + byte_offset; 36 | 37 | generate 38 | if (USE_CTRL_CNT) begin : g_ctrl_cnt 39 | reg [DATA_W-1:0] read_hit_cnt, read_miss_cnt, write_hit_cnt, write_miss_cnt; 40 | reg [DATA_W-1:0] hit_cnt, miss_cnt; 41 | reg reset_counters; 42 | 43 | always @(posedge clk_i, posedge arst_i) begin 44 | if (arst_i) begin 45 | read_hit_cnt <= {DATA_W{1'b0}}; 46 | read_miss_cnt <= {DATA_W{1'b0}}; 47 | write_hit_cnt <= {DATA_W{1'b0}}; 48 | write_miss_cnt <= {DATA_W{1'b0}}; 49 | hit_cnt <= {DATA_W{1'b0}}; 50 | miss_cnt <= {DATA_W{1'b0}}; 51 | end else begin 52 | if (reset_counters) begin 53 | read_hit_cnt <= {DATA_W{1'b0}}; 54 | read_miss_cnt <= {DATA_W{1'b0}}; 55 | write_hit_cnt <= {DATA_W{1'b0}}; 56 | write_miss_cnt <= {DATA_W{1'b0}}; 57 | hit_cnt <= {DATA_W{1'b0}}; 58 | miss_cnt <= {DATA_W{1'b0}}; 59 | end else if (read_hit_i) begin 60 | read_hit_cnt <= read_hit_cnt + 1'b1; 61 | end else if (write_hit_i) begin 62 | write_hit_cnt <= write_hit_cnt + 1'b1; 63 | end else if (read_miss_i) begin 64 | read_miss_cnt <= read_miss_cnt + 1'b1; 65 | read_hit_cnt <= read_hit_cnt - 1'b1; 66 | end else if (write_miss_i) begin 67 | write_miss_cnt <= write_miss_cnt + 1'b1; 68 | end else begin 69 | read_hit_cnt <= read_hit_cnt; 70 | read_miss_cnt <= read_miss_cnt; 71 | write_hit_cnt <= write_hit_cnt; 72 | write_miss_cnt <= write_miss_cnt; 73 | hit_cnt <= read_hit_cnt + write_hit_cnt; 74 | miss_cnt <= read_miss_cnt + write_miss_cnt; 75 | end 76 | end 77 | end 78 | 79 | always @(posedge clk_i) begin 80 | rdata_o <= {DATA_W{1'b0}}; 81 | invalidate_o <= 1'b0; 82 | reset_counters <= 1'b0; 83 | ready_o <= valid_i; // Sends acknowledge the next clock cycle after request (handshake) 84 | 85 | if (valid_i) begin 86 | if (wstrb_i == 0) begin // read operation 87 | if (addr_i == `IOB_CACHE_IOB_CSRS_RW_HIT_ADDR) rdata_o <= hit_cnt; 88 | else if (addr_i == 
`IOB_CACHE_IOB_CSRS_RW_MISS_ADDR) rdata_o <= miss_cnt; 89 | else if (addr_i == `IOB_CACHE_IOB_CSRS_READ_HIT_ADDR) rdata_o <= read_hit_cnt; 90 | else if (addr_i == `IOB_CACHE_IOB_CSRS_READ_MISS_ADDR) rdata_o <= read_miss_cnt; 91 | else if (addr_i == `IOB_CACHE_IOB_CSRS_WRITE_HIT_ADDR) rdata_o <= write_hit_cnt; 92 | else if (addr_i == `IOB_CACHE_IOB_CSRS_WRITE_MISS_ADDR) rdata_o <= write_miss_cnt; 93 | end else begin // write operation 94 | if (addr_int == `IOB_CACHE_IOB_CSRS_RST_CNTRS_ADDR) reset_counters <= 1'b1; 95 | else if (addr_int == `IOB_CACHE_IOB_CSRS_INVALIDATE_ADDR) invalidate_o <= 1'b1; 96 | end 97 | end 98 | end 99 | end else begin : g_no_ctrl_cnt 100 | always @(posedge clk_i) begin 101 | rdata_o <= {DATA_W{1'b0}}; 102 | invalidate_o <= 1'b0; 103 | ready_o <= valid_i; // Sends acknowledge the next clock cycle after request (handshake) 104 | if (valid_i) begin 105 | if (wstrb_i == 0) begin // read operation 106 | if (addr_i == `IOB_CACHE_IOB_CSRS_WTB_EMPTY_ADDR) rdata_o <= wtbuf_empty_i; 107 | else if (addr_i == `IOB_CACHE_IOB_CSRS_WTB_FULL_ADDR) rdata_o <= wtbuf_full_i; 108 | else if (addr_i == `IOB_CACHE_IOB_CSRS_VERSION_ADDR) 109 | rdata_o <= `IOB_CACHE_IOB_CSRS_VERSION; 110 | end else begin // write operation 111 | if (addr_int == `IOB_CACHE_IOB_CSRS_INVALIDATE_ADDR) invalidate_o <= 1'b1; 112 | end 113 | end 114 | end 115 | end 116 | 117 | endgenerate 118 | 119 | endmodule 120 | -------------------------------------------------------------------------------- /software/src/iob_core_tb.c: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2025 IObundle 3 | * 4 | * SPDX-License-Identifier: MIT 5 | */ 6 | 7 | #include "iob_cache_csrs.h" 8 | #include "iob_cache_csrs_conf.h" 9 | 10 | #include 11 | #include 12 | 13 | #define USE_CTRL (1) 14 | #define DATA_W (IOB_CACHE_CSRS_FE_DATA_W) 15 | #define FE_NBYTES_W (2) 16 | #define CACHE_DATA_ADDR_W (IOB_CACHE_CSRS_FE_ADDR_W) 17 | // address control 
after data addressing 18 | #define CACHE_CTRL_BASE (1 << (CACHE_DATA_ADDR_W)) 19 | 20 | static inline void use_ctrl() { iob_cache_csrs_init_baseaddr(CACHE_CTRL_BASE); } 21 | 22 | static inline void use_data() { iob_cache_csrs_init_baseaddr(0); } 23 | 24 | // print n dots (.), keep CPU busy to wait for cache 25 | void wait_print(uint32_t n) { 26 | uint32_t i = 0; 27 | printf("\tWait Print:"); 28 | for (i = 0; i < n; i++) { 29 | printf("."); 30 | } 31 | printf("\n"); 32 | } 33 | 34 | int simple_test(uint32_t n) { 35 | uint32_t i = 0; 36 | uint32_t failed = 0; 37 | uint32_t rdata = 0; 38 | uint32_t expected = 0; 39 | // write n words to cache 40 | for (i = 0; i < n * 4; i += 4) { 41 | iob_write(i, DATA_W, (3 * i)); 42 | } 43 | // read n words back 44 | for (i = 0; i < n * 4; i += 4) { 45 | rdata = iob_read(i, DATA_W); 46 | // check for valid data 47 | expected = 3 * i; 48 | if (rdata != expected) { 49 | failed++; 50 | printf("ERROR at address %d: got 0x%x, expected 0x%x\n", i, rdata, 51 | expected); 52 | } 53 | } 54 | return failed; 55 | } 56 | 57 | int data_test() { 58 | uint32_t failed = 0; 59 | uint32_t rdata = 0; 60 | uint32_t wdata[3] = {0, 0xFFFFFFFF, 0}; 61 | uint32_t ndata = 3; 62 | uint32_t i; 63 | 64 | // write data 65 | for (i = 0; i < ndata; i++) { 66 | iob_write(i * 4, DATA_W, wdata[i]); 67 | } 68 | 69 | wait_print(50); 70 | 71 | // read data 72 | for (i = 0; i < ndata; i++) { 73 | rdata = iob_read(i * 4, DATA_W); 74 | if (rdata != wdata[i]) { 75 | failed++; 76 | printf("DATA TEST ERROR at address %d: got 0x%x, expected 0x%x\n", i * 4, 77 | rdata, wdata[i]); 78 | } 79 | } 80 | return failed; 81 | } 82 | 83 | int address_test() { 84 | uint32_t failed = 0; 85 | uint32_t addr_w = CACHE_DATA_ADDR_W; 86 | uint32_t rdata = 0; 87 | uint32_t wdata[3] = {0x0F, 0x10, 0x0F}; 88 | uint32_t addr[3] = {0}; 89 | uint32_t ndata = 3; 90 | uint32_t i; 91 | uint32_t max_addr = (1 << addr_w) - 1; 92 | addr[1] = max_addr; 93 | 94 | // write data 95 | for (i = 0; i < ndata; 
i++) { 96 | iob_write(addr[i], DATA_W, wdata[i]); 97 | } 98 | wait_print(50); 99 | // read data 100 | for (i = 0; i < ndata; i++) { 101 | rdata = iob_read(addr[i], DATA_W); 102 | if (rdata != wdata[i]) { 103 | failed++; 104 | printf("ADDRESS TEST ERROR at address %d: got 0x%x, expected 0x%x\n", 105 | addr[i], rdata, wdata[i]); 106 | } 107 | } 108 | return failed; 109 | } 110 | 111 | int lru_test(uint32_t nways_w, uint32_t nlines_w) { 112 | uint32_t i = 0; 113 | uint32_t nways = (1 << nways_w); 114 | uint32_t addr_step = ((1 << nlines_w) * (DATA_W / 8)); 115 | uint32_t addr = 0; 116 | uint32_t wdata = 0xDEADBEEF; 117 | 118 | // Write data 119 | for (i = 0, addr = 0; i < (2 * nways); i++, addr += addr_step) { 120 | printf("\tLRU: mem[%x] = %x\n", addr, wdata); 121 | iob_write(addr, DATA_W, wdata); 122 | } 123 | 124 | // Read back data 125 | for (i = 0, addr = 0; i < (2 * nways); i++, addr += addr_step) { 126 | printf("\tLRU: mem[%x] = %x\n", addr, iob_read(addr, DATA_W)); 127 | } 128 | return 0; 129 | } 130 | 131 | void print_counters() { 132 | use_ctrl(); 133 | printf("\tCache Counters:\n"); 134 | printf("\tRW Hit:%d\n", iob_cache_csrs_get_RW_HIT()); 135 | printf("\tRW Miss:%d\n", iob_cache_csrs_get_RW_MISS()); 136 | printf("\tRead Hit:%d\n", iob_cache_csrs_get_READ_HIT()); 137 | printf("\tRead Miss:%d\n", iob_cache_csrs_get_READ_MISS()); 138 | printf("\tWrite Hit:%d\n", iob_cache_csrs_get_WRITE_HIT()); 139 | printf("\tWrite Miss:%d\n", iob_cache_csrs_get_WRITE_MISS()); 140 | } 141 | 142 | void wtb_status() { 143 | use_ctrl(); 144 | printf("\tWrite Buffer Status:\n"); 145 | printf("\t\tEmpty: %d\n", iob_cache_csrs_get_WTB_EMPTY()); 146 | printf("\t\tFull: %d\n", iob_cache_csrs_get_WTB_FULL()); 147 | } 148 | 149 | void reset_counters() { 150 | use_ctrl(); 151 | iob_cache_csrs_set_RST_CNTRS(1); 152 | } 153 | 154 | int ctrl_test() { 155 | 156 | printf("CTRL Test\n"); 157 | use_ctrl(); 158 | printf("\tVersion: %x\n", iob_cache_csrs_get_version()); 159 | 160 | 
wtb_status(); 161 | print_counters(); 162 | printf("\tResetting counters..."); 163 | 164 | reset_counters(); 165 | printf("done!\n"); 166 | print_counters(); 167 | 168 | iob_cache_csrs_set_INVALIDATE(1); 169 | iob_cache_csrs_set_INVALIDATE(0); 170 | 171 | return 0; 172 | } 173 | 174 | int iob_core_tb() { 175 | 176 | int failed = 0; 177 | 178 | // print welcome message 179 | printf("IOB CACHE testbench\n"); 180 | 181 | // print the reset message 182 | printf("Reset complete\n"); 183 | 184 | // init Cache Control 185 | iob_cache_csrs_init_baseaddr(CACHE_CTRL_BASE); 186 | 187 | // simple cache access test 188 | failed += simple_test(5); 189 | 190 | failed += data_test(); 191 | failed += address_test(); 192 | 193 | failed += lru_test(IOB_CACHE_CSRS_NWAYS_W, IOB_CACHE_CSRS_NLINES_W); 194 | 195 | failed += ctrl_test(); 196 | 197 | printf("CACHE test complete.\n"); 198 | return failed; 199 | } 200 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_back_end_iob/hardware/src/iob_cache_read_channel_iob.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | `include "iob_cache_iob_conf.vh" 8 | 9 | module iob_cache_read_channel_iob #( 10 | parameter FE_ADDR_W = 1, 11 | parameter FE_DATA_W = 32, 12 | parameter BE_ADDR_W = `IOB_CACHE_IOB_BE_ADDR_W, 13 | parameter BE_DATA_W = `IOB_CACHE_IOB_BE_DATA_W, 14 | parameter WORD_OFFSET_W = `IOB_CACHE_IOB_WORD_OFFSET_W, 15 | //derived parameters 16 | parameter BE_NBYTES = BE_DATA_W / 8, 17 | parameter BE_NBYTES_W = $clog2(BE_NBYTES), 18 | parameter LINE2BE_W = WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W) 19 | ) ( 20 | input clk_i, 21 | input reset_i, 22 | input replace_valid_i, 23 | input [FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)-1:0] replace_addr_i, 24 | output reg replace_o, 25 | output reg read_valid_o, 26 | output reg [ 
LINE2BE_W-1:0] read_addr_o, 27 | output [ BE_DATA_W-1:0] read_rdata_o, 28 | 29 | // Native memory interface 30 | output [BE_ADDR_W-1:0] be_addr_o, 31 | output reg be_valid_o, 32 | input be_ack_i, 33 | input [BE_DATA_W-1:0] be_rdata_i 34 | ); 35 | 36 | generate 37 | if (LINE2BE_W > 0) begin : g_line2be_w 38 | reg [LINE2BE_W-1:0] word_counter; 39 | 40 | assign be_addr_o = {BE_ADDR_W{1'b0}} + {replace_addr_i, word_counter, {BE_NBYTES_W{1'b0}}}; 41 | assign read_rdata_o = be_rdata_i; 42 | 43 | localparam 44 | idle = 2'd0, 45 | handshake = 2'd1, // the process was divided in 2 handshake steps to cause a delay in the 46 | end_handshake = 2'd2; // (always 1 or a delayed valid signal), otherwise it will fail 47 | 48 | always @(posedge clk_i) read_addr_o <= word_counter; 49 | 50 | reg [1:0] state; 51 | 52 | always @(posedge clk_i, posedge reset_i) begin 53 | if (reset_i) begin 54 | state <= idle; 55 | end else begin 56 | case (state) 57 | idle: begin 58 | if (replace_valid_i) // main_process flag 59 | state <= handshake; 60 | else state <= idle; 61 | end 62 | handshake: begin 63 | if (be_ack_i) 64 | if (read_addr_o == {LINE2BE_W{1'b1}}) begin 65 | state <= end_handshake; 66 | end else begin 67 | state <= handshake; 68 | end 69 | else begin 70 | state <= handshake; 71 | end 72 | end 73 | end_handshake: begin // read-latency delay (last line word) 74 | state <= idle; 75 | end 76 | default: ; 77 | endcase 78 | end 79 | end 80 | 81 | always @* begin 82 | be_valid_o = 1'b0; 83 | replace_o = 1'b1; 84 | word_counter = 0; 85 | read_valid_o = 1'b0; 86 | 87 | case (state) 88 | idle: begin 89 | replace_o = 1'b0; 90 | end 91 | handshake: begin 92 | be_valid_o = ~be_ack_i | ~(&read_addr_o); 93 | if (be_ack_i) begin 94 | word_counter = read_addr_o + 1; 95 | end else begin 96 | word_counter = read_addr_o; 97 | end 98 | read_valid_o = be_ack_i; 99 | end 100 | default: ; 101 | endcase 102 | end 103 | end else begin : g_no_line2be_w 104 | assign be_addr_o = {BE_ADDR_W{1'b0}} + 
{replace_addr_i, {BE_NBYTES_W{1'b0}}}; 105 | assign read_rdata_o = be_rdata_i; 106 | 107 | localparam 108 | idle = 2'd0, 109 | handshake = 2'd1, // the process was divided in 2 handshake steps to cause a delay in the 110 | end_handshake = 2'd2; // (always 1 or a delayed valid signal), otherwise it will fail 111 | 112 | reg [1:0] state; 113 | 114 | always @(posedge clk_i, posedge reset_i) begin 115 | if (reset_i) state <= idle; 116 | else begin 117 | case (state) 118 | idle: begin 119 | if (replace_valid_i) state <= handshake; 120 | else state <= idle; 121 | end 122 | handshake: begin 123 | if (be_ack_i) state <= end_handshake; 124 | else state <= handshake; 125 | end 126 | end_handshake: begin // read-latency delay (last line word) 127 | state <= idle; 128 | end 129 | default: ; 130 | endcase 131 | end 132 | end 133 | 134 | always @* begin 135 | be_valid_o = 1'b0; 136 | replace_o = 1'b1; 137 | read_valid_o = 1'b0; 138 | 139 | case (state) 140 | idle: begin 141 | replace_o = 1'b0; 142 | end 143 | handshake: begin 144 | be_valid_o = ~be_ack_i; 145 | read_valid_o = be_ack_i; 146 | end 147 | default: ; 148 | endcase 149 | end 150 | end 151 | endgenerate 152 | 153 | endmodule 154 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_back_end_iob/iob_cache_back_end_iob.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 6 | def setup(py_params: dict): 7 | # Create dictionary with attributes of cache 8 | attributes_dict = { 9 | "generate_hw": False, 10 | } 11 | # 12 | # Confs 13 | # 14 | attributes_dict["confs"] = [ 15 | { 16 | "name": "FE_ADDR_W", 17 | "descr": "Front-end address width (log2): defines the total memory space accessible via the cache, which must be a power of two.", 18 | "type": "P", 19 | "val": "24", 20 | "min": "1", 21 | "max": "64", 22 | }, 23 | { 24 | "name": "FE_DATA_W", 
25 | "descr": "Front-end data width (log2): this parameter allows supporting processing elements with various data widths.", 26 | "type": "P", 27 | "val": "32", 28 | "min": "32", 29 | "max": "64", 30 | }, 31 | { 32 | "name": "BE_ADDR_W", 33 | "descr": "Back-end address width (log2): the value of this parameter must be equal or greater than FE_ADDR_W to match the width of the back-end interface, but the address space is still dictated by ADDR_W.", 34 | "type": "P", 35 | "val": "24", 36 | "min": "1", 37 | "max": "", 38 | }, 39 | { 40 | "name": "BE_DATA_W", 41 | "descr": "Back-end data width (log2): the value of this parameter must be an integer multiple $k \\geq 1$ of DATA_W. If $k>1$, the memory controller can operate at a frequency higher than the cache's frequency. Typically, the memory controller has an asynchronous FIFO interface, so that it can sequentially process multiple commands received in paralell from the cache's back-end interface. ", 42 | "type": "P", 43 | "val": "32", 44 | "min": "32", 45 | "max": "256", 46 | }, 47 | { 48 | "name": "WORD_OFFSET_W", 49 | "descr": "Word offset width (log2): the value of this parameter equals the number of words per line, which is 2**OFFSET_W. 
", 50 | "type": "P", 51 | "val": "3", 52 | "min": "1", 53 | "max": "", 54 | }, 55 | { 56 | "name": "WRITE_POL", 57 | "descr": "Write policy: set to 0 for write-through or set to 1 for write-back.", 58 | "type": "P", 59 | "val": "0 ", 60 | "min": "0", 61 | "max": "1", 62 | }, 63 | # Derived parameters 64 | { 65 | "name": "FE_NBYTES", 66 | "type": "D", 67 | "val": "FE_DATA_W / 8", 68 | "min": "0", 69 | "max": "32", 70 | }, 71 | { 72 | "name": "FE_NBYTES_W", 73 | "type": "D", 74 | "val": "$clog2(FE_NBYTES)", 75 | "min": "0", 76 | "max": "32", 77 | }, 78 | { 79 | "name": "BE_NBYTES", 80 | "type": "D", 81 | "val": "BE_DATA_W / 8", 82 | "min": "0", 83 | "max": "32", 84 | }, 85 | { 86 | "name": "BE_NBYTES_W", 87 | "type": "D", 88 | "val": "$clog2(BE_NBYTES)", 89 | "min": "0", 90 | "max": "32", 91 | }, 92 | { 93 | "name": "LINE2BE_W", 94 | "type": "D", 95 | "val": "WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W)", 96 | "min": "0", 97 | "max": "32", 98 | }, 99 | ] 100 | # 101 | # Ports 102 | # 103 | attributes_dict["ports"] = [ 104 | { 105 | "name": "clk_en_rst_s", 106 | "descr": "Clock, clock enable and reset", 107 | "signals": { 108 | "type": "iob_clk", 109 | }, 110 | }, 111 | { 112 | "name": "write_io", 113 | "descr": "Back-end write channel", 114 | "signals": [ 115 | {"name": "write_valid_i", "width": 1}, 116 | { 117 | "name": "write_addr_i", 118 | "width": "FE_ADDR_W - (FE_NBYTES_W + WRITE_POL*WORD_OFFSET_W)", 119 | }, 120 | { 121 | "name": "write_wdata_i", 122 | "width": "FE_DATA_W + WRITE_POL*(FE_DATA_W*(2**WORD_OFFSET_W)-FE_DATA_W)", 123 | }, 124 | {"name": "write_wstrb_i", "width": "FE_NBYTES"}, 125 | {"name": "write_ready_o", "width": 1}, 126 | ], 127 | }, 128 | { 129 | "name": "read_io", 130 | "descr": "Back-end read channel", 131 | "signals": [ 132 | {"name": "replace_valid_i", "width": 1}, 133 | {"name": "replace_o", "width": 1}, 134 | { 135 | "name": "replace_addr_i", 136 | "width": "FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)", 137 | }, 138 | {"name": "read_valid_o", 
"width": 1}, 139 | {"name": "read_addr_o", "width": "LINE2BE_W"}, 140 | {"name": "read_rdata_o", "width": "BE_DATA_W"}, 141 | ], 142 | }, 143 | { 144 | "name": "iob_m", 145 | "descr": "Back-end interface", 146 | "signals": { 147 | "type": "iob", 148 | "ADDR_W": "BE_ADDR_W", 149 | "DATA_W": "BE_DATA_W", 150 | }, 151 | }, 152 | ] 153 | # 154 | # Wires 155 | # 156 | attributes_dict["wires"] = [] 157 | # 158 | # Subblocks 159 | # 160 | attributes_dict["subblocks"] = [ 161 | { 162 | "core_name": "iob_reg", 163 | "instance_name": "iob_reg_care_inst", 164 | "port_params": { 165 | "clk_en_rst_s": "c_a_r_e", 166 | }, 167 | }, 168 | ] 169 | # 170 | # Snippets 171 | # 172 | attributes_dict["snippets"] = [] 173 | 174 | return attributes_dict 175 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_front_end/iob_cache_front_end.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 6 | def setup(py_params: dict): 7 | # Create dictionary with attributes of cache 8 | attributes_dict = { 9 | "generate_hw": True, 10 | } 11 | # 12 | # Confs 13 | # 14 | attributes_dict["confs"] = [ 15 | # Currently, Py2hwsw does not have a way of adding verilog `include` directives. 
So we need to repeat this CSRs ADDR_W macro manually here 16 | { 17 | "name": "ADDR_W_CSRS", 18 | "descr": "Address width of CSRs", 19 | "type": "M", 20 | "val": "5", 21 | "min": "?", 22 | "max": "?", 23 | }, 24 | { 25 | "name": "ADDR_W", 26 | "descr": "Cache address width used by csrs_gen", 27 | "type": "P", 28 | "val": "`IOB_CACHE_FRONT_END_ADDR_W_CSRS", 29 | "min": "NA", 30 | "max": "NA", 31 | }, 32 | { 33 | "name": "DATA_W", 34 | "descr": "Cache data width used by csrs_gen", 35 | "type": "P", 36 | "val": "32", 37 | "min": "NA", 38 | "max": "NA", 39 | }, 40 | { 41 | "name": "FE_NBYTES_W", 42 | "type": "D", 43 | "descr": "Front end data bytes width. Sets the number of bits ignored for data addressing.", 44 | "val": "$clog2(DATA_W/8)", 45 | "min": "NA", 46 | "max": "NA", 47 | }, 48 | { 49 | "name": "USE_CTRL", 50 | "descr": "Instantiates a cache controller (1) or not (0). The cache controller provides memory-mapped software accessible registers to invalidate the cache data contents, and monitor the write through buffer status using the front-end interface. To access the cache controller, the MSB of the address mut be set to 1. 
For more information refer to the example software functions provided.", 51 | "type": "P", 52 | "val": "0", 53 | "min": "0", 54 | "max": "1", 55 | }, 56 | ] 57 | # 58 | # Ports 59 | # 60 | attributes_dict["ports"] = [ 61 | { 62 | "name": "clk_en_rst_s", 63 | "descr": "Clock, clock enable and reset", 64 | "signals": { 65 | "type": "iob_clk", 66 | }, 67 | }, 68 | { 69 | "name": "iob_s", 70 | "descr": "Front-end interface", 71 | "signals": { 72 | "type": "iob", 73 | "ADDR_W": "ADDR_W", 74 | "DATA_W": "DATA_W", 75 | }, 76 | }, 77 | { 78 | "name": "cache_mem_io", 79 | "descr": "Cache memory front-end interface", 80 | "signals": [ 81 | {"name": "data_req_o", "width": 1}, 82 | {"name": "data_addr_o", "width": "ADDR_W-USE_CTRL-FE_NBYTES_W"}, 83 | {"name": "data_rdata_i", "width": "DATA_W"}, 84 | {"name": "data_ack_i", "width": 1}, 85 | {"name": "data_req_reg_o", "width": 1}, 86 | {"name": "data_addr_reg_o", "width": "ADDR_W-USE_CTRL-FE_NBYTES_W"}, 87 | {"name": "data_wdata_reg_o", "width": "DATA_W"}, 88 | {"name": "data_wstrb_reg_o", "width": "DATA_W/8"}, 89 | ], 90 | }, 91 | { 92 | "name": "ctrl_io", 93 | "descr": "Control interface.", 94 | "signals": [ 95 | {"name": "ctrl_req_o", "width": 1}, 96 | {"name": "ctrl_addr_o", "width": "`IOB_CACHE_FRONT_END_ADDR_W_CSRS"}, 97 | {"name": "ctrl_wstrb_o", "width": "DATA_W/8"}, 98 | {"name": "ctrl_rdata_i", "width": "USE_CTRL*(DATA_W-1)+1"}, 99 | {"name": "ctrl_ack_i", "width": 1}, 100 | ], 101 | }, 102 | ] 103 | # 104 | # Wires 105 | # 106 | attributes_dict["wires"] = [ 107 | { 108 | "name": "internal_wires", 109 | "descr": "Internal wires", 110 | "signals": [ 111 | {"name": "ack", "width": 1}, 112 | {"name": "valid_int", "width": 1}, 113 | {"name": "ready_int", "width": 1}, 114 | {"name": "we_r", "width": 1}, 115 | {"name": "data_ready_int", "width": 1, "isvar": True}, 116 | ], 117 | }, 118 | ] 119 | # 120 | # Combinatorial 121 | # 122 | attributes_dict["comb"] = { 123 | "code": """ 124 | // data output ports 125 | data_addr_o = 
valid_int ? iob_addr_i[ADDR_W-USE_CTRL-1:FE_NBYTES_W] : data_addr_reg_o; 126 | data_req_o = valid_int | data_req_reg_o; 127 | 128 | iob_rvalid_o = we_r ? 1'b0 : ack; 129 | iob_ready_o = ready_int; 130 | 131 | data_ready_int = data_req_reg_o ~^ data_ack_i; 132 | 133 | // Register every input 134 | data_req_reg_o_nxt = valid_int; 135 | data_req_reg_o_en = valid_int | ack; 136 | 137 | data_addr_reg_o_nxt = iob_addr_i[ADDR_W-USE_CTRL-1:FE_NBYTES_W]; 138 | data_addr_reg_o_en = valid_int; 139 | 140 | data_wdata_reg_o_nxt = iob_wdata_i; 141 | data_wdata_reg_o_en = valid_int; 142 | 143 | data_wstrb_reg_o_nxt = iob_wstrb_i; 144 | data_wstrb_reg_o_en = valid_int; 145 | 146 | we_r_nxt = |iob_wstrb_i; 147 | we_r_en = iob_valid_i; 148 | """ 149 | } 150 | # 151 | # Snippets 152 | # 153 | attributes_dict["snippets"] = [ 154 | { 155 | "verilog_code": """ 156 | // select cache memory or controller 157 | generate 158 | if (USE_CTRL) begin : g_ctrl 159 | // Front-end output signals 160 | assign ack = ctrl_ack_i | data_ack_i; 161 | assign iob_rdata_o = (ctrl_ack_i) ? ctrl_rdata_i : data_rdata_i; 162 | 163 | assign valid_int = ~iob_addr_i[ADDR_W-1] & iob_valid_i; 164 | 165 | assign ctrl_req_o = iob_addr_i[ADDR_W-1] & iob_valid_i; 166 | assign ctrl_addr_o = iob_addr_i[`IOB_CACHE_FRONT_END_ADDR_W_CSRS-1:0]; 167 | assign ctrl_wstrb_o = (ctrl_req_o) ? iob_wstrb_i : {(DATA_W/8){1'b0}}; 168 | 169 | wire ctrl_ready_int; 170 | assign ctrl_ready_int = ctrl_req_o ~^ ctrl_ack_i; 171 | assign ready_int = ctrl_req_o ? 
ctrl_ready_int : data_ready_int; 172 | 173 | end else begin : g_no_ctrl 174 | // Front-end output signals 175 | assign ack = data_ack_i; 176 | assign iob_rdata_o = data_rdata_i; 177 | assign valid_int = iob_valid_i; 178 | assign ctrl_req_o = 1'b0; 179 | assign ctrl_addr_o = `IOB_CACHE_FRONT_END_ADDR_W_CSRS'dx; 180 | assign ctrl_wstrb_o = {(DATA_W/8){1'b0}}; 181 | 182 | assign ready_int = data_ready_int; 183 | end 184 | endgenerate 185 | """, 186 | }, 187 | ] 188 | 189 | return attributes_dict 190 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_back_end_axi/iob_cache_back_end_axi.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 6 | def setup(py_params: dict): 7 | # Create dictionary with attributes of cache 8 | attributes_dict = { 9 | "generate_hw": False, 10 | } 11 | # 12 | # Confs 13 | # 14 | attributes_dict["confs"] = [ 15 | { 16 | "name": "FE_ADDR_W", 17 | "descr": "Front-end address width (log2): defines the total memory space accessible via the cache, which must be a power of two.", 18 | "type": "P", 19 | "val": "24", 20 | "min": "1", 21 | "max": "64", 22 | }, 23 | { 24 | "name": "FE_DATA_W", 25 | "descr": "Front-end data width (log2): this parameter allows supporting processing elements with various data widths.", 26 | "type": "P", 27 | "val": "32", 28 | "min": "32", 29 | "max": "64", 30 | }, 31 | { 32 | "name": "BE_ADDR_W", 33 | "descr": "Back-end address width (log2): the value of this parameter must be equal or greater than FE_ADDR_W to match the width of the back-end interface, but the address space is still dictated by ADDR_W.", 34 | "type": "P", 35 | "val": "24", 36 | "min": "1", 37 | "max": "", 38 | }, 39 | { 40 | "name": "BE_DATA_W", 41 | "descr": "Back-end data width (log2): the value of this parameter must be an integer multiple $k \\geq 1$ of DATA_W. 
If $k>1$, the memory controller can operate at a frequency higher than the cache's frequency. Typically, the memory controller has an asynchronous FIFO interface, so that it can sequentially process multiple commands received in paralell from the cache's back-end interface. ", 42 | "type": "P", 43 | "val": "32", 44 | "min": "32", 45 | "max": "256", 46 | }, 47 | { 48 | "name": "WORD_OFFSET_W", 49 | "descr": "Word offset width (log2): the value of this parameter equals the number of words per line, which is 2**OFFSET_W. ", 50 | "type": "P", 51 | "val": "3", 52 | "min": "1", 53 | "max": "", 54 | }, 55 | { 56 | "name": "WRITE_POL", 57 | "descr": "Write policy: set to 0 for write-through or set to 1 for write-back.", 58 | "type": "P", 59 | "val": "0 ", 60 | "min": "0", 61 | "max": "1", 62 | }, 63 | { 64 | "name": "AXI_ID_W", 65 | "descr": "AXI ID width", 66 | "type": "P", 67 | "val": "1", 68 | "min": "0", 69 | "max": "32", 70 | }, 71 | { 72 | "name": "AXI_ID", 73 | "descr": "AXI ID", 74 | "type": "P", 75 | "val": "0", 76 | "min": "0", 77 | "max": "32", 78 | }, 79 | { 80 | "name": "AXI_LEN_W", 81 | "descr": "AXI length", 82 | "type": "P", 83 | "val": "4", 84 | "min": "0", 85 | "max": "32", 86 | }, 87 | { 88 | "name": "AXI_ADDR_W", 89 | "descr": "AXI address width", 90 | "type": "P", 91 | "val": "BE_ADDR_W", 92 | "min": "0", 93 | "max": "32", 94 | }, 95 | { 96 | "name": "AXI_DATA_W", 97 | "descr": "AXI data width", 98 | "type": "P", 99 | "val": "BE_DATA_W", 100 | "min": "0", 101 | "max": "32", 102 | }, 103 | # Derived parameters 104 | { 105 | "name": "FE_NBYTES", 106 | "type": "D", 107 | "val": "FE_DATA_W / 8", 108 | "min": "0", 109 | "max": "32", 110 | }, 111 | { 112 | "name": "FE_NBYTES_W", 113 | "type": "D", 114 | "val": "$clog2(FE_NBYTES)", 115 | "min": "0", 116 | "max": "32", 117 | }, 118 | { 119 | "name": "BE_NBYTES", 120 | "type": "D", 121 | "val": "BE_DATA_W / 8", 122 | "min": "0", 123 | "max": "32", 124 | }, 125 | { 126 | "name": "BE_NBYTES_W", 127 | "type": "D", 
128 | "val": "$clog2(BE_NBYTES)", 129 | "min": "0", 130 | "max": "32", 131 | }, 132 | { 133 | "name": "LINE2BE_W", 134 | "type": "D", 135 | "val": "WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W)", 136 | "min": "0", 137 | "max": "32", 138 | }, 139 | ] 140 | # 141 | # Ports 142 | # 143 | attributes_dict["ports"] = [ 144 | { 145 | "name": "clk_rst_s", 146 | "descr": "Clock and reset", 147 | "signals": { 148 | "type": "iob_clk", 149 | "params": "a", 150 | }, 151 | }, 152 | { 153 | "name": "write_io", 154 | "descr": "Back-end write channel", 155 | "signals": [ 156 | {"name": "write_valid_i", "width": 1}, 157 | { 158 | "name": "write_addr_i", 159 | "width": "FE_ADDR_W - (FE_NBYTES_W + WRITE_POL*WORD_OFFSET_W)", 160 | }, 161 | { 162 | "name": "write_wdata_i", 163 | "width": "FE_DATA_W + WRITE_POL*(FE_DATA_W*(2**WORD_OFFSET_W)-FE_DATA_W)", 164 | }, 165 | {"name": "write_wstrb_i", "width": "FE_NBYTES"}, 166 | {"name": "write_ready_o", "width": 1}, 167 | ], 168 | }, 169 | { 170 | "name": "read_io", 171 | "descr": "Back-end read channel", 172 | "signals": [ 173 | {"name": "replace_valid_i", "width": 1}, 174 | {"name": "replace_o", "width": 1}, 175 | { 176 | "name": "replace_addr_i", 177 | "width": "FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)", 178 | }, 179 | {"name": "read_valid_o", "width": 1}, 180 | {"name": "read_addr_o", "width": "LINE2BE_W"}, 181 | {"name": "read_rdata_o", "width": "AXI_DATA_W"}, 182 | ], 183 | }, 184 | { 185 | "name": "axi_m", 186 | "descr": "Back-end interface", 187 | "signals": { 188 | "type": "axi", 189 | "ID_W": "AXI_ID_W", 190 | "ADDR_W": "AXI_ADDR_W", 191 | "DATA_W": "AXI_DATA_W", 192 | "LEN_W": "AXI_LEN_W", 193 | "LOCK_W": 1, 194 | }, 195 | }, 196 | ] 197 | # 198 | # Wires 199 | # 200 | attributes_dict["wires"] = [] 201 | # 202 | # Subblocks 203 | # 204 | attributes_dict["subblocks"] = [] 205 | # 206 | # Snippets 207 | # 208 | attributes_dict["snippets"] = [] 209 | 210 | return attributes_dict 211 | 
--------------------------------------------------------------------------------
/hardware/modules/iob_cache_back_end_iob/hardware/src/iob_cache_write_channel_iob.v:
--------------------------------------------------------------------------------
// SPDX-FileCopyrightText: 2024 IObundle
//
// SPDX-License-Identifier: MIT

`timescale 1ns / 1ps

`include "iob_cache_iob_conf.vh"

// Back-end write channel for the native (valid/ack) memory interface.
//
// Takes write requests (valid_i/addr_i/wstrb_i/wdata_i) and drives them on the
// be_* memory port. The implementation is selected at elaboration time:
//   - WRITE_POL == `IOB_CACHE_IOB_WRITE_THROUGH: one DATA_W word per request,
//     replicated over the BE_DATA_W bus when the back end is wider.
//   - otherwise (write-back): wdata_i is sent in BE_DATA_W slices; when
//     LINE2BE_W > 0 a LINE2BE_W-bit word counter selects the slice and the
//     matching address bits.
module iob_cache_write_channel_iob #(
   parameter ADDR_W        = 1,
   parameter DATA_W        = 32,
   parameter FE_DATA_W     = `IOB_CACHE_IOB_FE_DATA_W,
   parameter BE_ADDR_W     = `IOB_CACHE_IOB_BE_ADDR_W,
   parameter BE_DATA_W     = `IOB_CACHE_IOB_BE_DATA_W,
   parameter WRITE_POL     = `IOB_CACHE_IOB_WRITE_THROUGH,
   parameter WORD_OFFSET_W = `IOB_CACHE_IOB_WORD_OFFSET_W,
   //derived parameters
   parameter FE_NBYTES     = FE_DATA_W / 8,
   parameter FE_NBYTES_W   = $clog2(FE_NBYTES),
   parameter BE_NBYTES     = BE_DATA_W / 8,
   parameter BE_NBYTES_W   = $clog2(BE_NBYTES),
   parameter LINE2BE_W     = WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W)
) (
   input clk_i,
   input reset_i,

   // Request side. The address drops the byte-offset bits and, when
   // WRITE_POL is 1, the word-offset bits as well; wdata_i is one DATA_W word
   // when WRITE_POL is 0 and DATA_W*(2**WORD_OFFSET_W) bits when it is 1.
   input valid_i,
   input [ADDR_W-1 : FE_NBYTES_W + WRITE_POL*WORD_OFFSET_W] addr_i,
   input [FE_NBYTES-1:0] wstrb_i,
   input [DATA_W + WRITE_POL*(DATA_W*(2**WORD_OFFSET_W)-DATA_W)-1:0] wdata_i, // try [DATA_W*((2**WORD_OFFSET_W)**WRITE_POL)-1:0] (f(x)=a*b^x)
   output reg ready_o,

   // Native Memory interface
   output [BE_ADDR_W -1:0] be_addr_o,
   output reg be_valid_o,
   input be_ack_i,
   output [ BE_DATA_W-1:0] be_wdata_o,
   output reg [ BE_NBYTES-1:0] be_wstrb_o
);

   genvar i;

   generate
      if (WRITE_POL == `IOB_CACHE_IOB_WRITE_THROUGH) begin : g_write_through
         // Back-end address: request address with the BE byte-offset bits zeroed,
         // zero-extended to BE_ADDR_W by the addition with an all-zero vector.
         assign be_addr_o = {BE_ADDR_W{1'b0}} + {addr_i[ADDR_W-1 : BE_NBYTES_W], {BE_NBYTES_W{1'b0}}};

         localparam idle = 1'd0, write = 1'd1;

         reg [0:0] state;
         if (BE_DATA_W == DATA_W) begin : g_same_data_w
            assign be_wdata_o = wdata_i;

            // Strobes are only driven while in the write state.
            always @* begin
               be_wstrb_o = 0;

               case (state)
                  write: be_wstrb_o = wstrb_i;
                  default: ;
               endcase
            end
         end else begin : g_not_same_data_w
            // Position of the DATA_W word inside the wider back-end word.
            wire [BE_NBYTES_W-FE_NBYTES_W -1 :0] word_align = addr_i[FE_NBYTES_W +: (BE_NBYTES_W - FE_NBYTES_W)];

            // Replicate the word on every DATA_W lane; the shifted strobe below
            // selects which lane is actually written.
            for (i = 0; i < BE_DATA_W / DATA_W; i = i + 1) begin : g_wdata_block
               assign be_wdata_o[(i+1)*DATA_W-1:i*DATA_W] = wdata_i;
            end

            always @* begin
               be_wstrb_o = 0;

               case (state)
                  write: be_wstrb_o = wstrb_i << word_align * FE_NBYTES;
                  default: ;
               endcase
            end
         end

         // State register: idle -> write on valid_i; write -> idle when the
         // access is acknowledged and no new request is pending.
         always @(posedge clk_i, posedge reset_i) begin
            if (reset_i) state <= idle;
            else
               case (state)
                  idle: begin
                     if (valid_i) state <= write;
                     else state <= idle;
                  end
                  default: begin  // write
                     if (be_ack_i & ~valid_i) state <= idle;
                     else if (be_ack_i & valid_i)  // still has data to write
                        state <= write;
                     else state <= write;
                  end
               endcase
         end

         // Outputs: ready in idle; in write, be_valid_o is held until be_ack_i
         // and ready_o is raised in the acknowledge cycle.
         always @* begin
            ready_o    = 1'b0;
            be_valid_o = 1'b0;

            case (state)
               idle: ready_o = 1'b1;
               default: begin  // write
                  be_valid_o = ~be_ack_i;
                  ready_o    = be_ack_i;
               end
            endcase
         end
      end else begin : g_write_back
         // if (WRITE_POL == WRITE_BACK)
         if (LINE2BE_W > 0) begin : g_line2be_w
            // word_counter is the combinational next value; word_counter_reg
            // is its registered copy (no reset: it is driven to 0 whenever the
            // FSM is idle, through the default assignment below).
            reg [LINE2BE_W-1:0] word_counter, word_counter_reg;
            always @(posedge clk_i) word_counter_reg <= word_counter;

            // memory address
            assign be_addr_o = {BE_ADDR_W{1'b0}} + {addr_i[ADDR_W-1: BE_NBYTES_W + LINE2BE_W], word_counter, {BE_NBYTES_W{1'b0}}};

            // memory write-data
            // (slice of wdata_i selected by word_counter, BE_DATA_W bits at a time)
            assign be_wdata_o = wdata_i >> (BE_DATA_W * word_counter);

            localparam idle = 1'd0, write = 1'd1;

            reg [0:0] state;

            // State register: leaves write when an acknowledge arrives while
            // the registered counter is all ones.
            always @(posedge clk_i, posedge reset_i) begin
               if (reset_i) state <= idle;
               else
                  case (state)
                     idle: begin
                        if (valid_i) state <= write;
                        else state <= idle;
                     end
                     default: begin  // write
                        if (be_ack_i & (&word_counter_reg)) state <= idle;
                        else state <= write;
                     end
                  endcase
            end

            // NOTE(review): ready_o and be_valid_o test the post-increment
            // word_counter, while the state transition above tests
            // word_counter_reg - confirm this difference is intended.
            always @* begin
               ready_o      = 1'b0;
               be_valid_o   = 1'b0;
               be_wstrb_o   = 0;
               word_counter = 0;

               case (state)
                  idle: begin
                     ready_o = ~valid_i;
                     if (valid_i) be_wstrb_o = {BE_NBYTES{1'b1}};
                     else be_wstrb_o = 0;
                  end
                  default: begin  // write
                     ready_o      = be_ack_i & (&word_counter);  // last word transferred
                     be_valid_o   = ~(be_ack_i & (&word_counter));
                     be_wstrb_o   = {BE_NBYTES{1'b1}};
                     // advance by one on every acknowledge
                     word_counter = word_counter_reg + be_ack_i;
                  end
               endcase
            end
         end else begin : g_no_line2be_w
            // wdata_i is sent in a single back-end access, so no word counter
            // is needed.
            // memory address
            assign be_addr_o = {BE_ADDR_W{1'b0}} + {addr_i[ADDR_W-1:BE_NBYTES_W], {BE_NBYTES_W{1'b0}}};

            // memory write-data
            assign be_wdata_o = wdata_i;

            localparam idle = 1'd0, write = 1'd1;

            reg [0:0] state;

            // State register: idle -> write on valid_i; write -> idle on be_ack_i.
            always @(posedge clk_i, posedge reset_i) begin
               if (reset_i) state <= idle;
               else
                  case (state)
                     idle: begin
                        if (valid_i) state <= write;
                        else state <= idle;
                     end
                     default: begin  // write
                        if (be_ack_i) state <= idle;
                        else state <= write;
                     end
                  endcase
            end

            // Outputs: all byte strobes are asserted for the whole transfer.
            always @* begin
               ready_o    = 1'b0;
               be_valid_o = 1'b0;
               be_wstrb_o = 0;

               case (state)
                  idle: begin
                     ready_o = ~valid_i;
                     if (valid_i) be_wstrb_o = {BE_NBYTES{1'b1}};
                     else be_wstrb_o = 0;
                  end
                  default: begin  // write
                     ready_o    = be_ack_i;
                     be_valid_o = ~be_ack_i;
                     be_wstrb_o = {BE_NBYTES{1'b1}};
                  end
               endcase
            end
         end
      end
   endgenerate

endmodule
--------------------------------------------------------------------------------
/hardware/simulation/iob_cache_sim_wrapper/iob_cache_sim_wrapper.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 6 | def setup(py_params_dict): 7 | params = { 8 | # Confs passed by issuer (iob_cache) 9 | "cache_confs": [], 10 | "be_if": "axi", 11 | } 12 | 13 | # Update params with values from py_params_dict 14 | for param in py_params_dict: 15 | if param in params: 16 | params[param] = py_params_dict[param] 17 | 18 | assert params["be_if"] in ["axi", "iob"], "Invalid BE_IF" 19 | 20 | attributes_dict = { 21 | "name": "iob_uut", 22 | "generate_hw": True, 23 | "confs": params["cache_confs"], 24 | } 25 | # 26 | # Ports 27 | # 28 | attributes_dict["ports"] = [ 29 | { 30 | "name": "clk_en_rst_s", 31 | "descr": "Clock, clock enable and reset", 32 | "signals": { 33 | "type": "iob_clk", 34 | }, 35 | }, 36 | { 37 | "name": "cache_s", 38 | "descr": "Testbench cache csrs interface", 39 | "signals": { 40 | "type": "iob", 41 | "ADDR_W": "ADDR_W", 42 | "DATA_W": "DATA_W", 43 | }, 44 | }, 45 | ] 46 | # 47 | # Confs 48 | # 49 | # Overwrite Cache Confs 50 | for conf in attributes_dict["confs"]: 51 | if conf["name"] == "USE_CTRL": 52 | conf["val"] = "1" 53 | elif conf["name"] == "USE_CTRL_CNT": 54 | conf["val"] = "1" 55 | # 56 | # Wires 57 | # 58 | attributes_dict["wires"] = [ 59 | { 60 | "name": "clk", 61 | "descr": "Clock signal", 62 | "signals": [ 63 | {"name": "clk_i"}, 64 | ], 65 | }, 66 | { 67 | "name": "rst", 68 | "descr": "Reset signal", 69 | "signals": [ 70 | {"name": "arst_i"}, 71 | ], 72 | }, 73 | { 74 | "name": "ie", 75 | "descr": "Internal signals for cache invalidate and write-trough buffer IO chain", 76 | "signals": [ 77 | {"name": "invalidate_i_int", "width": 1}, 78 | {"name": "invalidate_o_int", "width": 1}, 79 | {"name": "wtb_empty_i_int", "width": 1}, 80 | {"name": "wtb_empty_o_int", "width": 1}, 81 | ], 82 | }, 83 | ] 84 | if 
params["be_if"] == "axi": 85 | attributes_dict["wires"] += [ 86 | { 87 | "name": "axi", 88 | "descr": "AXI bus to connect Cache back end to memory", 89 | "signals": { 90 | "type": "axi", 91 | "prefix": "be_", 92 | "ID_W": "AXI_ID_W", 93 | "ADDR_W": "AXI_ADDR_W", 94 | "DATA_W": "AXI_DATA_W", 95 | "LEN_W": "AXI_LEN_W", 96 | "LOCK_W": 1, 97 | }, 98 | }, 99 | { 100 | "name": "axi_ram_mem", 101 | "descr": "Connect axi_ram to 'iob_ram_t2p_be' memory", 102 | "signals": { 103 | "type": "ram_t2p_be", 104 | "prefix": "ext_mem_", 105 | "ADDR_W": "AXI_ADDR_W - 2", 106 | }, 107 | }, 108 | ] 109 | elif params["be_if"] == "iob": 110 | attributes_dict["wires"] += [ 111 | { 112 | "name": "iob", 113 | "descr": "IOb bus to connect Cache back end to memory", 114 | "signals": { 115 | "type": "iob", 116 | "prefix": "be_", 117 | "ADDR_W": "BE_ADDR_W", 118 | "DATA_W": "BE_DATA_W", 119 | }, 120 | }, 121 | { 122 | "name": "mem_if", 123 | "descr": "Memory interface", 124 | "signals": [ 125 | {"name": "mem_en_i", "width": 1}, 126 | {"name": "mem_we_i", "width": "BE_DATA_W/8"}, 127 | {"name": "mem_addr_i", "width": "BE_ADDR_W"}, 128 | {"name": "mem_d_i", "width": "BE_DATA_W"}, 129 | {"name": "mem_d_o", "width": "BE_DATA_W"}, 130 | ], 131 | }, 132 | { 133 | "name": "iob_reg_rvalid", 134 | "descr": "Register valid signal", 135 | "signals": [ 136 | {"name": "iob_reg_rvalid", "width": 1}, 137 | ], 138 | }, 139 | ] 140 | # 141 | # Blocks 142 | # 143 | attributes_dict["subblocks"] = [ 144 | { 145 | "core_name": "iob_cache", 146 | "instance_name": "cache", 147 | "instance_description": f"Unit Under Test (UUT) Cache instance with '{params['be_if']}' back end interface.", 148 | "parameters": { 149 | "USE_CTRL": "USE_CTRL", 150 | "USE_CTRL_CNT": "USE_CTRL_CNT", 151 | }, 152 | "connect": { 153 | "clk_en_rst_s": "clk_en_rst_s", 154 | "iob_s": "cache_s", 155 | f"{params['be_if']}_m": f"{params['be_if']}", 156 | "ie_io": "ie", 157 | }, 158 | }, 159 | ] 160 | if params["be_if"] == "axi": 161 | 
attributes_dict["subblocks"] += [ 162 | { 163 | "core_name": "iob_axi_ram", 164 | "instance_name": "ddr_model_mem", 165 | "instance_description": "External memory", 166 | "parameters": { 167 | "ID_WIDTH": "AXI_ID_W", 168 | "ADDR_WIDTH": "AXI_ADDR_W", 169 | "DATA_WIDTH": "AXI_DATA_W", 170 | "LEN_WIDTH": "AXI_LEN_W", 171 | }, 172 | "connect": { 173 | "clk_i": "clk", 174 | "rst_i": "rst", 175 | "axi_s": ( 176 | "axi", 177 | [ 178 | "{1'b0, be_axi_arlock}", 179 | "{1'b0, be_axi_awlock}", 180 | ], 181 | ), 182 | "external_mem_bus_m": "axi_ram_mem", 183 | }, 184 | }, 185 | { 186 | "core_name": "iob_ram_t2p_be", 187 | "instance_name": "iob_ram_t2p_be_inst", 188 | "parameters": { 189 | "ADDR_W": "AXI_ADDR_W - 2", 190 | "DATA_W": "AXI_DATA_W", 191 | }, 192 | "connect": { 193 | "ram_t2p_be_s": "axi_ram_mem", 194 | }, 195 | }, 196 | ] 197 | elif params["be_if"] == "iob": 198 | attributes_dict["subblocks"] += [ 199 | { 200 | "core_name": "iob_ram_sp_be", 201 | "instance_name": "native_ram", 202 | "parameters": { 203 | "ADDR_W": "BE_ADDR_W", 204 | "DATA_W": "BE_DATA_W", 205 | }, 206 | "connect": { 207 | "clk_i": "clk", 208 | "mem_if_io": "mem_if", 209 | }, 210 | }, 211 | ] 212 | # 213 | # Combinatorial 214 | # 215 | attributes_dict["snippets"] = [ 216 | """ 217 | // Set constant inputs and connect outputs 218 | assign invalidate_i_int = 1'b0; 219 | assign wtb_empty_i_int = 1'b1; 220 | """ 221 | ] 222 | if params["be_if"] == "iob": 223 | comb_code = """ 224 | be_iob_ready = 1'b1; 225 | 226 | mem_en_i = be_iob_valid; 227 | mem_we_i = be_iob_wstrb; 228 | mem_addr_i = be_iob_addr; 229 | mem_d_i = be_iob_wdata; 230 | be_iob_rdata = mem_d_o; 231 | 232 | iob_reg_rvalid_nxt = be_iob_valid & (~(|be_iob_wstrb)); 233 | be_iob_rvalid = iob_reg_rvalid; 234 | """ 235 | attributes_dict["comb"] = {"code": comb_code} 236 | 237 | return attributes_dict 238 | -------------------------------------------------------------------------------- 
/hardware/modules/iob_cache_memory/hardware/src/iob_cache_replacement_policy.v:
--------------------------------------------------------------------------------
// SPDX-FileCopyrightText: 2024 IObundle
//
// SPDX-License-Identifier: MIT

`timescale 1ns / 1ps

`include "iob_cache_memory_conf.vh"

// Replacement policy: for the cache line addressed by line_addr_i, selects
// the way to be replaced (way_select_o as a way mask, way_select_bin_o as a
// binary index) and updates the per-line usage state from way_hit_i.
// REP_POLICY selects the generated implementation: LRU, PLRU_MRU, or (any
// other value) PLRU_TREE. The usage state is kept in an iob_regarray_sp
// instance that is written when write_en_i is asserted.
module iob_cache_replacement_policy #(
   parameter N_WAYS     = 8,
   parameter NLINES_W   = 0,
   parameter NWAYS_W    = $clog2(N_WAYS),
   parameter REP_POLICY = `IOB_CACHE_MEMORY_PLRU_TREE
) (
   input                 clk_i,
   input                 cke_i,
   input                 reset_i,
   input                 write_en_i,
   input  [  N_WAYS-1:0] way_hit_i,
   input  [NLINES_W-1:0] line_addr_i,
   output [  N_WAYS-1:0] way_select_o,
   output [ NWAYS_W-1:0] way_select_bin_o
);

   genvar i, j;

   generate
      if (REP_POLICY == `IOB_CACHE_MEMORY_LRU) begin : g_LRU
         wire [N_WAYS*NWAYS_W-1:0] mru_out, mru_in;
         // MRU values used by the LRU algorithm; falls back to the initial
         // priority values on the first access or after an invalidate
         wire [N_WAYS*NWAYS_W-1:0] mru;
         // Updated MRU line: the way that was hit gets the highest value,
         // while the ways ranked above its previous value are decremented
         wire [N_WAYS*NWAYS_W-1:0] mru_cnt;
         wire [NWAYS_W-1:0] way_hit_bin;
         reg [NWAYS_W-1:0] mru_index;

         iob_cache_onehot_to_bin #(NWAYS_W) way_hit_binary (
            .onehot_i(way_hit_i[N_WAYS-1:1]),
            .bin_o   (way_hit_bin)
         );

         // Stored MRU value of the way that was hit
         always @(*) begin
            mru_index = mru_out[(way_hit_bin*NWAYS_W) +: NWAYS_W];
         end

         for (i = 0; i < N_WAYS; i = i + 1) begin : encoder_decoder
            // LRU - Encoder
            // Checks whether the MRU line has been initialized (any bit of
            // mru_out is HIGH); otherwise applies the priority values (i)
            assign mru [i*NWAYS_W +: NWAYS_W] = (|mru_out)? mru_out [i*NWAYS_W +: NWAYS_W] : i;
            // The way that was hit is updated to the highest value; each of
            // the remaining ways is decremented if its value was bigger than
            // the previous value of the hit way (mru_index)
            assign mru_cnt [i*NWAYS_W +: NWAYS_W] = (way_hit_i[i])? {NWAYS_W{1'b1}} : (mru[i*NWAYS_W +: NWAYS_W] > mru_index) ? mru[i*NWAYS_W +: NWAYS_W] - 1 : mru[i*NWAYS_W +: NWAYS_W];

            // LRU - Decoder (checks every index in search of the lowest (0))
            // Selects the way that has the lowest priority (mru = 0)
            assign way_select_o [i] = ~(|mru[i*NWAYS_W+:NWAYS_W]);
         end

         // Only updates when a hit occurred, to avoid updating during a
         // (write) miss (mru_cnt would decrement every way besides the lowest)
         assign mru_in = (|way_hit_i)? mru_cnt : mru_out;

         // Most Recently Used (MRU) memory
         iob_regarray_sp #(
            .ADDR_W(NLINES_W),
            .DATA_W(N_WAYS * NWAYS_W)
         ) mru_memory  // simply uses the same format as valid memory
         (
            .clk_i (clk_i),
            .cke_i (cke_i),
            .arst_i(reset_i),

            .rst_i (1'b0),
            .we_i  (write_en_i),
            .addr_i(line_addr_i),
            .d_i   (mru_in),
            .d_o   (mru_out)
         );

         iob_cache_onehot_to_bin #(NWAYS_W) onehot_bin (
            .onehot_i(way_select_o[N_WAYS-1:1]),
            .bin_o   (way_select_bin_o)
         );
      end else if (REP_POLICY == `IOB_CACHE_MEMORY_PLRU_MRU) begin : g_PLRU_MRU
         wire [N_WAYS -1:0] mru_in, mru_out;

         // pseudo LRU MRU based Encoder (More Recently-Used bits):
         // A hit sets the bit of the way that was hit; if that would leave
         // every bit set, the line restarts with only the hit way marked
         assign mru_in = (&(mru_out | way_hit_i))? way_hit_i : mru_out | way_hit_i;

         // pseudo LRU MRU based Decoder:
         // selects the lowest-index way whose MRU bit is clear
         for (i = 1; i < N_WAYS; i = i + 1) begin : g_way_select_block
            assign way_select_o[i] = ~mru_out[i] & (&mru_out[i-1:0]);  // verifies priority (lower index)
         end
         assign way_select_o[0] = ~mru_out[0];

         // Most Recently Used (MRU) memory
         iob_regarray_sp #(
            .ADDR_W(NLINES_W),
            .DATA_W(N_WAYS)
         ) mru_memory  // simply uses the same format as valid memory
         (
            .clk_i (clk_i),
            .cke_i (cke_i),
            .arst_i(reset_i),

            .rst_i (1'b0),
            .we_i  (write_en_i),
            .addr_i(line_addr_i),
            .d_i   (mru_in),
            .d_o   (mru_out)
         );

         iob_cache_onehot_to_bin #(NWAYS_W) onehot_bin (
            .onehot_i(way_select_o[N_WAYS-1:1]),
            .bin_o   (way_select_bin_o)
         );
      end else begin : g_PLRU_TREE
         // (REP_POLICY == PLRU_TREE)
         /*
      i: tree level, start from 1, i <= NWAYS_W
      j: tree node id @ i level, start from 0, j < (1<<(i-1))
      (((1<<(i-1))+j)*2)*(1<<(NWAYS_W-i))   ==> start node id of left tree @ the lowest level node pointed to
      (((1<<(i-1))+j)*2+1)*(1<<(NWAYS_W-i)) ==> start node id of right tree @ the lowest level node pointed to

      way_hit_i[(((1<<(i-1))+j)*2)*(1<<(NWAYS_W-i))-N_WAYS +: (N_WAYS>>i)]   ==> way hit range of left tree
      way_hit_i[(((1<<(i-1))+j)*2+1)*(1<<(NWAYS_W-i))-N_WAYS +: (N_WAYS>>i)] ==> way hit range of right tree


      == tree traverse ==

                              <--0     1-->              traverse direction
                                   [1]                   node id @ level1
                   [2]                           [3]     node id @ level2 ==> which to traverse? from node_id[1]
           [4]           [5]           [6]           [7] node id @ level3 ==> which to traverse? from node_id[2]
       [08]   [09]   [10]   [11]   [12]   [13]   [14]   [15] node id @ level4 ==> which to traverse? from node_id[3]
       (00)   (01)   (02)   (03)   (04)   (05)   (06)   (07) way idx

      node value is 0 -> left tree traverse
      node value is 1 -> right tree traverse

      node id mapping to way idx: node_id[NWAYS_W]-N_WAYS
      */

         wire [N_WAYS -1:1] tree_in, tree_out;
         wire [NWAYS_W:0] node_id[NWAYS_W:1];
         assign node_id[1] = tree_out[1] ? 3 : 2;  // next node id @ level2 to traverse
         for (i = 2; i <= NWAYS_W; i = i + 1) begin : g_traverse_tree_level
            // next node id @ level3, level4, ..., to traverse
            assign node_id[i] = tree_out[node_id[i-1]] ? ((node_id[i-1]<<1)+1) : (node_id[i-1]<<1);
         end

         // Tree update: without a hit every node keeps its value; on a hit, a
         // node is set when the hit is in its left subtree, cleared when the
         // hit is in its right subtree, and unchanged otherwise
         for (i = 1; i <= NWAYS_W; i = i + 1) begin : tree_level
            for (j = 0; j < (1 << (i - 1)); j = j + 1) begin : tree_level_node
               assign tree_in[(1<<(i-1))+j] = ~(|way_hit_i) ? tree_out[(1<<(i-1))+j] :
                      (|way_hit_i[((((1<<(i-1))+j)*2)*(1<<(NWAYS_W-i)))-N_WAYS +: (N_WAYS>>i)]) ||
                      (tree_out[(1<<(i-1))+j] && (~(|way_hit_i[((((1<<(i-1))+j)*2+1)*(1<<(NWAYS_W-i)))-N_WAYS +: (N_WAYS>>i)])));
            end
         end

         // The last traversed node id maps to the way index; the way mask is
         // derived from the binary index
         assign way_select_bin_o = node_id[NWAYS_W] - N_WAYS;
         assign way_select_o = (1 << way_select_bin_o);

         // Most Recently Used (MRU) memory
         iob_regarray_sp #(
            .ADDR_W(NLINES_W),
            .DATA_W(N_WAYS - 1)
         ) mru_memory  // simply uses the same format as valid memory
         (
            .clk_i (clk_i),
            .cke_i (cke_i),
            .arst_i(reset_i),

            .rst_i (1'b0),
            .we_i  (write_en_i),
            .addr_i(line_addr_i),
            .d_i   (tree_in),
            .d_o   (tree_out)
         );
      end
   endgenerate

endmodule

--------------------------------------------------------------------------------
/hardware/modules/iob_cache_back_end_axi/hardware/src/iob_cache_read_channel_axi.v:
--------------------------------------------------------------------------------
// SPDX-FileCopyrightText: 2024 IObundle
//
// SPDX-License-Identifier: MIT

`timescale 1ns / 1ps

`include "iob_cache_axi_conf.vh"

// AXI read channel of the cache back end. When replace_valid_i is asserted it
// issues an AXI read at the address derived from replace_addr_i and forwards
// the returned data: read_rdata_o/read_valid_o mirror axi_rdata_i and
// axi_rvalid_i, and read_addr_o counts the received words. replace_o is high
// whenever the state machine is not idle.
// With LINE2BE_W > 0 the read is an incremental burst of 2**LINE2BE_W words;
// otherwise it is a single-word transfer.
module iob_cache_read_channel_axi #(
   parameter                ADDR_W        = 1,
   parameter                DATA_W        = 32,
   parameter                BE_ADDR_W     = `IOB_CACHE_AXI_BE_ADDR_W,
   parameter                BE_DATA_W     = `IOB_CACHE_AXI_BE_DATA_W,
   parameter                WORD_OFFSET_W = `IOB_CACHE_AXI_WORD_OFFSET_W,
   parameter                AXI_ID_W      = `IOB_CACHE_AXI_AXI_ID_W,
   parameter [AXI_ID_W-1:0] AXI_ID        = `IOB_CACHE_AXI_AXI_ID,
   parameter                AXI_LEN_W     = `IOB_CACHE_AXI_AXI_LEN_W,
   parameter                AXI_ADDR_W    = BE_ADDR_W,
   parameter                AXI_DATA_W    = BE_DATA_W,
   //derived parameters
   parameter                BE_NBYTES     = BE_DATA_W / 8,
   parameter                BE_NBYTES_W   = $clog2(BE_NBYTES),
   parameter                LINE2BE_W     = WORD_OFFSET_W - $clog2(BE_DATA_W / DATA_W)
) (
   input                                           replace_valid_i,
   input      [ADDR_W-(BE_NBYTES_W+LINE2BE_W)-1:0] replace_addr_i,
   output reg                                      replace_o,
   output                                          read_valid_o,
   output reg [                     LINE2BE_W-1:0] read_addr_o,
   output     [                     BE_DATA_W-1:0] read_rdata_o,

   output [AXI_ADDR_W-1:0] axi_araddr_o,
   output [         3-1:0] axi_arprot_o,
   output                  axi_arvalid_o,
   input                   axi_arready_i,
   input  [AXI_DATA_W-1:0] axi_rdata_i,
   input  [         2-1:0] axi_rresp_i,
   input                   axi_rvalid_i,
   output                  axi_rready_o,
   output [  AXI_ID_W-1:0] axi_arid_o,
   output [ AXI_LEN_W-1:0] axi_arlen_o,
   output [         3-1:0] axi_arsize_o,
   output [         2-1:0] axi_arburst_o,
   output                  axi_arlock_o,
   output [         4-1:0] axi_arcache_o,
   output [         4-1:0] axi_arqos_o,
   input  [  AXI_ID_W-1:0] axi_rid_i,
   input                   axi_rlast_i,

   input clk_i,
   input reset_i
);

   reg axi_arvalid_int;
   reg axi_rready_int;

   assign axi_arvalid_o = axi_arvalid_int;
   assign axi_rready_o  = axi_rready_int;


   generate
      if (LINE2BE_W > 0) begin : g_line2be_w
         // Constant AXI signals
         assign axi_arid_o    = AXI_ID;
         assign axi_arlock_o  = 1'b0;
         assign axi_arcache_o = 4'b0011;
         assign axi_arprot_o  = 3'd0;
         assign axi_arqos_o   = 4'd0;

         // Burst parameters
         assign axi_arlen_o = 2**LINE2BE_W - 1'b1;  // will choose the burst length depending on the cache's and slave's data width
         assign axi_arsize_o = BE_NBYTES_W[3-1:0];  // each word will be the width of the memory for maximum bandwidth
         assign axi_arburst_o = 2'b01;  // incremental burst
         assign axi_araddr_o = {BE_ADDR_W{1'b0}} + {replace_addr_i, {(LINE2BE_W+BE_NBYTES_W){1'b0}}};  // base address for the burst, with width extension

         // Read Line values
         assign read_rdata_o = axi_rdata_i;
         assign read_valid_o = axi_rvalid_i;

         localparam idle = 2'd0, init_process = 2'd1, load_process = 2'd2, end_process = 2'd3;

         reg [1:0] state;
         reg slave_error;  // axi slave_error during reply (axi_rresp[1] == 1) - burst can't be interrupted, so a flag needs to be active

         always @(posedge clk_i, posedge reset_i) begin
            if (reset_i) begin
               state       <= idle;
               read_addr_o <= 0;
               slave_error <= 0;
            end else begin
               slave_error <= slave_error;

               case (state)
                  idle: begin
                     slave_error <= 0;
                     read_addr_o <= 0;
                     if (replace_valid_i) state <= init_process;
                     else state <= idle;
                  end
                  init_process: begin
                     slave_error <= 0;
                     read_addr_o <= 0;
                     if (axi_arready_i) state <= load_process;
                     else state <= init_process;
                  end
                  load_process: begin
                     if (axi_rvalid_i)
                        if (axi_rlast_i) begin
                           state <= end_process;
                           // to avoid writing last data in first line word
                           read_addr_o <= read_addr_o;
                           // slave_error - received at the same time as the valid - needs to wait until the end to start all over - going directly to init_process would cause a stall to this burst
                           if (axi_rresp_i != 2'b00) slave_error <= 1;
                        end else begin
                           read_addr_o <= read_addr_o + 1'b1;
                           state       <= load_process;
                           // slave_error - received at the same time as the valid - needs to wait until the end to start all over - going directly to init_process would cause a stall to this burst
                           if (axi_rresp_i != 2'b00) slave_error <= 1;
                        end
                     else begin
                        read_addr_o <= read_addr_o;
                        state       <= load_process;
                     end
                  end
                  // end_process - delay for the read_latency of the memories (if the rdata is the last word)
                  // restarts the whole burst when an error was flagged during it
                  default: begin
                     if (slave_error) state <= init_process;
                     else state <= idle;
                  end
               endcase
            end
         end

         always @* begin
            axi_arvalid_int = 1'b0;
            axi_rready_int  = 1'b0;
            replace_o       = 1'b1;

            case (state)
               idle:         replace_o = 1'b0;
               init_process: axi_arvalid_int = 1'b1;
               // the default branch covers load_process and also end_process
               default:      axi_rready_int = 1'b1;  // load_process
            endcase
         end

      end else begin : g_no_line2be_w
         // Constant AXI signals
         assign axi_arid_o    = AXI_ID;
         assign axi_arlock_o  = 1'b0;
         assign axi_arcache_o = 4'b0011;
         assign axi_arprot_o  = 3'd0;
         assign axi_arqos_o   = 4'd0;

         // Burst parameters - single
         assign axi_arlen_o = 8'd0;  // A single burst of Memory data width word
         assign axi_arsize_o = BE_NBYTES_W[3-1:0];  // each word will be the width of the memory for maximum bandwidth
         assign axi_arburst_o = 2'b00;
         assign axi_araddr_o = {BE_ADDR_W{1'b0}} + {replace_addr_i, {BE_NBYTES_W{1'b0}}};  // base address for the burst, with width extension

         // Read Line values
         assign read_valid_o = axi_rvalid_i;
         assign read_rdata_o = axi_rdata_i;

         localparam idle = 2'd0, init_process = 2'd1, load_process = 2'd2, end_process = 2'd3;

         reg [1:0] state;

         always @(posedge clk_i, posedge reset_i) begin
            if (reset_i) state <= idle;
            else
               case (state)
                  idle: begin
                     if (replace_valid_i) state <= init_process;
                     else state <= idle;
                  end
                  init_process: begin
                     if (axi_arready_i) state <= load_process;
                     else state <= init_process;
                  end
                  load_process: begin
                     if (axi_rvalid_i)
                        if (axi_rresp_i != 2'b00)  // slave_error - received at the same time as valid
                           state <= init_process;  // a single transfer can be re-issued right away
                        else state <= end_process;
                     else state <= load_process;
                  end
                  end_process:
                  state <= idle;  // delay for the read_latency of the memories (if the rdata is the last word)
                  default: ;
               endcase
         end

         always @* begin
            axi_arvalid_int = 1'b0;
            axi_rready_int  = 1'b0;
            replace_o       = 1'b1;

            case (state)
               idle: begin
                  replace_o = 1'b0;
               end
               init_process: begin
                  axi_arvalid_int = 1'b1;
               end
               load_process: begin
                  axi_rready_int = 1'b1;
               end
               // end_process: keeps the defaults (replace_o high, no AXI handshake)
               default: ;
            endcase
         end
      end
   endgenerate

endmodule

--------------------------------------------------------------------------------
/hardware/modules/iob_cache_memory/iob_cache_memory.py:
--------------------------------------------------------------------------------
# SPDX-FileCopyrightText: 2024 IObundle
#
# SPDX-License-Identifier: MIT


def setup(py_params: dict):
    """Build the attributes dictionary of the cache memory core.

    Args:
        py_params: Parameters passed by the issuer. Must contain a non-empty
            "config_macros" entry; it is concatenated with the list of confs
            declared here, so it is expected to be a list of conf
            dictionaries.

    Returns:
        The attributes dictionary, with the keys "generate_hw", "confs",
        "ports", "wires", "subblocks" and "snippets".

    Raises:
        AssertionError: If "config_macros" is missing or empty.
    """
    assert py_params.get(
        "config_macros", ""
    ), "Cache memory needs cache's configuration macros, like LRU, PLRU_MRU, etc."

    # Create dictionary with attributes of cache
    attributes_dict = {
        "generate_hw": False,
    }
    #
    # Confs
    #
    # The issuer's configuration macros come first, followed by the
    # parameters ("P") and derived parameters ("D") of the cache memory.
    attributes_dict["confs"] = py_params["config_macros"] + [
        {
            "name": "FE_ADDR_W",
            "descr": "Front-end address width (log2): defines the total memory space accessible via the cache, which must be a power of two.",
            "type": "P",
            "val": "24",
            "min": "1",
            "max": "64",
        },
        {
            "name": "FE_DATA_W",
            "descr": "Front-end data width (log2): this parameter allows supporting processing elements with various data widths.",
            "type": "P",
            "val": "32",
            "min": "32",
            "max": "64",
        },
        {
            "name": "BE_DATA_W",
            "descr": "Back-end data width (log2): the value of this parameter must be an integer multiple $k \\geq 1$ of DATA_W. If $k>1$, the memory controller can operate at a frequency higher than the cache's frequency. Typically, the memory controller has an asynchronous FIFO interface, so that it can sequentially process multiple commands received in paralell from the cache's back-end interface. ",
            "type": "P",
            "val": "32",
            "min": "32",
            "max": "256",
        },
        {
            "name": "NWAYS_W",
            "descr": "Number of cache ways (log2): the miminum is 0 for a directly mapped cache; the default is 1 for a two-way cache; the maximum is limited by the desired maximum operating frequency, which degrades with the number of ways. ",
            "type": "P",
            "val": "1",
            "min": "0",
            "max": "8",
        },
        {
            "name": "NLINES_W",
            "descr": "Line offset width (log2): the value of this parameter equals the number of cache lines, given by 2**NLINES_W.",
            "type": "P",
            "val": "7",
            "min": "",
            "max": "",
        },
        {
            "name": "WORD_OFFSET_W",
            "descr": "Word offset width (log2): the value of this parameter equals the number of words per line, which is 2**OFFSET_W. ",
            "type": "P",
            "val": "3",
            "min": "1",
            "max": "",
        },
        {
            "name": "WTBUF_DEPTH_W",
            "descr": "Write-through buffer depth (log2). A shallow buffer will fill up more frequently and cause write stalls; however, on a Read After Write (RAW) event, a shallow buffer will empty faster, decreasing the duration of the read stall. A deep buffer is unlkely to get full and cause write stalls; on the other hand, on a RAW event, it will take a long time to empty and cause long read stalls.",
            "type": "P",
            "val": "4",
            "min": "",
            "max": "",
        },
        {
            "name": "REP_POLICY",
            "descr": "Line replacement policy: set to 0 for Least Recently Used (LRU); set to 1 for Pseudo LRU based on Most Recently Used (PLRU_MRU); set to 2 for tree-based Pseudo LRU (PLRU_TREE).",
            "type": "P",
            "val": "0",
            "min": "0",
            "max": "3",
        },
        {
            "name": "WRITE_POL",
            "descr": "Write policy: set to 0 for write-through or set to 1 for write-back.",
            "type": "P",
            "val": "0 ",
            "min": "0",
            "max": "1",
        },
        {
            "name": "USE_CTRL",
            "descr": "Instantiates a cache controller (1) or not (0). The cache controller provides memory-mapped software accessible registers to invalidate the cache data contents, and monitor the write through buffer status using the front-end interface. To access the cache controller, the MSB of the address mut be set to 1. For more information refer to the example software functions provided.",
            "type": "P",
            "val": "0",
            "min": "0",
            "max": "1",
        },
        {
            "name": "USE_CTRL_CNT",
            "descr": "Instantiates hit/miss counters for reads, writes or both (1), or not (0). This parameter is meaningful if the cache controller is present (USE_CTRL: 1), providing additional software accessible functions for these functions.",
            "type": "P",
            "val": "0",
            "min": "0",
            "max": "1",
        },
        # Derived parameters
        {
            "name": "FE_NBYTES",
            "type": "D",
            "val": "FE_DATA_W / 8",
            "min": "0",
            "max": "32",
        },
        {
            "name": "FE_NBYTES_W",
            "type": "D",
            "val": "$clog2(FE_NBYTES)",
            "min": "0",
            "max": "32",
        },
        {
            "name": "BE_NBYTES",
            "type": "D",
            "val": "BE_DATA_W / 8",
            "min": "0",
            "max": "32",
        },
        {
            "name": "BE_NBYTES_W",
            "type": "D",
            "val": "$clog2(BE_NBYTES)",
            "min": "0",
            "max": "32",
        },
        {
            "name": "LINE2BE_W",
            "type": "D",
            "val": "WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W)",
            "min": "0",
            "max": "32",
        },
        {
            "name": "ADDR_W",
            "type": "D",
            "val": "FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)",
            "min": "0",
            "max": "32",
        },
        {
            "name": "ADDR_REG_W",
            "type": "D",
            "val": "FE_ADDR_W-FE_NBYTES_W",
            "min": "0",
            "max": "32",
        },
    ]
    #
    # Ports
    #
    attributes_dict["ports"] = [
        {
            "name": "clk_en_rst_s",
            "descr": "Clock, clock enable and synchronous reset",
            "signals": {
                "type": "iob_clk",
            },
        },
        {
            "name": "fe_io",
            "descr": "Cache memory front-end interface",
            "signals": [
                {"name": "req_i", "width": 1},
                {"name": "addr_i", "width": "ADDR_W"},
                {"name": "rdata_o", "width": "FE_DATA_W"},
                {"name": "ack_o", "width": 1},
                {"name": "req_reg_i", "width": 1},
                {"name": "addr_reg_i", "width": "ADDR_REG_W"},
                {"name": "wdata_reg_i", "width": "FE_DATA_W"},
                {"name": "wstrb_reg_i", "width": "FE_NBYTES"},
            ],
        },
        {
            "name": "be_write_io",
            "descr": "Back-end write channel",
            "signals": [
                {"name": "write_req_o", "width": 1},
                # The WRITE_POL factor switches the address and data widths
                # between the two write policies (0 and 1).
                {
                    "name": "write_addr_o",
                    "width": "FE_ADDR_W - (FE_NBYTES_W + WRITE_POL*WORD_OFFSET_W)",
                },
                {
                    "name": "write_wdata_o",
                    "width": "FE_DATA_W + WRITE_POL*(FE_DATA_W*(2**WORD_OFFSET_W)-FE_DATA_W)",
                },
                {"name": "write_wstrb_o", "width": "FE_NBYTES"},
                {"name": "write_ack_i", "width": 1},
            ],
        },
        {
            "name": "be_read_io",
            "descr": "Back-end read channel",
            "signals": [
                {"name": "replace_req_o", "width": 1},
                {"name": "replace_i", "width": 1},
                {
                    "name": "replace_addr_o",
                    "width": "FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)",
                },
                {"name": "read_req_i", "width": 1},
                {"name": "read_addr_i", "width": "LINE2BE_W"},
                {"name": "read_rdata_i", "width": "BE_DATA_W"},
            ],
        },
        {
            "name": "ctrl_io",
            "descr": "",
            "signals": [
                {"name": "invalidate_i", "width": 1},
                {"name": "wtbuf_full_o", "width": 1},
                {"name": "wtbuf_empty_o", "width": 1},
                {"name": "write_hit_o", "width": 1},
                {"name": "write_miss_o", "width": 1},
                {"name": "read_hit_o", "width": 1},
                {"name": "read_miss_o", "width": 1},
            ],
        },
    ]
    #
    # Wires
    #
    attributes_dict["wires"] = []
    #
    # Subblocks
    #
    attributes_dict["subblocks"] = [
        {
            "core_name": "iob_ram_t2p",
            "instance_name": "iob_ram_t2p_inst",
        },
        {
            "core_name": "iob_fifo_sync",
            "instance_name": "iob_fifo_sync_inst",
        },
        {
            "core_name": "iob_ram_sp",
            "instance_name": "iob_ram_sp_inst",
        },
        # For iob_cache_replacement_policy.v
        {
            "core_name": "iob_regarray_sp",
            "instance_name": "iob_regarray_sp_inst",
        },
    ]
    #
    # Snippets
    #
    attributes_dict["snippets"] = []

    return attributes_dict
261 | -------------------------------------------------------------------------------- /hardware/modules/iob_cache_back_end_axi/hardware/src/iob_cache_write_channel_axi.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | `include "iob_cache_axi_conf.vh" 8 | 9 | module iob_cache_write_channel_axi #( 10 | parameter ADDR_W = 1, 11 | parameter DATA_W = 32, 12 | parameter FE_DATA_W = `IOB_CACHE_AXI_FE_DATA_W, 13 | parameter BE_ADDR_W = `IOB_CACHE_AXI_BE_ADDR_W, 14 | parameter BE_DATA_W = `IOB_CACHE_AXI_BE_DATA_W, 15 | parameter WRITE_POL = `IOB_CACHE_AXI_WRITE_THROUGH, 16 | parameter WORD_OFFSET_W = `IOB_CACHE_AXI_WORD_OFFSET_W, 17 | parameter AXI_ID_W = `IOB_CACHE_AXI_AXI_ID_W, 18 | parameter [AXI_ID_W-1:0] AXI_ID = `IOB_CACHE_AXI_AXI_ID, 19 | parameter AXI_LEN_W = `IOB_CACHE_AXI_AXI_LEN_W, 20 | parameter AXI_ADDR_W = BE_ADDR_W, 21 | parameter AXI_DATA_W = BE_DATA_W, 22 | //derived parameters 23 | parameter FE_NBYTES = FE_DATA_W / 8, 24 | parameter FE_NBYTES_W = $clog2(FE_NBYTES), 25 | parameter BE_NBYTES = BE_DATA_W / 8, 26 | parameter BE_NBYTES_W = $clog2(BE_NBYTES), 27 | parameter LINE2BE_W = WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W) 28 | ) ( 29 | input valid_i, 30 | input [ ADDR_W-1 : FE_NBYTES_W + WRITE_POL*WORD_OFFSET_W] addr_i, 31 | input [DATA_W + WRITE_POL*(DATA_W*(2**WORD_OFFSET_W)-DATA_W)-1 : 0] wdata_i, 32 | input [ FE_NBYTES-1:0] wstrb_i, 33 | output reg ready_o, 34 | 35 | output [ AXI_ADDR_W-1:0] axi_awaddr_o, 36 | output [ 3-1:0] axi_awprot_o, 37 | output axi_awvalid_o, 38 | input axi_awready_i, 39 | output [ AXI_DATA_W-1:0] axi_wdata_o, 40 | output [AXI_DATA_W/8-1:0] axi_wstrb_o, 41 | output axi_wvalid_o, 42 | input axi_wready_i, 43 | input [ 2-1:0] axi_bresp_i, 44 | input axi_bvalid_i, 45 | output axi_bready_o, 46 | output [ AXI_ID_W-1:0] axi_awid_o, 47 | output [ AXI_LEN_W-1:0] axi_awlen_o, 48 
| output [ 3-1:0] axi_awsize_o, 49 | output [ 2-1:0] axi_awburst_o, 50 | output axi_awlock_o, 51 | output [ 4-1:0] axi_awcache_o, 52 | output [ 4-1:0] axi_awqos_o, 53 | output axi_wlast_o, 54 | input [ AXI_ID_W-1:0] axi_bid_i, 55 | 56 | input clk_i, 57 | input reset_i 58 | ); 59 | 60 | reg axi_awvalid_int; 61 | reg axi_wvalid_int; 62 | reg axi_bready_int; 63 | 64 | assign axi_awvalid_o = axi_awvalid_int; 65 | assign axi_wvalid_o = axi_wvalid_int; 66 | assign axi_bready_o = axi_bready_int; 67 | 68 | genvar i; 69 | generate 70 | if (WRITE_POL == `IOB_CACHE_AXI_WRITE_THROUGH) begin : g_write_through 71 | // Constant AXI signals 72 | assign axi_awid_o = AXI_ID; 73 | assign axi_awlen_o = {AXI_LEN_W{1'd0}}; 74 | 75 | assign axi_awsize_o = BE_NBYTES_W[3-1:0]; // verify - Writes data of the size of BE_DATA_W 76 | assign axi_awburst_o = 2'd0; 77 | assign axi_awlock_o = 1'b0; // 00 - Normal Access 78 | assign axi_awcache_o = 4'b0011; 79 | assign axi_awprot_o = 3'd0; 80 | assign axi_awqos_o = 4'd0; 81 | assign axi_wlast_o = axi_wvalid_o; 82 | 83 | // AXI Buffer Output signals 84 | assign axi_awaddr_o = {BE_ADDR_W{1'b0}} + {addr_i[ADDR_W-1 : BE_NBYTES_W], {BE_NBYTES_W{1'b0}}}; 85 | 86 | if (BE_DATA_W == DATA_W) begin : g_same_data_w 87 | assign axi_wstrb_o = wstrb_i; 88 | assign axi_wdata_o = wdata_i; 89 | end else begin : g_not_same_data_w 90 | wire [BE_NBYTES_W - FE_NBYTES_W -1 :0] word_align = addr_i[FE_NBYTES_W +: (BE_NBYTES_W - FE_NBYTES_W)]; 91 | assign axi_wstrb_o = wstrb_i << (word_align * FE_NBYTES); 92 | 93 | for (i = 0; i < BE_DATA_W / DATA_W; i = i + 1) begin : g_wdata_block 94 | assign axi_wdata_o[(i+1)*DATA_W-1:i*DATA_W] = wdata_i; 95 | end 96 | end 97 | 98 | localparam idle = 2'd0, address = 2'd1, write = 2'd2, verif = 2'd3; 99 | 100 | reg [1:0] state; 101 | 102 | always @(posedge clk_i, posedge reset_i) begin 103 | if (reset_i) state <= idle; 104 | else 105 | case (state) 106 | idle: begin 107 | if (valid_i) state <= address; 108 | else state <= idle; 109 | end 
110 | address: begin 111 | if (axi_awready_i) state <= write; 112 | else state <= address; 113 | end 114 | write: begin 115 | if (axi_wready_i) state <= verif; 116 | else state <= write; 117 | end 118 | default: begin // verif - needs to be after the last word has been written, so this can't be optim 119 | if (axi_bvalid_i & (axi_bresp_i == 2'b00) & ~valid_i) 120 | state <= idle; // no more words to write 121 | else if (axi_bvalid_i & (axi_bresp_i == 2'b00) & valid_i) 122 | state <= address; // buffer still isn't empty 123 | else if (axi_bvalid_i & ~(axi_bresp_i == 2'b00)) // error 124 | state <= address; // goes back to transfer the same data. 125 | else state <= verif; 126 | end 127 | endcase 128 | end 129 | 130 | always @* begin 131 | ready_o = 1'b0; 132 | axi_awvalid_int = 1'b0; 133 | axi_wvalid_int = 1'b0; 134 | axi_bready_int = 1'b0; 135 | 136 | case (state) 137 | idle: ready_o = 1'b1; 138 | address: axi_awvalid_int = 1'b1; 139 | write: axi_wvalid_int = 1'b1; 140 | default: begin // verif 141 | axi_bready_int = 1'b1; 142 | ready_o = axi_bvalid_i & ~(|axi_bresp_i); 143 | end 144 | endcase 145 | end 146 | end else begin : g_write_back // if (WRITE_POL == `IOB_CACHE_AXI_WRITE_BACK) 147 | if (LINE2BE_W > 0) begin : g_line2be_w 148 | // Constant AXI signals 149 | assign axi_awid_o = AXI_ID; 150 | assign axi_awlock_o = 1'b0; 151 | assign axi_awcache_o = 4'b0011; 152 | assign axi_awprot_o = 3'd0; 153 | assign axi_awqos_o = 4'd0; 154 | 155 | // Burst parameters 156 | assign axi_awlen_o = 2**LINE2BE_W - 1; // will choose the burst lenght depending on the cache's and slave's data width 157 | assign axi_awsize_o = BE_NBYTES_W[3-1:0]; // each word will be the width of the memory for maximum bandwidth 158 | assign axi_awburst_o = 2'b01; // incremental burst 159 | 160 | // memory address 161 | assign axi_awaddr_o = {BE_ADDR_W{1'b0}} + {addr_i, {(FE_NBYTES_W+WORD_OFFSET_W){1'b0}}}; // base address for the burst, with width extension 162 | 163 | // memory write-data 164 | 
reg [LINE2BE_W-1:0] word_counter; 165 | assign axi_wdata_o = wdata_i >> (word_counter * BE_DATA_W); 166 | assign axi_wstrb_o = {BE_NBYTES{1'b1}}; 167 | assign axi_wlast_o = &word_counter; 168 | 169 | localparam idle = 2'd0, address = 2'd1, write = 2'd2, verif = 2'd3; 170 | 171 | reg [1:0] state; 172 | 173 | always @(posedge clk_i, posedge reset_i) begin 174 | if (reset_i) begin 175 | state <= idle; 176 | word_counter <= 0; 177 | end else begin 178 | case (state) 179 | idle: 180 | if (valid_i) state <= address; 181 | else state <= idle; 182 | address: 183 | if (axi_awready_i) state <= write; 184 | else state <= address; 185 | write: 186 | if (axi_wready_i & (&word_counter)) begin // last word written 187 | state <= verif; 188 | word_counter <= 0; 189 | end else if (axi_wready_i & ~(&word_counter)) begin // word still available 190 | state <= write; 191 | word_counter <= word_counter + 1; 192 | end else begin // waiting for handshake 193 | state <= write; 194 | word_counter <= word_counter; 195 | end 196 | verif: 197 | if (axi_bvalid_i & (axi_bresp_i == 2'b00)) 198 | state <= idle; // write transfer completed 199 | else if (axi_bvalid_i & ~(axi_bresp_i == 2'b00)) 200 | state <= address; // error, requires re-transfer 201 | else state <= verif; // still waiting for response 202 | default: ; 203 | endcase 204 | end 205 | end 206 | 207 | always @* begin 208 | ready_o = 1'b0; 209 | axi_awvalid_int = 1'b0; 210 | axi_wvalid_int = 1'b0; 211 | axi_bready_int = 1'b0; 212 | 213 | case (state) 214 | idle: ready_o = ~valid_i; 215 | address: axi_awvalid_int = 1'b1; 216 | write: axi_wvalid_int = 1'b1; 217 | default: begin // verif 218 | axi_bready_int = 1'b1; 219 | ready_o = axi_bvalid_i & ~(|axi_bresp_i); 220 | end 221 | endcase 222 | end 223 | end else begin : g_no_line2be_w 224 | // Constant AXI signals 225 | assign axi_awid_o = AXI_ID; 226 | assign axi_awlock_o = 1'b0; 227 | assign axi_awcache_o = 4'b0011; 228 | assign axi_awprot_o = 3'd0; 229 | assign axi_awqos_o = 4'd0; 230 
| 231 | // Burst parameters - single 232 | assign axi_awlen_o = 8'd0; // A single burst of Memory data width word 233 | assign axi_awsize_o = BE_NBYTES_W; // each word will be the width of the memory for maximum bandwidth 234 | assign axi_awburst_o = 2'b00; 235 | 236 | // memory address 237 | assign axi_awaddr_o = {BE_ADDR_W{1'b0}} + {addr_i, {BE_NBYTES_W{1'b0}}}; // base address for the burst, with width extension 238 | 239 | // memory write-data 240 | assign axi_wdata_o = wdata_i; 241 | assign axi_wstrb_o = {BE_NBYTES{1'b1}}; // uses entire bandwidth 242 | assign axi_wlast_o = axi_wvalid_o; 243 | 244 | localparam idle = 2'd0, address = 2'd1, write = 2'd2, verif = 2'd3; 245 | 246 | reg [1:0] state; 247 | 248 | always @(posedge clk_i, posedge reset_i) begin 249 | if (reset_i) state <= idle; 250 | else 251 | case (state) 252 | idle: 253 | if (valid_i) state <= address; 254 | else state <= idle; 255 | address: 256 | if (axi_awready_i) state <= write; 257 | else state <= address; 258 | write: 259 | if (axi_wready_i) state <= verif; 260 | else state <= write; 261 | default: // verif 262 | if (axi_bvalid_i & (axi_bresp_i == 2'b00)) 263 | state <= idle; // write transfer completed 264 | else if (axi_bvalid_i & ~(axi_bresp_i == 2'b00)) 265 | state <= address; // error, requires re-transfer 266 | else state <= verif; // still waiting for response 267 | endcase 268 | end 269 | 270 | always @* begin 271 | ready_o = 1'b0; 272 | axi_awvalid_int = 1'b0; 273 | axi_wvalid_int = 1'b0; 274 | axi_bready_int = 1'b0; 275 | 276 | case (state) 277 | idle: ready_o = ~valid_i; 278 | address: axi_awvalid_int = 1'b1; 279 | write: axi_wvalid_int = 1'b1; 280 | default: begin // verif 281 | axi_bready_int = 1'b1; 282 | ready_o = axi_bvalid_i & ~(|axi_bresp_i); 283 | end 284 | endcase 285 | end 286 | end 287 | end 288 | endgenerate 289 | 290 | endmodule 291 | -------------------------------------------------------------------------------- 
/hardware/modules/iob_cache_memory/hardware/src/iob_cache_memory.v: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 IObundle 2 | // 3 | // SPDX-License-Identifier: MIT 4 | 5 | `timescale 1ns / 1ps 6 | 7 | `include "iob_cache_memory_conf.vh" 8 | // Cache memory block: per-way data and tag memories with valid bits, plus the write-through buffer or the dirty bits depending on WRITE_POL. 9 | module iob_cache_memory #( 10 | `include "iob_cache_memory_params.vs" 11 | ) ( 12 | `include "iob_cache_memory_io.vs" 13 | ); 14 | 15 | localparam TAG_W = FE_ADDR_W - (FE_NBYTES_W + WORD_OFFSET_W + NLINES_W); 16 | localparam NWAYS = 2 ** NWAYS_W; 17 | localparam OFFSET_PAD_W = 32 - WORD_OFFSET_W; 18 | localparam LINE_WSTRB_W = (2**WORD_OFFSET_W)*FE_NBYTES; 19 | 20 | wire hit; 21 | 22 | // cache-memory internal signals 23 | wire [NWAYS-1:0] way_hit, way_select; 24 | 25 | wire [TAG_W-1:0] tag = addr_reg_i[ADDR_REG_W-1 -: TAG_W]; // so the tag doesn't update during ack on a read-access, losing the current hit status (can take the 1 clock-cycle delay) 26 | wire [NLINES_W-1:0] index = addr_i[ADDR_W-TAG_W-1 -: NLINES_W]; // can't wait, doesn't update during a write-access 27 | wire [NLINES_W-1:0] index_reg = addr_reg_i[ADDR_REG_W-TAG_W-1 -:NLINES_W]; // can't wait, doesn't update during a write-access 28 | wire [WORD_OFFSET_W-1:0] offset = addr_reg_i[0 +: WORD_OFFSET_W]; // so the offset doesn't update during ack on a read-access (can take the 1 clock-cycle delay) 29 | wire [NWAYS*(2**WORD_OFFSET_W)*FE_DATA_W-1:0] line_rdata; 30 | wire [NWAYS*TAG_W-1:0] line_tag; 31 | reg [NWAYS*(2**NLINES_W)-1:0] v_reg; 32 | reg [NWAYS-1:0] v; 33 | 34 | reg [LINE_WSTRB_W-1:0] line_wstrb; 35 | 36 | wire write_access = |wstrb_reg_i & req_reg_i; 37 | wire read_access = ~|wstrb_reg_i & req_reg_i; 38 | // signal maintains the access for 1 additional clock-cycle after ack is asserted 39 | 40 | // back-end write channel 41 | wire buffer_empty, buffer_full; 42 | wire [FE_NBYTES+(FE_ADDR_W-FE_NBYTES_W)+(FE_DATA_W)-1:0] buffer_dout; 43 | 44 | // for write-back write-allocate only 45 | reg [ 
NWAYS-1:0] dirty; 46 | reg [ NWAYS*(2**NLINES_W)-1:0] dirty_reg; 47 | 48 | 49 | generate 50 | if (WRITE_POL == `IOB_CACHE_MEMORY_WRITE_THROUGH) begin : g_write_through 51 | localparam FIFO_DATA_W = FE_ADDR_W - FE_NBYTES_W + FE_DATA_W + FE_NBYTES; 52 | localparam FIFO_ADDR_W = WTBUF_DEPTH_W; 53 | 54 | wire mem_clk; 55 | 56 | wire mem_w_en; 57 | wire [FIFO_ADDR_W-1:0] mem_w_addr; 58 | wire [FIFO_DATA_W-1:0] mem_w_data; 59 | 60 | wire mem_r_en; 61 | wire [FIFO_ADDR_W-1:0] mem_r_addr; 62 | wire [FIFO_DATA_W-1:0] mem_r_data; 63 | 64 | // FIFO memory (storage used by the write-through buffer FIFO below) 65 | iob_ram_t2p #( 66 | .DATA_W(FIFO_DATA_W), 67 | .ADDR_W(FIFO_ADDR_W) 68 | ) iob_ram_t2p0 ( 69 | .clk_i(mem_clk), 70 | 71 | .w_en_i (mem_w_en), 72 | .w_addr_i(mem_w_addr), 73 | .w_data_i(mem_w_data), 74 | 75 | .r_en_i (mem_r_en), 76 | .r_addr_i(mem_r_addr), 77 | .r_data_o(mem_r_data) 78 | ); 79 | 80 | iob_fifo_sync #( 81 | .R_DATA_W(FIFO_DATA_W), 82 | .W_DATA_W(FIFO_DATA_W), 83 | .ADDR_W (FIFO_ADDR_W) 84 | ) write_throught_buffer ( 85 | .clk_i (clk_i), 86 | .rst_i (1'b0), 87 | .arst_i(arst_i), 88 | .cke_i (1'b1), 89 | 90 | .ext_mem_clk_o(mem_clk), 91 | 92 | .ext_mem_w_en_o (mem_w_en), 93 | .ext_mem_w_addr_o(mem_w_addr), 94 | .ext_mem_w_data_o(mem_w_data), 95 | 96 | .ext_mem_r_en_o (mem_r_en), 97 | .ext_mem_r_addr_o(mem_r_addr), 98 | .ext_mem_r_data_i(mem_r_data), 99 | 100 | .level_o(), 101 | 102 | .r_data_o (buffer_dout), 103 | .r_empty_o(buffer_empty), 104 | .r_en_i (write_ack_i), 105 | 106 | .w_data_i({addr_reg_i, wdata_reg_i, wstrb_reg_i}), 107 | .w_full_o(buffer_full), 108 | .w_en_i (write_access & ack_o) 109 | ); 110 | 111 | // buffer status 112 | assign wtbuf_full_o = buffer_full; 113 | assign wtbuf_empty_o = buffer_empty & write_ack_i & ~write_req_o; 114 | 115 | // back-end write channel 116 | assign write_req_o = ~buffer_empty; 117 | assign write_addr_o = buffer_dout[FE_NBYTES+FE_DATA_W+:FE_ADDR_W-FE_NBYTES_W]; 118 | assign write_wdata_o = buffer_dout[FE_NBYTES+:FE_DATA_W]; 119 | assign write_wstrb_o = 
buffer_dout[0+:FE_NBYTES]; 120 | 121 | // back-end read channel 122 | assign replace_req_o = (~hit & read_access & ~replace_i) & (buffer_empty & write_ack_i); 123 | assign replace_addr_o = addr_i[ADDR_W-1:0]; 124 | end else begin : g_write_back 125 | // if (WRITE_POL == WRITE_BACK) 126 | // back-end write channel 127 | assign write_wstrb_o = {FE_NBYTES{1'bx}}; 128 | // write_req_o, write_addr_o and write_wdata_o assigns are generated below (dependencies) 129 | 130 | // back-end read channel 131 | assign replace_req_o = (~|way_hit) & (write_ack_i) & req_reg_i & ~replace_i; 132 | assign replace_addr_o = addr_i[ADDR_W-1:0]; 133 | end 134 | endgenerate 135 | 136 | ////////////////////////////////////////////////////// 137 | // Read-After-Write (RAW) Hazard (pipeline) control 138 | ////////////////////////////////////////////////////// 139 | wire raw; 140 | reg write_hit_prev; 141 | reg [WORD_OFFSET_W-1:0] offset_prev; 142 | reg [ NWAYS-1:0] way_hit_prev; 143 | 144 | generate 145 | if (WRITE_POL == `IOB_CACHE_MEMORY_WRITE_THROUGH) begin : g_write_through_on_RAW 146 | always @(posedge clk_i) begin 147 | write_hit_prev <= write_access & (|way_hit); 148 | // previous write position 149 | offset_prev <= offset; 150 | way_hit_prev <= way_hit; 151 | end 152 | assign raw = write_hit_prev & (way_hit_prev == way_hit) & (offset_prev == offset); 153 | end else begin : g_write_back_on_RAW 154 | // if (WRITE_POL == WRITE_BACK) 155 | always @(posedge clk_i) begin 156 | // all writes will have the data in cache in the end 157 | write_hit_prev <= write_access; 158 | // previous write position 159 | offset_prev <= offset; 160 | way_hit_prev <= way_hit; 161 | end 162 | assign raw = write_hit_prev & (way_hit_prev == way_hit) & (offset_prev == offset) & read_access; 163 | // without read_access it is an infinite replacement loop 164 | end 165 | endgenerate 166 | 167 | /////////////////////////////////////////////////////////////// 168 | // Hit signal: data available and in the memory's 
output 169 | /////////////////////////////////////////////////////////////// 170 | assign hit = |way_hit & ~replace_i & (~raw); 171 | 172 | ///////////////////////////////// 173 | // front-end ACK signal 174 | ///////////////////////////////// 175 | generate 176 | if (WRITE_POL == `IOB_CACHE_MEMORY_WRITE_THROUGH) begin : g_write_through_ACK 177 | assign ack_o = (hit & read_access) | (~buffer_full & write_access); 178 | end else begin : g_write_back_ACK // if (WRITE_POL == WRITE_BACK) 179 | assign ack_o = hit & req_reg_i; 180 | end 181 | endgenerate 182 | 183 | // cache-control hit-miss counters enables 184 | generate 185 | if (USE_CTRL & USE_CTRL_CNT) begin : g_ctrl_cnt 186 | // cache-control hit-miss counters enables 187 | assign write_hit_o = ack_o & (hit & write_access); 188 | assign write_miss_o = ack_o & (~hit & write_access); 189 | assign read_hit_o = ack_o & (hit & read_access); 190 | assign read_miss_o = replace_req_o; //will also subtract read_hit_o 191 | end else begin : g_no_ctrl_cnt 192 | assign write_hit_o = 1'bx; 193 | assign write_miss_o = 1'bx; 194 | assign read_hit_o = 1'bx; 195 | assign read_miss_o = 1'bx; 196 | end 197 | endgenerate 198 | 199 | ///////////////////////////////////////// 200 | // Memories implementation configurations 201 | ///////////////////////////////////////// 202 | genvar i, j, k; 203 | generate 204 | // Data-Memory: one FE_DATA_W-wide RAM instance per (way k, back-end word j, front-end word i) 205 | for (k = 0; k < NWAYS; k = k + 1) begin : g_n_ways_block 206 | for (j = 0; j < 2 ** LINE2BE_W; j = j + 1) begin : g_line2mem_block 207 | for (i = 0; i < BE_DATA_W / FE_DATA_W; i = i + 1) begin : g_BE_block 208 | wire [FE_NBYTES-1:0] we_gen; 209 | wire [NLINES_W-1:0] addr_gen; 210 | wire [FE_DATA_W-1:0] data_in_gen; 211 | 212 | assign we_gen = {FE_NBYTES{way_hit[k]}} & line_wstrb[(j*(BE_DATA_W/FE_DATA_W)+i)*FE_NBYTES +: FE_NBYTES]; 213 | assign addr_gen = (write_access & way_hit[k] & ((j*(BE_DATA_W/FE_DATA_W)+i) == {{OFFSET_PAD_W{1'b0}}, offset}))? 
index_reg[NLINES_W-1:0] : index[NLINES_W-1:0]; 214 | assign data_in_gen = (replace_i) ? read_rdata_i[i*FE_DATA_W+:FE_DATA_W] : wdata_reg_i; 215 | 216 | iob_cache_gen_sp_ram #( 217 | .DATA_W(FE_DATA_W), 218 | .ADDR_W(NLINES_W) 219 | ) cache_memory ( 220 | .clk_i(clk_i), 221 | .en_i(req_i), 222 | .we_i (we_gen), 223 | .addr_i(addr_gen), 224 | .data_i(data_in_gen), 225 | .data_o(line_rdata[(k*(2**WORD_OFFSET_W)+j*(BE_DATA_W/FE_DATA_W)+i)*FE_DATA_W+:FE_DATA_W]) 226 | ); 227 | end 228 | end 229 | end 230 | 231 | // Cache Line Write Strobe 232 | if (LINE2BE_W > 0) begin : g_line2be_w 233 | always @* begin 234 | if (replace_i) begin 235 | // line-replacement: read_addr_i indexes the words in cache-line 236 | line_wstrb = {{(LINE_WSTRB_W-BE_NBYTES){1'b0}}, {BE_NBYTES{read_req_i}}} << (read_addr_i * BE_NBYTES); 237 | end else begin 238 | line_wstrb = {{(LINE_WSTRB_W-FE_NBYTES){1'b0}}, (wstrb_reg_i & {FE_NBYTES{write_access}})} << (offset * FE_NBYTES); 239 | end 240 | end 241 | end else begin : g_no_line2be_w 242 | always @* begin 243 | if (replace_i) begin 244 | // line-replacement: mem's word replaces entire line 245 | line_wstrb = {{(LINE_WSTRB_W-BE_NBYTES){1'b0}}, {BE_NBYTES{read_req_i}}}; 246 | end else begin 247 | line_wstrb = {{(LINE_WSTRB_W-FE_NBYTES){1'b0}}, (wstrb_reg_i & {FE_NBYTES{write_access}})} << (offset * FE_NBYTES); 248 | end 249 | end 250 | end 251 | 252 | // Valid-Tag memories & replacement-policy 253 | if (NWAYS > 1) begin : g_nways 254 | // reason for the 2 generates for single vs multiple ways 255 | wire [NWAYS_W-1:0] way_hit_bin, way_select_bin; 256 | // valid-memory 257 | always @(posedge clk_i, posedge arst_i) begin 258 | if (arst_i) v_reg <= 0; 259 | else if (invalidate_i) v_reg <= 0; 260 | else if (replace_req_o) 261 | v_reg <= v_reg | (1 << (way_select_bin * (2 ** NLINES_W) + index_reg)); 262 | else v_reg <= v_reg; 263 | end 264 | 265 | for (k = 0; k < NWAYS; k = k + 1) begin : g_tag_mem_block 266 | // valid-memory output stage register - 1 c.c. 
read-latency (cleaner simulation during rep.) 267 | always @(posedge clk_i) 268 | if (invalidate_i) v[k] <= 0; 269 | else v[k] <= v_reg[(2**NLINES_W)*k+index]; 270 | 271 | // tag-memory 272 | iob_ram_sp #( 273 | .DATA_W(TAG_W), 274 | .ADDR_W(NLINES_W) 275 | ) tag_memory ( 276 | .clk_i (clk_i), 277 | .en_i (req_i), 278 | .we_i (way_select[k] & replace_req_o), 279 | .addr_i(index[NLINES_W-1:0]), 280 | .d_i (tag), 281 | .d_o (line_tag[TAG_W*k+:TAG_W]) 282 | ); 283 | 284 | // Way hit signal - hit or replacement 285 | assign way_hit[k] = (tag == line_tag[TAG_W*k+:TAG_W]) & v[k]; 286 | end 287 | // Read Data Multiplexer 288 | wire [NWAYS*(2**WORD_OFFSET_W)*FE_DATA_W-1:0] line_rdata_tmp = line_rdata >> (FE_DATA_W*({{OFFSET_PAD_W{1'b0}}, offset} + (2**WORD_OFFSET_W)*way_hit_bin)); 289 | assign rdata_o[FE_DATA_W-1:0] = line_rdata_tmp[FE_DATA_W-1:0]; 290 | 291 | // replacement-policy module 292 | iob_cache_replacement_policy #( 293 | .N_WAYS (NWAYS), 294 | .NLINES_W (NLINES_W), 295 | .REP_POLICY(REP_POLICY) 296 | ) replacement_policy_algorithm ( 297 | .clk_i (clk_i), 298 | .cke_i (cke_i), 299 | .reset_i (arst_i | invalidate_i), 300 | .write_en_i (ack_o), 301 | .way_hit_i (way_hit), 302 | .line_addr_i (index_reg[NLINES_W-1:0]), 303 | .way_select_o (way_select), 304 | .way_select_bin_o(way_select_bin) 305 | ); 306 | 307 | // onehot-to-binary for way-hit 308 | iob_cache_onehot_to_bin #( 309 | .BIN_W(NWAYS_W) 310 | ) way_hit_encoder ( 311 | .onehot_i(way_hit[NWAYS-1:1]), 312 | .bin_o (way_hit_bin) 313 | ); 314 | 315 | // dirty-memory 316 | if (WRITE_POL == `IOB_CACHE_MEMORY_WRITE_BACK) begin : g_write_back 317 | always @(posedge clk_i, posedge arst_i) begin 318 | if (arst_i) dirty_reg <= 0; 319 | else if (write_req_o) 320 | dirty_reg <= dirty_reg & ~(1<<(way_select_bin*(2**NLINES_W) + index_reg)); // updates position with 0 321 | else if (write_access & hit) 322 | dirty_reg <= dirty_reg | (1<<(way_hit_bin*(2**NLINES_W) + index_reg)); // updates position with 1 323 | else 
dirty_reg <= dirty_reg; 324 | end 325 | 326 | for (k = 0; k < NWAYS; k = k + 1) begin : g_dirty_block 327 | // dirty-memory output stage register - 1 c.c. read-latency (cleaner simulation during rep.) 328 | always @(posedge clk_i) dirty[k] <= dirty_reg[(2**NLINES_W)*k+index]; 329 | end 330 | 331 | // flush line 332 | assign write_req_o = req_reg_i & ~(|way_hit) & dirty[way_select_bin]; //flush if there is not a hit, and the way selected is dirty 333 | wire [TAG_W-1:0] tag_flush = line_tag >> (way_select_bin * TAG_W); //auxiliary wire 334 | assign write_addr_o = { 335 | tag_flush, index_reg 336 | }; //the position of the current block in cache (not of the access) 337 | assign write_wdata_o = line_rdata >> (way_select_bin * FE_DATA_W * (2 ** WORD_OFFSET_W)); 338 | 339 | end 340 | end else begin : g_one_way // (NWAYS = 1) 341 | // valid-memory - NOTE(review): the bit is set at `index` here, while the multi-way branch uses index_reg; confirm this is intended 342 | always @(posedge clk_i, posedge arst_i) begin 343 | if (arst_i) v_reg <= 0; 344 | else if (invalidate_i) v_reg <= 0; 345 | else if (replace_req_o) v_reg <= v_reg | (1 << index); 346 | else v_reg <= v_reg; 347 | end 348 | 349 | // valid-memory output stage register - 1 c.c. read-latency (cleaner simulation during rep.) 
350 | always @(posedge clk_i) begin 351 | if (invalidate_i) v <= 0; 352 | else v <= v_reg[index]; 353 | end 354 | 355 | // tag-memory 356 | iob_ram_sp #( 357 | .DATA_W(TAG_W), 358 | .ADDR_W(NLINES_W) 359 | ) tag_memory ( 360 | .clk_i (clk_i), 361 | .en_i (req_i), 362 | .we_i (replace_req_o), 363 | .addr_i(index), 364 | .d_i (tag), 365 | .d_o (line_tag) 366 | ); 367 | 368 | // Cache hit signal that indicates which way has had the hit (also during replacement) 369 | assign way_hit = (tag == line_tag) & v; 370 | 371 | // Read Data Multiplexer 372 | assign rdata_o[FE_DATA_W-1:0] = line_rdata >> FE_DATA_W * offset; 373 | 374 | // dirty-memory 375 | if (WRITE_POL == `IOB_CACHE_MEMORY_WRITE_BACK) begin : g_write_back 376 | // dirty-memory 377 | always @(posedge clk_i, posedge arst_i) begin 378 | if (arst_i) begin 379 | dirty_reg <= 0; 380 | end else if (write_req_o) begin 381 | // updates position with 0 382 | dirty_reg <= dirty_reg & ~(1 << (index_reg)); 383 | end else if (write_access & hit) begin 384 | // updates position with 1 (needs to be index_reg otherwise updates the new index if the previous access was a write) 385 | dirty_reg <= dirty_reg | (1 << (index_reg)); 386 | end else begin 387 | dirty_reg <= dirty_reg; 388 | end 389 | end 390 | 391 | always @(posedge clk_i) dirty <= dirty_reg[index]; 392 | 393 | // flush line 394 | // flush if there is not a hit, and is dirty 395 | assign write_req_o = req_reg_i & ~(way_hit) & dirty; 396 | assign write_addr_o = { 397 | line_tag, index 398 | }; // the position of the current block in cache (not of the access) - NOTE(review): the multi-way branch concatenates index_reg here; confirm `index` is intended 399 | assign write_wdata_o = line_rdata; 400 | end 401 | end 402 | endgenerate 403 | 404 | endmodule 405 | -------------------------------------------------------------------------------- /iob_cache.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 IObundle 2 | # 3 | # SPDX-License-Identifier: MIT 4 | 5 | 6 | def setup(py_params: dict): 7 | VERSION 
= "0.71" 8 | 9 | # 10 | # List of supported python parameters 11 | # 12 | 13 | # Backend interface data width 14 | BE_DATA_W = py_params.get("be_data_w", "32") 15 | # Backend interface type 16 | BE_IF = py_params.get("be_if", "AXI4") 17 | # Name of generated cache's verilog. We may use multiple names to generate caches with different configurations. 18 | be_if = "axi" if BE_IF == "AXI4" else "iob" 19 | NAME = py_params.get("name", f"iob_cache_{be_if}") 20 | # Build directory. Usually auto-filled by Py2HWSW. 21 | BUILD_DIR = py_params.get("build_dir", "") or f"../{NAME}_V{VERSION}" 22 | 23 | # Check if parameters are valid 24 | assert BUILD_DIR, "Build directory is empty" 25 | if BE_DATA_W not in ["32", "64", "128", "256"]: 26 | print("ERROR: backend interface width must be 32, 64, 128 or 256") 27 | exit(1) 28 | if BE_IF not in ["AXI4", "IOb"]: 29 | print("ERROR: backend interface must be either AXI4 or IOb") 30 | exit(1) 31 | 32 | # Create dictionary with attributes of cache 33 | attributes_dict = { 34 | "name": NAME, 35 | "version": VERSION, 36 | "build_dir": BUILD_DIR, 37 | "generate_hw": True, 38 | "board_list": ["iob_aes_ku040_db_g"], 39 | } 40 | # 41 | # Confs 42 | # 43 | config_macros = [ 44 | { 45 | "name": "LRU", 46 | "descr": "Least Recently Used -- more resources intensive - N*log2(N) bits per cache line - Uses counters", 47 | "type": "M", 48 | "val": "0", 49 | "min": "?", 50 | "max": "?", 51 | }, 52 | { 53 | "name": "PLRU_MRU", 54 | "descr": "bit-based Pseudo-Least-Recently-Used, a simpler replacement policy than LRU, using a much lower complexity (lower resources) - N bits per cache line", 55 | "type": "M", 56 | "val": "1", 57 | "min": "?", 58 | "max": "?", 59 | }, 60 | { 61 | "name": "PLRU_TREE", 62 | "descr": "tree-based Pseudo-Least-Recently-Used, uses a tree that updates after any way received an hit, and points towards the oposing one. 
Uses less resources than bit-pseudo-lru - N-1 bits per cache line", 63 | "type": "M", 64 | "val": "2", 65 | "min": "?", 66 | "max": "?", 67 | }, 68 | # Write Policy 69 | { 70 | "name": "WRITE_THROUGH", 71 | "descr": "write-through not allocate: implements a write-through buffer", 72 | "type": "M", 73 | "val": "0", 74 | "min": "?", 75 | "max": "?", 76 | }, 77 | { 78 | "name": "WRITE_BACK", 79 | "descr": "write-back allocate: implementes a dirty-memory", 80 | "type": "M", 81 | "val": "1", 82 | "min": "?", 83 | "max": "?", 84 | }, 85 | ] 86 | attributes_dict["confs"] = config_macros + [ 87 | # Currently, Py2hwsw does not have a way of adding `includes. So we need to repeat this macro manually here 88 | { 89 | "name": "ADDR_W_CSRS", 90 | "descr": "Address width of CSRs", 91 | "type": "M", 92 | "val": "5", 93 | "min": "?", 94 | "max": "?", 95 | }, 96 | # 97 | # Verilog Parameters 98 | # 99 | { 100 | "name": "FE_ADDR_W", 101 | "descr": "Front-end address width (log2): defines the total memory space accessible via the cache, which must be a power of two.", 102 | "type": "P", 103 | "val": "24", 104 | "min": "1", 105 | "max": "64", 106 | }, 107 | { 108 | "name": "FE_DATA_W", 109 | "descr": "Front-end data width (log2): this parameter allows supporting processing elements with various data widths.", 110 | "type": "P", 111 | "val": "32", 112 | "min": "32", 113 | "max": "64", 114 | }, 115 | { 116 | "name": "BE_ADDR_W", 117 | "descr": "Back-end address width (log2): the value of this parameter must be equal or greater than FE_ADDR_W to match the width of the back-end interface, but the address space is still dictated by ADDR_W.", 118 | "type": "P", 119 | "val": "24", 120 | "min": "1", 121 | "max": "", 122 | }, 123 | { 124 | "name": "BE_DATA_W", 125 | "descr": "Back-end data width (log2): the value of this parameter must be an integer multiple $k \\geq 1$ of DATA_W. If $k>1$, the memory controller can operate at a frequency higher than the cache's frequency. 
Typically, the memory controller has an asynchronous FIFO interface, so that it can sequentially process multiple commands received in paralell from the cache's back-end interface. ", 126 | "type": "P", 127 | "val": BE_DATA_W, 128 | "min": "32", 129 | "max": "256", 130 | }, 131 | { 132 | "name": "NWAYS_W", 133 | "descr": "Number of cache ways (log2): the miminum is 0 for a directly mapped cache; the default is 1 for a two-way cache; the maximum is limited by the desired maximum operating frequency, which degrades with the number of ways. ", 134 | "type": "P", 135 | "val": "1", 136 | "min": "0", 137 | "max": "8", 138 | }, 139 | { 140 | "name": "NLINES_W", 141 | "descr": "Line offset width (log2): the value of this parameter equals the number of cache lines, given by 2**NLINES_W.", 142 | "type": "P", 143 | "val": "7", 144 | "min": "", 145 | "max": "", 146 | }, 147 | { 148 | "name": "WORD_OFFSET_W", 149 | "descr": "Word offset width (log2): the value of this parameter equals the number of words per line, which is 2**OFFSET_W. ", 150 | "type": "P", 151 | "val": "3", 152 | "min": "1", 153 | "max": "", 154 | }, 155 | { 156 | "name": "WTBUF_DEPTH_W", 157 | "descr": "Write-through buffer depth (log2). A shallow buffer will fill up more frequently and cause write stalls; however, on a Read After Write (RAW) event, a shallow buffer will empty faster, decreasing the duration of the read stall. 
A deep buffer is unlkely to get full and cause write stalls; on the other hand, on a RAW event, it will take a long time to empty and cause long read stalls.", 158 | "type": "P", 159 | "val": "4", 160 | "min": "", 161 | "max": "", 162 | }, 163 | { 164 | "name": "REP_POLICY", 165 | "descr": "Line replacement policy: set to 0 for Least Recently Used (LRU); set to 1 for Pseudo LRU based on Most Recently Used (PLRU_MRU); set to 2 for tree-based Pseudo LRU (PLRU_TREE).", 166 | "type": "P", 167 | "val": "0", 168 | "min": "0", 169 | "max": "3", 170 | }, 171 | { 172 | "name": "WRITE_POL", 173 | "descr": "Write policy: set to 0 for write-through or set to 1 for write-back.", 174 | "type": "P", 175 | "val": "0 ", 176 | "min": "0", 177 | "max": "1", 178 | }, 179 | { 180 | "name": "USE_CTRL", 181 | "descr": "Instantiates a cache controller (1) or not (0). The cache controller provides memory-mapped software accessible registers to invalidate the cache data contents, and monitor the write through buffer status using the front-end interface. To access the cache controller, the MSB of the address mut be set to 1. For more information refer to the example software functions provided.", 182 | "type": "P", 183 | "val": "0", 184 | "min": "0", 185 | "max": "1", 186 | }, 187 | { 188 | "name": "USE_CTRL_CNT", 189 | "descr": "Instantiates hit/miss counters for reads, writes or both (1), or not (0). 
This parameter is meaningful if the cache controller is present (USE_CTRL: 1), providing additional software accessible functions for these functions.", 190 | "type": "P", 191 | "val": "0", 192 | "min": "0", 193 | "max": "1", 194 | }, 195 | # Derived parameters 196 | { 197 | "name": "FE_NBYTES", 198 | "type": "D", 199 | "val": "FE_DATA_W / 8", 200 | "min": "0", 201 | "max": "32", 202 | }, 203 | { 204 | "name": "FE_NBYTES_W", 205 | "type": "D", 206 | "val": "$clog2(FE_NBYTES)", 207 | "min": "0", 208 | "max": "32", 209 | }, 210 | { 211 | "name": "BE_NBYTES", 212 | "type": "D", 213 | "val": "BE_DATA_W / 8", 214 | "min": "0", 215 | "max": "32", 216 | }, 217 | { 218 | "name": "BE_NBYTES_W", 219 | "type": "D", 220 | "val": "$clog2(BE_NBYTES)", 221 | "min": "0", 222 | "max": "32", 223 | }, 224 | { 225 | "name": "LINE2BE_W", 226 | "type": "D", 227 | "val": "WORD_OFFSET_W - $clog2(BE_DATA_W / FE_DATA_W)", 228 | "min": "0", 229 | "max": "32", 230 | }, 231 | { 232 | "name": "ADDR_W", 233 | "descr": "Width of the (word aligned) front-end address bus, optionally including the highest bit to access cache controller CSRs (if enabled)", 234 | "type": "D", 235 | "val": "USE_CTRL + FE_ADDR_W", 236 | "min": "NA", 237 | "max": "NA", 238 | }, 239 | { 240 | "name": "DATA_W", 241 | "type": "D", 242 | "val": "FE_DATA_W", 243 | "min": "NA", 244 | "max": "NA", 245 | }, 246 | ] 247 | if BE_IF == "AXI4": 248 | attributes_dict["confs"] += [ 249 | { 250 | "name": "AXI", 251 | "descr": "AXI interface used by backend", 252 | "type": "M", 253 | "val": "NA", 254 | "min": "NA", 255 | "max": "NA", 256 | }, 257 | { 258 | "name": "AXI_ID_W", 259 | "descr": "AXI ID width", 260 | "type": "P", 261 | "val": "1", 262 | "min": "0", 263 | "max": "32", 264 | }, 265 | { 266 | "name": "AXI_ID", 267 | "descr": "AXI ID", 268 | "type": "P", 269 | "val": "0", 270 | "min": "0", 271 | "max": "32", 272 | }, 273 | { 274 | "name": "AXI_LEN_W", 275 | "descr": "AXI length", 276 | "type": "P", 277 | "val": "4", 278 | "min": 
"0", 279 | "max": "32", 280 | }, 281 | { 282 | "name": "AXI_ADDR_W", 283 | "descr": "AXI address width", 284 | "type": "P", 285 | "val": "BE_ADDR_W", 286 | "min": "0", 287 | "max": "32", 288 | }, 289 | { 290 | "name": "AXI_DATA_W", 291 | "descr": "AXI data width", 292 | "type": "P", 293 | "val": "BE_DATA_W", 294 | "min": "0", 295 | "max": "32", 296 | }, 297 | ] 298 | 299 | # 300 | # Ports 301 | # 302 | attributes_dict["ports"] = [ 303 | { 304 | "name": "clk_en_rst_s", 305 | "descr": "Clock, clock enable and reset", 306 | "signals": { 307 | "type": "iob_clk", 308 | }, 309 | }, 310 | { 311 | "name": "iob_s", 312 | "descr": "Front-end interface", 313 | "signals": { 314 | "type": "iob", 315 | "ADDR_W": "ADDR_W", 316 | "DATA_W": "DATA_W", 317 | }, 318 | }, 319 | { 320 | "name": "ie_io", 321 | "descr": "Cache invalidate and write-trough buffer IO chain", 322 | "signals": [ 323 | { 324 | "name": "invalidate_i", 325 | "descr": "Invalidates all cache lines instantaneously if high.", 326 | "width": 1, 327 | }, 328 | { 329 | "name": "invalidate_o", 330 | "descr": "This output is asserted high when the cache is invalidated via the cache controller or the direct {\\tt invalidate_in} signal. The present {\\tt invalidate_out} signal is useful for invalidating the next-level cache if there is one. If not, this output should be floated.", 331 | "width": 1, 332 | }, 333 | { 334 | "name": "wtb_empty_i", 335 | "descr": "This input is driven by the next-level cache, if there is one, when its write-through buffer is empty. It should be tied high if there is no next-level cache. This signal is used to compute the overall empty status of a cache hierarchy, as explained for signal {\\tt wtb_empty_out}.", 336 | "width": 1, 337 | }, 338 | { 339 | "name": "wtb_empty_o", 340 | "descr": "This output is high if the cache's write-through buffer is empty and its {\tt wtb_empty_in} signal is high. 
This signal informs that all data written to the cache has been written to the destination memory module, and all caches on the way are empty.", 341 | "width": 1, 342 | }, 343 | ], 344 | }, 345 | ] 346 | # Back-end interface 347 | if BE_IF == "AXI4": 348 | attributes_dict["ports"] += [ 349 | { 350 | "name": "axi_m", 351 | "descr": "Back-end interface", 352 | "signals": { 353 | "type": "axi", 354 | "ID_W": "AXI_ID_W", 355 | "ADDR_W": "AXI_ADDR_W", 356 | "DATA_W": "AXI_DATA_W", 357 | "LEN_W": "AXI_LEN_W", 358 | "LOCK_W": 1, 359 | }, 360 | }, 361 | ] 362 | elif BE_IF == "IOb": 363 | attributes_dict["ports"] += [ 364 | { 365 | "name": "iob_m", 366 | "descr": "Back-end interface", 367 | "signals": { 368 | "type": "iob", 369 | "prefix": "be_", 370 | "ADDR_W": "BE_ADDR_W", 371 | "DATA_W": "BE_DATA_W", 372 | }, 373 | }, 374 | ] 375 | # 376 | # Wires 377 | # 378 | attributes_dict["wires"] = [ 379 | # Front-end 380 | { 381 | "name": "fe_cache_mem", 382 | "descr": "Cache memory front-end interface", 383 | "signals": [ 384 | {"name": "data_req", "width": 1}, 385 | {"name": "data_addr", "width": "FE_ADDR_W - FE_NBYTES_W"}, 386 | {"name": "data_rdata", "width": "FE_DATA_W"}, 387 | {"name": "data_ack", "width": 1}, 388 | {"name": "data_req_reg", "width": 1}, 389 | {"name": "data_addr_reg", "width": "FE_ADDR_W - FE_NBYTES_W"}, 390 | {"name": "data_wdata_reg", "width": "FE_DATA_W"}, 391 | {"name": "data_wstrb_reg", "width": "FE_NBYTES"}, 392 | ], 393 | }, 394 | { 395 | "name": "fe_ctrl", 396 | "descr": "Control interface.", 397 | "signals": [ 398 | {"name": "ctrl_req", "width": 1}, 399 | {"name": "ctrl_addr", "width": f"`{NAME.upper()}_ADDR_W_CSRS"}, 400 | {"name": "ctrl_wstrb", "width": "DATA_W/8"}, 401 | {"name": "ctrl_rdata", "width": "USE_CTRL*(FE_DATA_W-1)+1"}, 402 | {"name": "ctrl_ack", "width": 1}, 403 | ], 404 | }, 405 | # Cache memory 406 | { 407 | "name": "cache_mem_fe", 408 | "descr": "Cache memory front-end interface", 409 | "signals": [ 410 | {"name": "data_req"}, 411 
| { 412 | "name": "cache_mem_data_addr", 413 | "width": "FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)", 414 | }, 415 | {"name": "data_rdata"}, 416 | {"name": "data_ack"}, 417 | {"name": "data_req_reg"}, 418 | {"name": "data_addr_reg"}, 419 | {"name": "data_wdata_reg"}, 420 | {"name": "data_wstrb_reg"}, 421 | ], 422 | }, 423 | { 424 | "name": "be_write_if", 425 | "descr": "Back-end write channel", 426 | "signals": [ 427 | {"name": "write_req", "width": 1}, 428 | { 429 | "name": "write_addr", 430 | "width": "FE_ADDR_W - (FE_NBYTES_W + WRITE_POL*WORD_OFFSET_W)", 431 | }, 432 | { 433 | "name": "write_wdata", 434 | "width": "FE_DATA_W + WRITE_POL*(FE_DATA_W*(2**WORD_OFFSET_W)-FE_DATA_W)", 435 | }, 436 | {"name": "write_wstrb", "width": "FE_NBYTES"}, 437 | {"name": "write_ack", "width": 1}, 438 | ], 439 | }, 440 | { 441 | "name": "be_read_if", 442 | "descr": "Back-end read channel", 443 | "signals": [ 444 | {"name": "replace_req", "width": 1}, 445 | {"name": "replace", "width": 1}, 446 | {"name": "replace_addr", "width": "FE_ADDR_W-(BE_NBYTES_W+LINE2BE_W)"}, 447 | {"name": "read_req", "width": 1}, 448 | {"name": "read_addr", "width": "LINE2BE_W"}, 449 | {"name": "read_rdata", "width": "BE_DATA_W"}, 450 | ], 451 | }, 452 | { 453 | "name": "cache_mem_ctrl", 454 | "descr": "", 455 | "signals": [ 456 | {"name": "invalidate_o"}, 457 | {"name": "wtbuf_full", "width": 1}, 458 | {"name": "wtbuf_empty", "width": 1}, 459 | {"name": "write_hit", "width": 1}, 460 | {"name": "write_miss", "width": 1}, 461 | {"name": "read_hit", "width": 1}, 462 | {"name": "read_miss", "width": 1}, 463 | ], 464 | }, 465 | # Internal signals 466 | { 467 | "name": "ctrl_internal", 468 | "descr": "Internal signals for control interface.", 469 | "signals": [ 470 | {"name": "ctrl_invalidate", "width": 1}, 471 | ], 472 | }, 473 | ] 474 | if BE_IF == "AXI4": 475 | attributes_dict["wires"] += [ 476 | { 477 | "name": "clk_rst_s", 478 | "descr": "", 479 | "signals": [ 480 | {"name": "clk_i"}, 481 | {"name": "arst_i"}, 482 
| ], 483 | }, 484 | ] 485 | # 486 | # Subblocks 487 | # 488 | attributes_dict["subblocks"] = [ 489 | { 490 | "core_name": "iob_cache_front_end", 491 | "instance_name": "front_end", 492 | "instance_description": "This IOb interface is connected to a processor or any other processing element that needs a cache buffer to improve the performance of accessing a slower but larger memory", 493 | "parameters": { 494 | "ADDR_W": "ADDR_W", 495 | "DATA_W": "DATA_W", 496 | "USE_CTRL": "USE_CTRL", 497 | }, 498 | "connect": { 499 | "clk_en_rst_s": "clk_en_rst_s", 500 | "iob_s": "iob_s", 501 | "cache_mem_io": "fe_cache_mem", 502 | "ctrl_io": "fe_ctrl", 503 | }, 504 | }, 505 | { 506 | "core_name": "iob_cache_memory", 507 | "instance_name": "cache_memory", 508 | "instance_description": "This block contains the tag, data storage memories and the Write Through Buffer if the correspeonding write policy is selected; these memories are implemented either with RAM if large enough, or with registers if small enough", 509 | "config_macros": config_macros, 510 | "parameters": { 511 | "FE_ADDR_W": "FE_ADDR_W", 512 | "FE_DATA_W": "FE_DATA_W", 513 | "BE_DATA_W": "BE_DATA_W", 514 | "NWAYS_W": "NWAYS_W", 515 | "NLINES_W": "NLINES_W", 516 | "WORD_OFFSET_W": "WORD_OFFSET_W", 517 | "WTBUF_DEPTH_W": "WTBUF_DEPTH_W", 518 | "REP_POLICY": "REP_POLICY", 519 | "WRITE_POL": "WRITE_POL", 520 | "USE_CTRL": "USE_CTRL", 521 | "USE_CTRL_CNT": "USE_CTRL_CNT", 522 | }, 523 | "connect": { 524 | "clk_en_rst_s": "clk_en_rst_s", 525 | "fe_io": "cache_mem_fe", 526 | "be_write_io": "be_write_if", 527 | "be_read_io": "be_read_if", 528 | "ctrl_io": "cache_mem_ctrl", 529 | }, 530 | }, 531 | ] 532 | if BE_IF == "AXI4": 533 | attributes_dict["subblocks"] += [ 534 | { 535 | "core_name": "iob_cache_back_end_axi", 536 | "instance_name": "back_end_axi", 537 | "instance_description": "Memory-side interface: if the cache is at the last level before the target memory module, the back-end interface connects to the target memory 
(e.g. DDR) controller; if the cache is not at the last level, the back-end interface connects to the next-level cache. This module implements an AXI4 interface", 538 | "parameters": { 539 | "FE_ADDR_W": "FE_ADDR_W", 540 | "FE_DATA_W": "FE_DATA_W", 541 | "BE_ADDR_W": "BE_ADDR_W", 542 | "BE_DATA_W": "BE_DATA_W", 543 | "WORD_OFFSET_W": "WORD_OFFSET_W", 544 | "WRITE_POL": "WRITE_POL", 545 | "AXI_ADDR_W": "AXI_ADDR_W", 546 | "AXI_DATA_W": "AXI_DATA_W", 547 | "AXI_ID_W": "AXI_ID_W", 548 | "AXI_LEN_W": "AXI_LEN_W", 549 | "AXI_ID": "AXI_ID", 550 | }, 551 | "connect": { 552 | "clk_rst_s": "clk_rst_s", 553 | "write_io": "be_write_if", 554 | "read_io": "be_read_if", 555 | "axi_m": "axi_m", 556 | }, 557 | }, 558 | ] 559 | elif BE_IF == "IOb": 560 | attributes_dict["subblocks"] += [ 561 | { 562 | "core_name": "iob_cache_back_end_iob", 563 | "instance_name": "back_end_iob", 564 | "instance_description": "Memory-side interface: if the cache is at the last level before the target memory module, the back-end interface connects to the target memory (e.g. DDR) controller; if the cache is not at the last level, the back-end interface connects to the next-level cache. 
This module implements an IOb interface", 565 | "parameters": { 566 | "FE_ADDR_W": "FE_ADDR_W", 567 | "FE_DATA_W": "FE_DATA_W", 568 | "BE_ADDR_W": "BE_ADDR_W", 569 | "BE_DATA_W": "BE_DATA_W", 570 | "WORD_OFFSET_W": "WORD_OFFSET_W", 571 | "WRITE_POL": "WRITE_POL", 572 | }, 573 | "connect": { 574 | "clk_en_rst_s": "clk_en_rst_s", 575 | "write_io": "be_write_if", 576 | "read_io": "be_read_if", 577 | "iob_m": "iob_m", 578 | }, 579 | }, 580 | ] 581 | attributes_dict["subblocks"] += [ 582 | { 583 | "core_name": "iob_cache_control", 584 | "instance_name": "cache_control", 585 | "be_if": be_if, 586 | "instantiate": False, # Instantiated manually in the verilog snippet 587 | }, 588 | # Generate CSRs but don't instantiate it (generated hardware unused; only for software and docs) 589 | { 590 | "core_name": "iob_csrs", 591 | "instance_name": "csrs_inst", 592 | "name": f"iob_cache_{be_if}_csrs", 593 | "instantiate": False, 594 | "autoaddr": False, 595 | "rw_overlap": False, 596 | "version": VERSION, 597 | "csrs": [ 598 | { 599 | "name": "cache", 600 | "descr": "CACHE software accessible registers.", 601 | "regs": [ 602 | { 603 | "name": "WTB_EMPTY", 604 | "descr": "Write-through buffer empty (1) or non-empty (0).", 605 | "type": "NOAUTO", 606 | "mode": "R", 607 | "n_bits": 1, 608 | "rst_val": 0, 609 | "addr": 0, 610 | "log2n_items": 0, 611 | }, 612 | { 613 | "name": "WTB_FULL", 614 | "descr": "Write-through buffer full (1) or non-full (0).", 615 | "type": "NOAUTO", 616 | "mode": "R", 617 | "n_bits": 1, 618 | "rst_val": 0, 619 | "addr": 1, 620 | "log2n_items": 0, 621 | }, 622 | { 623 | "name": "RW_HIT", 624 | "descr": "Read and write hit counter.", 625 | "type": "NOAUTO", 626 | "mode": "R", 627 | "n_bits": 32, 628 | "rst_val": 0, 629 | "addr": 4, 630 | "log2n_items": 0, 631 | }, 632 | { 633 | "name": "RW_MISS", 634 | "descr": "Read and write miss counter.", 635 | "type": "NOAUTO", 636 | "mode": "R", 637 | "n_bits": 32, 638 | "rst_val": 0, 639 | "addr": 8, 640 | "log2n_items": 
0, 641 | }, 642 | { 643 | "name": "READ_HIT", 644 | "descr": "Read hit counter.", 645 | "type": "NOAUTO", 646 | "mode": "R", 647 | "n_bits": 32, 648 | "rst_val": 0, 649 | "addr": 12, 650 | "log2n_items": 0, 651 | }, 652 | { 653 | "name": "READ_MISS", 654 | "descr": "Read miss counter.", 655 | "type": "NOAUTO", 656 | "mode": "R", 657 | "n_bits": 32, 658 | "rst_val": 0, 659 | "addr": 16, 660 | "log2n_items": 0, 661 | }, 662 | { 663 | "name": "WRITE_HIT", 664 | "descr": "Write hit counter.", 665 | "type": "NOAUTO", 666 | "mode": "R", 667 | "n_bits": 32, 668 | "rst_val": 0, 669 | "addr": 20, 670 | "log2n_items": 0, 671 | }, 672 | { 673 | "name": "WRITE_MISS", 674 | "descr": "Write miss counter.", 675 | "type": "NOAUTO", 676 | "mode": "R", 677 | "n_bits": 32, 678 | "rst_val": 0, 679 | "addr": 24, 680 | "log2n_items": 0, 681 | }, 682 | { 683 | "name": "RST_CNTRS", 684 | "descr": "Reset read/write hit/miss counters by writing any value to this register.", 685 | "type": "NOAUTO", 686 | "mode": "W", 687 | "n_bits": 1, 688 | "rst_val": 0, 689 | "addr": 28, 690 | "log2n_items": 0, 691 | }, 692 | { 693 | "name": "INVALIDATE", 694 | "descr": "Invalidate the cache data contents by writing any value to this register.", 695 | "type": "NOAUTO", 696 | "mode": "W", 697 | "n_bits": 1, 698 | "rst_val": 0, 699 | "addr": 29, 700 | "log2n_items": 0, 701 | }, 702 | ], 703 | }, 704 | ], 705 | }, 706 | # For simulation 707 | { 708 | "core_name": "iob_tasks", 709 | "instance_name": "iob_tasks_inst", 710 | "dest_dir": "hardware/simulation/src", 711 | "instantiate": False, 712 | }, 713 | ] 714 | # 715 | # Superblocks 716 | # 717 | attributes_dict["superblocks"] = [ 718 | # Simulation wrapper 719 | { 720 | "core_name": "iob_cache_sim_wrapper", 721 | "dest_dir": "hardware/simulation/src", 722 | "cache_confs": [ 723 | conf for conf in attributes_dict["confs"] if conf["type"] in ["P", "D"] 724 | ], 725 | "be_if": be_if, 726 | }, 727 | ] 728 | # 729 | # Software Modules 730 | # 731 | 
attributes_dict["sw_modules"] = [ 732 | { 733 | "core_name": "iob_coverage_analyze", 734 | "instance_name": "iob_coverage_analyze_inst", 735 | }, 736 | ] 737 | # 738 | # Combinatorial 739 | # 740 | attributes_dict["comb"] = { 741 | "code": """ 742 | invalidate_o = ctrl_invalidate | invalidate_i; 743 | wtb_empty_o = wtbuf_empty & wtb_empty_i; 744 | cache_mem_data_addr = data_addr[FE_ADDR_W-FE_NBYTES_W-1:BE_NBYTES_W+LINE2BE_W-FE_NBYTES_W]; 745 | 746 | """ 747 | } 748 | # 749 | # Snippets 750 | # 751 | attributes_dict["snippets"] = [ 752 | { 753 | "verilog_code": """ 754 | //Cache control & Cache controller: this block is used for invalidating the cache, monitoring the status of the Write Thorough buffer, and accessing read/write hit/miss counters. 755 | generate 756 | if (USE_CTRL) begin : g_ctrl 757 | iob_cache_control #( 758 | .DATA_W (FE_DATA_W), 759 | .USE_CTRL_CNT(USE_CTRL_CNT) 760 | ) cache_control ( 761 | .clk_i (clk_i), 762 | .cke_i (cke_i), 763 | .arst_i (arst_i), 764 | 765 | // control's signals 766 | .valid_i(ctrl_req), 767 | .addr_i (ctrl_addr), 768 | .wstrb_i (ctrl_wstrb), 769 | 770 | // write data 771 | .wtbuf_full_i (wtbuf_full), 772 | .wtbuf_empty_i(wtbuf_empty), 773 | .write_hit_i (write_hit), 774 | .write_miss_i (write_miss), 775 | .read_hit_i (read_hit), 776 | .read_miss_i (read_miss), 777 | 778 | .rdata_o (ctrl_rdata), 779 | .ready_o (ctrl_ack), 780 | .invalidate_o(ctrl_invalidate) 781 | ); 782 | end else begin : g_no_ctrl 783 | assign ctrl_rdata = 1'b0; 784 | assign ctrl_ack = 1'b0; 785 | assign ctrl_invalidate = 1'b0; 786 | end 787 | endgenerate 788 | """ 789 | } 790 | ] 791 | 792 | return attributes_dict 793 | --------------------------------------------------------------------------------