├── .gitignore ├── doc ├── mark.md ├── lsu_learn.md ├── issue.md └── ROB_PC.md ├── src ├── main │ ├── scala │ │ ├── backend │ │ │ ├── memSystem │ │ │ │ ├── Dcache │ │ │ │ │ ├── imgs │ │ │ │ │ │ ├── cache.jpg │ │ │ │ │ │ └── image.png │ │ │ │ │ ├── DataLogic.scala │ │ │ │ │ ├── WordWrite.scala │ │ │ │ │ ├── WordData.scala │ │ │ │ │ ├── Missarbiter.scala │ │ │ │ │ ├── MMIOUnit.scala │ │ │ │ │ ├── WFU.scala │ │ │ │ │ ├── doc.md │ │ │ │ │ └── Meta.scala │ │ │ │ ├── Utils.scala │ │ │ │ └── IndexAllocator.scala │ │ │ ├── execute │ │ │ │ ├── FunctionalUnit │ │ │ │ │ ├── CSA.scala │ │ │ │ │ ├── Counter.scala │ │ │ │ │ ├── Comparer.scala │ │ │ │ │ ├── Multiplier.scala │ │ │ │ │ └── Alu.scala │ │ │ │ └── ExecutionUnits.scala │ │ │ ├── rename │ │ │ │ ├── BusyTable.scala │ │ │ │ ├── MapTable.scala │ │ │ │ └── FreeList.scala │ │ │ ├── dispatch │ │ │ │ └── Dispatch.scala │ │ │ ├── decode │ │ │ │ ├── PreDecode.scala │ │ │ │ ├── DecodeLogic.scala │ │ │ │ └── DecodeStage.scala │ │ │ ├── branch │ │ │ │ └── BranchUnit.scala │ │ │ ├── register │ │ │ │ ├── Regfile.scala │ │ │ │ └── RegisterRead.scala │ │ │ └── issue │ │ │ │ ├── IssueSlot.scala │ │ │ │ └── IssueUnit.scala │ │ ├── common │ │ │ ├── Types.scala │ │ │ ├── Replacement.scala │ │ │ ├── MicroOp.scala │ │ │ ├── CommonClasses.scala │ │ │ ├── Utils.scala │ │ │ └── CoreParameters.scala │ │ ├── frontend │ │ │ ├── FrontendUtils.scala │ │ │ ├── bpu │ │ │ │ ├── RAS.scala │ │ │ │ ├── common.scala │ │ │ │ ├── Bim.scala │ │ │ │ ├── LocalHistory.scala │ │ │ │ ├── BranchPredictor.scala │ │ │ │ └── Btb.scala │ │ │ └── FetchBuffer.scala │ │ ├── difftest │ │ │ ├── LogicRegisters.scala │ │ │ ├── InstrCommits.scala │ │ │ └── difftest.scala │ │ ├── main.scala │ │ ├── tlb │ │ │ ├── common.scala │ │ │ ├── ITLB.scala │ │ │ └── DTLB.scala │ │ ├── ram │ │ │ ├── xpm_memory_sdpram.scala │ │ │ └── SDPRam.scala │ │ ├── sma │ │ │ └── AXI3.scala │ │ └── isa │ │ │ └── Instructions.scala │ └── verilog │ │ └── mycpu_top.v └── test │ └── scala │ ├── 
TestTemplate.scala │ ├── CmperTester.scala │ ├── AluTester.scala │ ├── MultTester.scala │ └── DivTester.scala ├── Makefile ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .bsp 3 | .vscode 4 | 5 | gen 6 | target 7 | project 8 | -------------------------------------------------------------------------------- /doc/mark.md: -------------------------------------------------------------------------------- 1 | # 文件功能(持续更新中...) 2 | 3 | 1. iFu\src\main\scala\backend\decode\DecodeLogic.scala 4 | 根据提供的table返回译码结果 -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/imgs/cache.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iFuProcessor/iFuCore/HEAD/src/main/scala/backend/memSystem/Dcache/imgs/cache.jpg -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/imgs/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iFuProcessor/iFuCore/HEAD/src/main/scala/backend/memSystem/Dcache/imgs/image.png -------------------------------------------------------------------------------- /src/main/scala/common/Types.scala: -------------------------------------------------------------------------------- 1 | package iFu.common 2 | 3 | import chisel3._ 4 | 5 | abstract class CoreModule extends Module with HasCoreParameters 6 | 7 | abstract class CoreBundle extends Bundle with HasCoreParameters 8 | -------------------------------------------------------------------------------- /src/test/scala/TestTemplate.scala: -------------------------------------------------------------------------------- 1 | import chisel3._ 2 | import chiseltest._ 3 | import org.scalatest.flatspec.AnyFlatSpec 4 | 5 | class TestTemplate extends AnyFlatSpec with 
ChiselScalatestTester { 6 | "DUT" should "pass" in { 7 | test(new DeviceUnderTest) { dut => 8 | println("Testing DUT") 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: compile 2 | 3 | base_dir = $(abspath .) 4 | src_dir = $(base_dir)/src/main 5 | gen_dir = $(base_dir)/gen 6 | 7 | PROJECT_NAME = iFuCore 8 | 9 | SBT = sbt 10 | 11 | compile: $(gen_dir)/$(PROJECT_NAME).sv 12 | 13 | $(gen_dir)/$(PROJECT_NAME).sv: $(shell find $(src_dir) -name '*.scala') 14 | $(SBT) "run $(gen_dir)" 15 | 16 | log: 17 | make > ./log 18 | 19 | clean: 20 | rm -rf $(gen_dir)/$(PROJECT_NAME).* 21 | 22 | .PHONY: clean log 23 | -------------------------------------------------------------------------------- /src/main/scala/frontend/FrontendUtils.scala: -------------------------------------------------------------------------------- 1 | package iFu.frontend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common.FrontendParameters 7 | 8 | object FrontendUtils extends FrontendParameters { 9 | 10 | def fetchAlign(addr: UInt): UInt = ~(~addr | (iCacheParams.fetchBytes - 1).U) 11 | 12 | def nextFetch(addr: UInt): UInt = { 13 | fetchAlign(addr) + fetchBytes.U 14 | } 15 | 16 | def fetchMask(addr: UInt): UInt = { 17 | val idx = addr(log2Ceil(fetchBytes) - 1, log2Ceil(instrBytes)) 18 | (((1 << fetchWidth) - 1).U << idx).asUInt(fetchWidth - 1, 0) 19 | } 20 | 21 | def fetchIdx(addr: UInt): UInt = (addr >> log2Ceil(fetchBytes)).asUInt 22 | 23 | def getPc(pc: UInt, idx: UInt): UInt = Cat(fetchIdx(pc), idx(log2Ceil(fetchWidth) - 1, 0), 0.U(2.W)) 24 | 25 | } -------------------------------------------------------------------------------- /src/main/scala/backend/execute/FunctionalUnit/CSA.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | 
import chisel3.util._ 5 | import iFu.common._ 6 | 7 | abstract class CarrySaveAdderMToN(m: Int, n: Int)(len: Int) extends Module{ 8 | val io = IO(new Bundle() { 9 | val in = Input(Vec(m, UInt(len.W))) 10 | val out = Output(Vec(n, UInt(len.W))) 11 | }) 12 | } 13 | 14 | class CSA3_2(len: Int) extends CarrySaveAdderMToN(3, 2)(len){ 15 | val temp = Wire(Vec(len, UInt(2.W))) 16 | for((t, i) <- temp.zipWithIndex){ 17 | val (a, b, cin) = (io.in(0)(i), io.in(1)(i), io.in(2)(i)) 18 | val a_xor_b = a ^ b 19 | val a_and_b = a & b 20 | val sum = a_xor_b ^ cin 21 | val cout = a_and_b | (a_xor_b & cin) 22 | t := Cat(cout, sum) 23 | } 24 | io.out.zipWithIndex.foreach({case(x, i) => x := Cat(temp.reverse map(_(i)))}) 25 | } -------------------------------------------------------------------------------- /src/main/scala/backend/execute/FunctionalUnit/Counter.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import iFu.common._ 6 | 7 | class CntFuncCode { 8 | val SZ_CNT_FN = 4 9 | def FN_X = BitPat("b????") 10 | def FN_VL = BitPat("b1000") 11 | def FN_VH = BitPat("b1001") 12 | 13 | def isLsb(cmd: UInt) = !cmd(0) // true when bit 0 of the function code is clear 14 | } 15 | 16 | object CntFuncCode { 17 | def apply() = new CntFuncCode 18 | } 19 | 20 | abstract class AbstractCnt[T <: CntFuncCode](val cntFn: T) extends CoreModule { // common IO for counter units: function code in, xLen-bit word out 21 | val io = IO(new Bundle { 22 | val fn = Input(UInt(cntFn.SZ_CNT_FN.W)) 23 | val data = Output(UInt(xLen.W)) // explicit direction: driven by the subclass; avoids mixing directioned and undirectioned fields 24 | }) 25 | } 26 | 27 | class Counter64 extends AbstractCnt(CntFuncCode()) { 28 | val cnt = RegInit(0.U((xLen * 2).W)) // free-running counter, 2 * xLen bits wide 29 | cnt := cnt + 1.U 30 | 31 | io.data := Mux(cntFn.isLsb(io.fn), cnt(xLen - 1, 0), cnt(xLen * 2 - 1, xLen)) // fn bit 0 clear -> low word, set -> high word 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/difftest/LogicRegisters.scala: -------------------------------------------------------------------------------- 1 | package iFu.difftest 2 | 3 | import 
chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.common.Consts._ 8 | 9 | import iFu.difftest._ 10 | 11 | class LogicRegisters extends CoreModule { 12 | val io = IO(new Bundle { 13 | val commit = Input(new CommitSignals()) 14 | }) 15 | 16 | val debug_reg = RegInit(VecInit(Seq.fill(numLRegs)(0.U(32.W)))) 17 | 18 | for (w <- 0 until coreWidth) { 19 | when (io.commit.valids(w)) { 20 | val lreg = io.commit.uops(w).ldst 21 | val wdata = io.commit.debug_wdata(w) 22 | val wen = io.commit.uops(w).ldst_val 23 | 24 | when (wen && lreg =/= 0.U) { 25 | debug_reg(lreg) := wdata 26 | } 27 | } 28 | } 29 | 30 | val difftest = Module(new DifftestGRegState) 31 | difftest.io.clock := clock 32 | difftest.io.coreid := 0.U // only support 1 core now 33 | 34 | for (i <- 0 until numLRegs) { 35 | difftest.io.gpr(i) := debug_reg(i) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 iFuProcessor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/DataLogic.scala: -------------------------------------------------------------------------------- 1 | 2 | package iFu.backend 3 | 4 | import chisel3._ 5 | import chisel3.util._ 6 | import scala.annotation.switch 7 | 8 | import iFu.common._ 9 | import iFu.common.Consts._ 10 | import iFu.util._ 11 | 12 | class DcacheDataReq extends CoreBundle with HasDcacheParameters { 13 | val idx = UInt(nIdxBits.W) 14 | val offset = UInt(log2Ceil(nRowWords).W) 15 | val pos = UInt(log2Ceil(nWays).W) 16 | val data = UInt(xLen.W) 17 | 18 | } 19 | 20 | class DcacheDataResp extends CoreBundle with HasDcacheParameters { 21 | val data = UInt(xLen.W) 22 | } 23 | 24 | class DcacheDataIO extends CoreBundle with HasDcacheParameters { 25 | val req = Input(Valid(new DcacheDataReq)) 26 | val resp = Output(Valid(new DcacheDataResp)) 27 | } 28 | 29 | 30 | class DcacheDataLogic extends Module with HasDcacheParameters{ 31 | val io = IO(new CoreBundle{ 32 | val read = Vec( memWidth, (new DcacheDataIO)) 33 | val write = (new DcacheDataIO) 34 | }) 35 | 36 | // data 37 | val data = Module(new DcacheData) 38 | 39 | // Read 40 | for(w <- 0 until memWidth){ 41 | data.io.read(w) <> io.read(w) 42 | } 43 | data.io.write <> io.write 44 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iFu 2 | 3 | 你说得对,但是《 **iFu** 》是由 **???** 自主编写的一款全新超标量乱序CPU。CPU运行在一个被称作「 **FPGA** 」的物理世界,在这里被 **SBT** 选中的 **Chisel Codes** 将被授予「 **编译运行** 」,引导 **仿真与综合** 之力。你将扮演一位名为「 **/\*TODO\*/** 」的神秘角色,在自由的 
**LoongArch32** 中邂逅性格各异、能力独特的 **Instructions** ,和它们一起击败 **Func/Perf/Sys Test** ,找回 **性能** 的同时,逐步发掘「 **B+ = 3.3** 」的真相。 4 | 5 | ## 项目结构 6 | ```shell 7 | repo 8 | ├── build.sbt 9 | ├── doc 10 | ├── LICENSE 11 | ├── Makefile 12 | ├── README.md 13 | └── src 14 | ├── main 15 | │ ├── scala 16 | │ └── verilog 17 | └── test 18 | └── scala 19 | ``` 20 | 21 | ## 环境配置 22 | 1. 参考`https://www.chisel-lang.org/docs/installation`安装`Javac`以及`SBT`。 23 | 24 | ## 编译运行 25 | 1. 修改`repo/Makefile`中的`gen_dir`为`system verilog`代码生成目录 26 | 2. 运行`make`命令,即可在`gen_dir`目录下生成`iFuCore.sv`文件 27 | 3. 将`repo/src/main/verilog/mycpu_top.v`复制到`gen_dir`目录下 28 | 4. 此时,整个项目对外暴露出一个`core_top`模块,其接口使用`AXI3`协议,另外还需传入`intrpt`信号,用于中断处理 29 | 30 | ## 仿真环境 31 | 1. 本项目仿真依赖于[chiplab](https://gitee.com/loongson-edu/chiplab) 32 | 2. 将`gen_dir`设置为`chiplab/IP/myCPU` 33 | 3. 将`repo/src/main/verilog/mycpu_top.v`复制到`chiplab/IP/myCPU`目录下 34 | 4. 完成上述步骤后,即可使用`chiplab`提供的仿真环境进行仿真 35 | 36 | ## 更多信息 37 | [网站主页](https://ys.mihoyo.com/) 38 | [资料下载](https://ys-api.mihoyo.com/event/download_porter/link/ys_cn/official/pc_default) 39 | -------------------------------------------------------------------------------- /src/main/scala/main.scala: -------------------------------------------------------------------------------- 1 | package iFu 2 | 3 | //> using scala "2.13.12" 4 | //> using dep "org.chipsalliance::chisel::6.2.0" 5 | //> using plugin "org.chipsalliance:::chisel-plugin::6.2.0" 6 | //> using options "-unchecked", "-deprecation", "-language:reflectiveCalls", "-feature", "-Xcheckinit", "-Xfatal-warnings", "-Ywarn-dead-code", "-Ywarn-unused", "-Ymacro-annotations" 7 | 8 | import chisel3._ 9 | // _root_ disambiguates from package chisel3.util.circt if user imports chisel3.util._ 10 | import _root_.circt.stage.ChiselStage 11 | 12 | object Main extends App { 13 | 14 | println(chisel3.BuildInfo.toString) 15 | println(args.mkString(" ")) 16 | 17 | val targetDirectory = args.head 18 | 19 | val firtoolOpts = Array( 20 | "-O=release", 21 | 
"--disable-annotation-unknown", 22 | "--lowering-options=explicitBitcast,disallowLocalVariables,disallowPortDeclSharing", 23 | // "--repl-seq-mem", 24 | // "--repl-seq-mem-file=Foo.sv.conf" 25 | "--disable-all-randomization", 26 | ) 27 | 28 | ChiselStage.emitSystemVerilogFile( 29 | gen = new iFuCore, 30 | args = Array("--target-dir", targetDirectory), 31 | firtoolOpts = firtoolOpts 32 | ) 33 | 34 | 35 | // new chisel3.stage.ChiselStage().execute( 36 | // buildArgs, 37 | // Seq( 38 | // ChiselGeneratorAnnotation(() => new iFuCore), 39 | // TargetDirAnnotation(targetDirectory) 40 | // ) 41 | // ) 42 | } -------------------------------------------------------------------------------- /src/main/scala/tlb/common.scala: -------------------------------------------------------------------------------- 1 | package iFu.tlb 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | 8 | class TLBCsrContext extends CoreBundle { 9 | val inv_l0_tlb = Bool() 10 | val asid_asid = UInt(10.W) 11 | 12 | val da_mode = Bool() 13 | val pg_mode = Bool() 14 | val crmd_datm = UInt(2.W) 15 | val crmd_plv = UInt(2.W) 16 | 17 | val dmw0_plv0 = Bool() 18 | val dmw0_plv3 = Bool() 19 | val dmw0_mat = UInt(2.W) 20 | val dmw0_pseg = UInt(3.W) 21 | val dmw0_vseg = UInt(3.W) 22 | 23 | val dmw1_plv0 = Bool() 24 | val dmw1_plv3 = Bool() 25 | val dmw1_mat = UInt(2.W) 26 | val dmw1_pseg = UInt(3.W) 27 | val dmw1_vseg = UInt(3.W) 28 | } 29 | 30 | class L0ITLBEntry extends CoreBundle { 31 | val exist = Bool() 32 | val entry = new TLBEntry() 33 | } 34 | object L0ITLBEntry { 35 | def new_entry(entry: TLBEntry) = { 36 | val e = Wire(new L0ITLBEntry) 37 | e.exist := true.B 38 | e.entry := entry 39 | e 40 | } 41 | 42 | def fake_entry(vppn: UInt, asid: UInt) = { 43 | val e = Wire(new L0ITLBEntry) 44 | e := DontCare 45 | e.exist := false.B 46 | e.entry.meta.vppn := vppn 47 | e.entry.meta.ps := 12.U 48 | e.entry.meta.g := false.B 49 | e.entry.meta.asid := asid 50 | e.entry.meta.e := true.B 
51 | e 52 | } 53 | } 54 | 55 | trait L0TLBState { 56 | val s_ready :: s_refill :: Nil = Enum(2) 57 | } 58 | -------------------------------------------------------------------------------- /src/main/scala/backend/rename/BusyTable.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | 8 | class BusyTableResp extends Bundle { 9 | val prs1_busy = Bool() 10 | val prs2_busy = Bool() 11 | } 12 | 13 | class BusyTable ( 14 | val plWidth : Int, 15 | val numPregs: Int, 16 | val numWakeupPorts: Int 17 | ) extends CoreModule { 18 | val pregSize = log2Ceil(numPregs) 19 | 20 | val io = IO(new Bundle { 21 | val ren_uops = Input(Vec(plWidth, new MicroOp)) 22 | val busy_resps = Output(Vec(plWidth, new BusyTableResp)) 23 | val rebusy_reqs = Input(Vec(plWidth, Bool())) 24 | 25 | val wakeup_valids = Input(Vec(numWakeupPorts, Bool())) 26 | val wakeup_pdsts = Input(Vec(numWakeupPorts, UInt(pregSize.W))) 27 | }) 28 | 29 | val busyTable = RegInit(0.U(numPregs.W)) 30 | 31 | // clear the busy bit of every register that is being written back (woken up) 32 | val busyTableWakeup = busyTable & 33 | ~(io.wakeup_pdsts zip io.wakeup_valids).map { 34 | case (pdst, valid) => valid.asUInt << pdst 35 | }.reduce(_|_) 36 | 37 | // set the busy bit of every newly allocated destination register 38 | val busyTableNext = busyTableWakeup | 39 | (io.ren_uops zip io.rebusy_reqs) .map { 40 | case (uop, req) => req.asUInt << uop.pdst 41 | }.reduce(_|_) 42 | 43 | // update the busy table 44 | busyTable := busyTableNext 45 | 46 | // outputs 47 | // forwarding is not handled here; the forwarding logic lives in the enclosing top-level module 48 | for (i <- 0 until plWidth){ 49 | io.busy_resps(i).prs1_busy := busyTable(io.ren_uops(i).prs1) 50 | io.busy_resps(i).prs2_busy := busyTable(io.ren_uops(i).prs2) 51 | } 52 | } -------------------------------------------------------------------------------- /src/main/scala/ram/xpm_memory_sdpram.scala: -------------------------------------------------------------------------------- 1 | package ram 2 | 3 | import chisel3._ 4 | 5 | class 
xpm_memory_sdpram(ADDR_WIDTH: Int, DATA_WIDTH: Int, BYTE_WIDTH: Int) extends BlackBox (Map( 6 | "ADDR_WIDTH_A" -> ADDR_WIDTH, 7 | "ADDR_WIDTH_B" -> ADDR_WIDTH, 8 | "AUTO_SLEEP_TIME" -> 0, 9 | "BYTE_WRITE_WIDTH_A" -> BYTE_WIDTH, 10 | "ECC_MODE" -> "no_ecc", 11 | "MEMORY_INIT_FILE" -> "none", 12 | "MEMORY_INIT_PARAM" -> "0", 13 | "MEMORY_OPTIMIZATION" -> "true", 14 | "MEMORY_PRIMITIVE" -> "auto", 15 | "MEMORY_SIZE" -> (math.pow(2, ADDR_WIDTH) * DATA_WIDTH).toInt, 16 | "MESSAGE_CONTROL" -> 0, 17 | "READ_DATA_WIDTH_B" -> DATA_WIDTH, 18 | "READ_LATENCY_B" -> 1, 19 | "READ_RESET_VALUE_B" -> "0", 20 | "RST_MODE_A" -> "SYNC", 21 | "RST_MODE_B" -> "SYNC", 22 | "USE_MEM_INIT" -> 0, 23 | "WAKEUP_TIME" -> "disable_sleep", 24 | "WRITE_DATA_WIDTH_A" -> DATA_WIDTH, 25 | "WRITE_MODE_B" -> "no_change" 26 | )) { 27 | val io = IO(new Bundle { 28 | val clka = Input(Bool()) 29 | val clkb = Input(Bool()) 30 | val ena = Input(Bool()) 31 | val enb = Input(Bool()) 32 | val addra = Input(UInt(ADDR_WIDTH.W)) 33 | val addrb = Input(UInt(ADDR_WIDTH.W)) 34 | val wea = Input(UInt((DATA_WIDTH / BYTE_WIDTH).W)) 35 | val dina = Input(UInt(DATA_WIDTH.W)) 36 | val doutb = Output(UInt(DATA_WIDTH.W)) 37 | val regceb = Input(Bool()) 38 | val rstb = Input(Bool()) 39 | val sleep = Input(Bool()) 40 | val injectdbiterra = Input(Bool()) 41 | val injectsbiterra = Input(Bool()) 42 | }) 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/WordWrite.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.common.Consts._ 8 | import iFu.util._ 9 | 10 | object WordWrite { 11 | def apply(req: DCacheReq, rawData: UInt) = { 12 | val memSize = req.uop.mem_size 13 | val wdata = rawData.asTypeOf(Vec(4, UInt(8.W))) 14 | val lob = req.data(7, 0) 15 | val loh = req.data(15, 0) 16 | val low = 
req.data(31, 0) 17 | when(memSize === 0.U) { 18 | 19 | when(req.mask === "b0001".U) { 20 | wdata(0) := lob 21 | } 22 | .elsewhen(req.mask === "b0010".U) { 23 | wdata(1) := lob 24 | } 25 | .elsewhen(req.mask === "b0100".U) { 26 | wdata(2) := lob 27 | } 28 | .elsewhen(req.mask === "b1000".U) { 29 | wdata(3) := lob 30 | } 31 | 32 | }.elsewhen(memSize === 1.U) { 33 | 34 | when(req.mask === "b0011".U) { 35 | wdata(0) := loh(7, 0) 36 | wdata(1) := loh(15, 8) 37 | }.elsewhen(req.mask === "b1100".U) { 38 | wdata(2) := loh(7, 0) 39 | wdata(3) := loh(15, 8) 40 | } 41 | 42 | }.elsewhen(memSize === 2.U) { 43 | when(req.mask === "b1111".U) { 44 | wdata(0) := low(7, 0) 45 | wdata(1) := low(15, 8) 46 | wdata(2) := low(23, 16) 47 | wdata(3) := low(31, 24) 48 | } 49 | } 50 | wdata.asUInt 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/backend/dispatch/Dispatch.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.common.Consts._ 8 | 9 | class DispatchIO extends CoreBundle { 10 | val ren_uops = Vec(coreWidth, Flipped(DecoupledIO(new MicroOp))) 11 | val dis_uops = MixedVec( 12 | issueParams.map(ip => 13 | Vec(ip.dispatchWidth, DecoupledIO(new MicroOp)) 14 | ) 15 | ) 16 | } 17 | 18 | abstract class Dispatcher extends CoreModule { 19 | val io = IO(new DispatchIO) 20 | } 21 | 22 | class BasicDispatcher extends Dispatcher { 23 | issueParams.map(ip => require(ip.dispatchWidth == coreWidth)) 24 | 25 | /* // both int issue queue and mem issue queue must be ready 26 | val ren_readys = io.dis_uops.map(d => VecInit(d.map(_.ready)).asUInt).reduce(_&_) 27 | 28 | for (w <- 0 until coreWidth) { 29 | io.ren_uops(w).ready := ren_readys(w) 30 | } */ 31 | 32 | val ren_readys = Wire(Vec(issueParams.size, Vec(coreWidth, Bool()))) 33 | for (i <- 0 until issueParams.size) { 34 | val ip = 
issueParams(i) 35 | val iqType_match = io.ren_uops.map(r => (r.bits.iqType & ip.iqType.U).orR) 36 | val iqType_ready = io.dis_uops(i).map(_.ready) 37 | ren_readys(i) := VecInit(iqType_match zip iqType_ready map { case (m, r) => m && r }) 38 | } 39 | for (w <- 0 until coreWidth) { 40 | io.ren_uops(w).ready := ren_readys.map(_(w)).reduce(_||_) || io.ren_uops(w).bits.xcpt_valid 41 | } 42 | 43 | for (i <- 0 until issueParams.size) { 44 | val issueParam = issueParams(i) 45 | val dis = io.dis_uops(i) 46 | for (w <- 0 until coreWidth) { 47 | dis(w).valid := io.ren_uops(w).valid && ((io.ren_uops(w).bits.iqType & issueParam.iqType.U) =/= 0.U) 48 | dis(w).bits := io.ren_uops(w).bits 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/scala/frontend/bpu/RAS.scala: -------------------------------------------------------------------------------- 1 | package iFu.frontend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.util._ 8 | import ram.SDPRam 9 | 10 | 11 | class RAS extends CoreModule { 12 | val numRasEntries = frontendParams.bpdParams.numRasEntries 13 | val targetSz = frontendParams.targetSz 14 | 15 | val io = IO(new Bundle { 16 | val read_idx = Input(UInt(log2Ceil(numRasEntries).W)) 17 | val read_tgt = Output(UInt(targetSz.W)) 18 | 19 | val write_valid = Input(Bool()) 20 | val write_idx = Input(UInt(log2Ceil(numRasEntries).W)) 21 | val write_tgt = Input(UInt(targetSz.W)) 22 | }) 23 | 24 | // val ras = Reg(Vec(numRasEntries, UInt(targetSz.W))) 25 | val ras = Module(new SDPRam(numRasEntries, UInt(targetSz.W))) 26 | 27 | ras.io.raddr := io.read_idx 28 | 29 | // io.read_tgt := Mux( 30 | // RegNext(io.write_valid && io.write_idx === io.read_idx), RegNext(io.write_tgt), 31 | // RegNext(ras(io.read_idx)) 32 | // ) 33 | 34 | io.read_tgt := ras.io.rdata.head 35 | 36 | 37 | // when (io.write_valid) { 38 | // ras(io.write_idx) := io.write_tgt 39 | // } 40 | 41 | ras.io.wen 
:= io.write_valid 42 | ras.io.waddr := io.write_idx 43 | ras.io.wdata.head := io.write_tgt 44 | ras.io.wstrobe := 1.U 45 | 46 | } 47 | 48 | class RASPtr extends CoreBundle { 49 | /*--------------------------*/ 50 | val numRasEntries = frontendParams.bpdParams.numRasEntries 51 | /*--------------------------*/ 52 | val bits = UInt(log2Ceil(numRasEntries).W) 53 | 54 | def update( 55 | en: Bool, is_call: Bool, is_ret: Bool 56 | ): RASPtr = { 57 | val new_ptr = Wire(new RASPtr) 58 | new_ptr.bits := Mux(en && is_call, WrapInc(bits, numRasEntries), 59 | Mux(en && is_ret , WrapDec(bits, numRasEntries), 60 | bits)) 61 | new_ptr 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/backend/execute/FunctionalUnit/Comparer.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | 8 | class CmpFuncCode { 9 | val SZ_CMP_FN = 3 10 | def FN_X = BitPat("b???") 11 | def FN_EQ = 0.U(SZ_CMP_FN.W) // 0b000 12 | def FN_NE = 1.U(SZ_CMP_FN.W) // 0b001 13 | def FN_LT = 2.U(SZ_CMP_FN.W) // 0b010 14 | def FN_GE = 3.U(SZ_CMP_FN.W) // 0b011 15 | def FN_LTU = 6.U(SZ_CMP_FN.W) // 0b110 16 | def FN_GEU = 7.U(SZ_CMP_FN.W) // 0b111 17 | 18 | def cmpUnsigned(cmd: UInt) = cmd(2) 19 | def cmpInverted(cmd: UInt) = cmd(0) 20 | def cmpEq(cmd: UInt) = !cmd(1) 21 | } 22 | 23 | object CmpFuncCode { 24 | def apply() = new CmpFuncCode() 25 | } 26 | 27 | abstract class AbstractCmper[T <: CmpFuncCode](val cmpFn: T) extends CoreModule { 28 | val io = IO(new Bundle { 29 | val fn = Input(UInt(cmpFn.SZ_CMP_FN.W)) 30 | val op1 = Input(UInt(xLen.W)) 31 | val op2 = Input(UInt(xLen.W)) 32 | val out = Output(Bool()) 33 | }) 34 | } 35 | 36 | class Comparer(val debug: Boolean = false) extends AbstractCmper(CmpFuncCode()) { 37 | val lt = Mux(cmpFn.cmpUnsigned(io.fn), io.op1.asUInt < io.op2.asUInt, io.op1.asSInt < io.op2.asSInt) 
38 | io.out := cmpFn.cmpInverted(io.fn) ^ Mux(cmpFn.cmpEq(io.fn), io.op1 === io.op2, lt) 39 | 40 | if (debug) { 41 | when (io.fn === cmpFn.FN_EQ) { 42 | printf(p"fn: FN_EQ, ") 43 | }.elsewhen (io.fn === cmpFn.FN_NE) { 44 | printf(p"fn: FN_NE, ") 45 | }.elsewhen (io.fn === cmpFn.FN_LT) { 46 | printf(p"fn: FN_LT, ") 47 | }.elsewhen (io.fn === cmpFn.FN_GE) { 48 | printf(p"fn: FN_GE, ") 49 | }.elsewhen (io.fn === cmpFn.FN_LTU) { 50 | printf(p"fn: FN_LTU, ") 51 | }.elsewhen (io.fn === cmpFn.FN_GEU) { 52 | printf(p"fn: FN_GEU, ") 53 | }.otherwise { 54 | printf(p"fn: UNKNOWN, ") 55 | } 56 | printf("op1: %d, op2: %d, out: %d\n", io.op1.asSInt, io.op2.asSInt, io.out) 57 | } 58 | } -------------------------------------------------------------------------------- /src/main/scala/backend/rename/MapTable.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import iFu.common._ 6 | 7 | class MaptableReq extends CoreBundle { 8 | val lrs1 = UInt(lregSz.W) 9 | val lrs2 = UInt(lregSz.W) 10 | val ldst = UInt(lregSz.W) 11 | } 12 | 13 | class MaptableResp extends CoreBundle { 14 | val prs1 = UInt(pregSz.W) 15 | val prs2 = UInt(pregSz.W) 16 | val stale_pdst = UInt(pregSz.W) 17 | } 18 | 19 | class ReMapReq extends CoreBundle { 20 | val ldst = UInt(lregSz.W) 21 | val pdst = UInt(pregSz.W) 22 | val valid = Bool() 23 | } 24 | 25 | class MapTable extends CoreModule { 26 | 27 | val io = IO(new CoreBundle{ 28 | val map_reqs = Input(Vec(coreWidth, new MaptableReq)) 29 | val map_resps = Output(Vec(coreWidth, new MaptableResp)) 30 | 31 | val remap_reqs = Input(Vec(coreWidth, new ReMapReq)) 32 | 33 | val ren_br_tags = Input(Vec(coreWidth, Valid(UInt(brTagSz.W)))) 34 | 35 | val brupdate = Input(new BrUpdateInfo) 36 | }) 37 | 38 | val mapTable = RegInit(VecInit(Seq.fill(numLRegs) { 0.U(pregSz.W) })) 39 | val brShot = Reg(Vec(maxBrCount, Vec(numLRegs, UInt(pregSz.W)))) 40 | 41 | val 
remapTable = io.remap_reqs.scanLeft(mapTable) { case (table, req) => 42 | VecInit(table.zipWithIndex map { case (preg, lreg) => 43 | if (lreg == 0) { 44 | 0.U 45 | } else { 46 | Mux(req.valid && req.ldst === lreg.U, req.pdst, preg) 47 | } 48 | }) 49 | } 50 | 51 | // save maps at br for rollback 52 | io.ren_br_tags zip remapTable.slice(1, coreWidth + 1) foreach { case (tag, table) => 53 | when (tag.valid) { 54 | brShot(tag.bits) := table 55 | } 56 | } 57 | 58 | mapTable := Mux(io.brupdate.b2.mispredict, brShot(io.brupdate.b2.uop.brTag), remapTable.last) 59 | 60 | // forwarding is done at top modules 61 | io.map_resps zip io.map_reqs foreach { case (resp, req) => 62 | resp.prs1 := mapTable(req.lrs1) 63 | resp.prs2 := mapTable(req.lrs2) 64 | resp.stale_pdst := mapTable(req.ldst) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/scala/CmperTester.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import scala.util.Random 4 | 5 | import chisel3._ 6 | import chiseltest._ 7 | import org.scalatest.flatspec.AnyFlatSpec 8 | 9 | trait CmperTestFunc { 10 | val FN_EQ = 0 11 | val FN_NE = 1 12 | val FN_LT = 2 13 | val FN_GE = 3 14 | val FN_LTU = 6 15 | val FN_GEU = 7 16 | val funcs = Array(FN_EQ, FN_NE, FN_LT, FN_GE, FN_LTU, FN_GEU) 17 | 18 | def cmper(fn: Int, op1: Int, op2: Int): Boolean = { 19 | fn match { 20 | case FN_EQ => if (op1 == op2) true else false 21 | case FN_NE => if (op1 != op2) true else false 22 | case FN_LT => if (op1 < op2) true else false 23 | case FN_GE => if (op1 >= op2) true else false 24 | case FN_LTU => if ((op1 & 0xFFFFFFFFL) < (op2 & 0xFFFFFFFFL)) true else false 25 | case FN_GEU => if ((op1 & 0xFFFFFFFFL) >= (op2 & 0xFFFFFFFFL)) true else false 26 | } 27 | } 28 | 29 | def testOne(dut: Comparer, fn: Int, op1: Int, op2: Int): Unit = { 30 | val refResult = cmper(fn, op1, op2) 31 | dut.io.fn.poke(fn.U) 32 | 
dut.io.op1.poke(BigInt(op1 & 0xFFFFFFFFL).U) 33 | dut.io.op2.poke(BigInt(op2 & 0xFFFFFFFFL).U) 34 | dut.clock.step(1) 35 | dut.io.out.expect(refResult.asBool) 36 | } 37 | 38 | def testFn(dut: Comparer): Unit = { 39 | val times = 25 40 | val random = new Random() 41 | val op1 = Array.fill(times)(random.nextInt()) 42 | val op2 = Array.fill(times)(random.nextInt()) 43 | for (a <- op1) { 44 | for (b <- op2) { 45 | for (fn <- funcs) { 46 | testOne(dut, fn, a, a) 47 | testOne(dut, fn, a, b) 48 | testOne(dut, fn, a, -b) 49 | testOne(dut, fn, -a, b) 50 | } 51 | } 52 | } 53 | } 54 | } 55 | 56 | class CmperTester extends AnyFlatSpec with ChiselScalatestTester with CmperTestFunc { 57 | "Comparer" should "pass" in { 58 | test(new Comparer()) { dut => 59 | println("Testing Coparer") 60 | testFn(dut) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/ram/SDPRam.scala: -------------------------------------------------------------------------------- 1 | package ram 2 | 3 | import chisel3._ 4 | import chisel3.util.log2Ceil 5 | import iFu.common.Consts.FPGAPlatform 6 | 7 | class SDPRam[T <: Data](size: Int, t: T, lineSize: Int = 1, useXpm: Boolean = true) extends Module { 8 | val addrBits = log2Ceil(size) 9 | val io = IO(new Bundle { 10 | val raddr = Input(UInt(addrBits.W)) 11 | val rdata = Output(Vec(lineSize, t)) 12 | val wen = Input(Bool()) 13 | val waddr = Input(UInt(addrBits.W)) 14 | val wstrobe = Input(UInt(lineSize.W)) 15 | val wdata = Input(Vec(lineSize, t)) 16 | }) 17 | if (FPGAPlatform && useXpm) { 18 | val split = !(lineSize == 1 || t.getWidth == 8 || t.getWidth == 9) 19 | val mems = Seq.fill(if (split) lineSize else 1) { 20 | Module(new xpm_memory_sdpram(log2Ceil(size), if (split) t.getWidth else t.getWidth * lineSize, t.getWidth)) 21 | } 22 | mems.zipWithIndex.foreach({case (mem, idx) => 23 | mem.io.clka := clock.asBool 24 | mem.io.clkb := clock.asBool 25 | mem.io.ena := io.wen 26 | mem.io.enb := 
true.B 27 | mem.io.addra := io.waddr 28 | mem.io.addrb := io.raddr 29 | mem.io.wea := (if (split) io.wstrobe(idx) else io.wstrobe) 30 | if (split) { 31 | mem.io.dina := io.wdata(idx).asUInt 32 | io.rdata(idx) := mem.io.doutb.asTypeOf(t) 33 | } 34 | mem.io.regceb := true.B 35 | mem.io.rstb := false.B 36 | mem.io.sleep := false.B 37 | mem.io.injectdbiterra := false.B 38 | mem.io.injectsbiterra := false.B 39 | }) 40 | if (!split) { 41 | mems.head.io.dina := io.wdata.asUInt 42 | io.rdata := mems.head.io.doutb.asTypeOf(Vec(lineSize, t)) 43 | } 44 | } else { 45 | val mem = SyncReadMem(size, Vec(lineSize, t)) 46 | io.rdata := mem.read(io.raddr) 47 | when (io.wen) { 48 | if (lineSize == 1) 49 | mem.write(io.waddr, io.wdata) 50 | else 51 | mem.write(io.waddr, io.wdata, io.wstrobe.asBools) 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/WordData.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import scala.annotation.switch 6 | 7 | import iFu.common._ 8 | import iFu.common.Consts._ 9 | import iFu.util._ 10 | import ram.SDPRam 11 | 12 | 13 | class DcacheData extends Module with HasDcacheParameters{ 14 | val io = IO(new CoreBundle{ 15 | // 2 read ports 16 | val read = Vec( memWidth ,new DcacheDataIO) 17 | // 1 write port 18 | val write = new DcacheDataIO 19 | }) 20 | 21 | val data = SyncReadMem(nTotalWords, UInt(xLen.W)) 22 | 23 | // reset: the counter walks all nTotalWords indices once; the zeroing write below is commented out 24 | val reseting = RegInit(true.B) 25 | val reset_1vIdx = RegInit(0.U(n1vIdxBits.W)) 26 | 27 | when (reseting) { 28 | when (reset_1vIdx === (nTotalWords - 1).U) { 29 | reseting := false.B 30 | } 31 | 32 | // data.write(reset_1vIdx, 0.U) 33 | reset_1vIdx := reset_1vIdx + 1.U 34 | } 35 | 36 | // read: the word index is the concatenation idx ## pos ## offset 37 | val rvalid = io.read.map( _.req.valid) 38 | val rreq = io.read.map( _.req.bits) 39 | val ridx1v = rreq.map(req => Cat(req.idx, req.pos, 
req.offset)) 40 | 41 | for (w <- 0 until memWidth) { 42 | io.read(w).resp := 0.U.asTypeOf(Valid(new DcacheDataResp)) 43 | io.read(w).resp.valid := RegNext(rvalid(w)) 44 | val rdata = data.read(ridx1v(w)) 45 | if(!FPGAPlatform)dontTouch(rdata) 46 | io.read(w).resp.bits.data := rdata 47 | } 48 | 49 | // write 50 | val wvalid = io.write.req.valid 51 | val wreq = io.write.req.bits 52 | val widx1v = Cat(wreq.idx, wreq.pos, wreq.offset) 53 | 54 | io.write.resp := 0.U.asTypeOf(Valid(new DcacheDataResp)) 55 | 56 | when (wvalid) { 57 | data.write(widx1v, wreq.data) 58 | } 59 | io.write.resp.valid := RegNext(wvalid) 60 | io.write.resp.bits.data := DontCare 61 | 62 | // bypass: a read issued in the same cycle as a write to the same word gets the written data 63 | val bypass = Wire(Vec(memWidth, Bool())) 64 | if(!FPGAPlatform)dontTouch(bypass) 65 | for (w <- 0 until memWidth) { 66 | // decide on forwarding in the next cycle 67 | bypass(w) := RegNext(rvalid(w)) && RegNext(wvalid) && IsEqual( RegNext(ridx1v(w)) , RegNext(widx1v)) 68 | when ((bypass(w))) { 69 | io.read(w).resp.bits.data := RegNext(wreq.data) 70 | } 71 | } 72 | 73 | } -------------------------------------------------------------------------------- /src/main/scala/common/Replacement.scala: -------------------------------------------------------------------------------- 1 | package iFu.common 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | abstract class ReplPolicy(nWays: Int) extends Module { 7 | require(isPow2(nWays)) 8 | val io = IO(new Bundle { 9 | val access = Flipped(Valid(UInt(log2Ceil(nWays).W))) 10 | val repl_way = Output(UInt(log2Ceil(nWays).W)) 11 | }) 12 | } 13 | 14 | class PseudoLRU(nWays: Int) extends ReplPolicy(nWays) { 15 | val tree = RegInit(0.U((nWays - 1).W)) 16 | 17 | // update way 18 | def update_tree(_tree: UInt, access_way: UInt, num_ways: Int): UInt = { 19 | if (num_ways <= 2) { 20 | assert(num_ways == 2) 21 | return !access_way(0) 22 | } else { 23 | val left_subtree = _tree(num_ways - 3, num_ways / 2 - 1) 24 | val right_subtree = _tree(num_ways / 2 - 2, 0) 25 | assert(left_subtree.getWidth == 
num_ways / 2 - 1) 26 | assert(right_subtree.getWidth == num_ways / 2 - 1) 27 | val direction = !access_way(log2Ceil(num_ways) - 1) 28 | return Cat(direction, 29 | Mux(direction, 30 | update_tree(left_subtree, access_way(log2Ceil(num_ways) - 2, 0), num_ways / 2), 31 | left_subtree 32 | ), 33 | Mux(direction, 34 | right_subtree, 35 | update_tree(right_subtree, access_way(log2Ceil(num_ways) - 2, 0), num_ways / 2) 36 | ) 37 | ) 38 | } 39 | } 40 | tree := Mux(io.access.valid, update_tree(tree, io.access.bits, nWays), tree) 41 | 42 | // replace way 43 | def calc_replace_way(_tree: UInt, num_ways: Int): UInt = { 44 | if (num_ways <= 2) { 45 | assert(num_ways == 2) 46 | return _tree(0) 47 | } else { 48 | val left_subtree = _tree(num_ways - 3, num_ways / 2 - 1) 49 | val right_subtree = _tree(num_ways / 2 - 2, 0) 50 | assert(left_subtree.getWidth == num_ways / 2 - 1) 51 | assert(right_subtree.getWidth == num_ways / 2 - 1) 52 | val prio = _tree(num_ways - 2) 53 | return Cat(prio, Mux(prio, 54 | calc_replace_way(right_subtree, num_ways / 2), 55 | calc_replace_way(left_subtree , num_ways / 2) 56 | )) 57 | } 58 | } 59 | io.repl_way := calc_replace_way(tree, nWays) 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/backend/execute/ExecutionUnits.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | 5 | import iFu.common._ 6 | import iFu.common.Consts._ 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | 10 | class ExecutionUnits extends HasCoreParameters { 11 | private val exe_units = ArrayBuffer[ExecutionUnit]() 12 | 13 | def length = exe_units.length 14 | 15 | def apply(n: Int) = exe_units(n) 16 | 17 | def map[T](f: ExecutionUnit => T) = { 18 | exe_units.map(f) 19 | } 20 | 21 | def withFilter(f: ExecutionUnit => Boolean) = { 22 | exe_units.withFilter(f) 23 | } 24 | 25 | def foreach[U](f: ExecutionUnit => U) = { 26 | 
exe_units.foreach(f) 27 | } 28 | 29 | def zipWithIndex = { 30 | exe_units.zipWithIndex 31 | } 32 | 33 | def indexWhere(f: ExecutionUnit => Boolean) = { 34 | exe_units.indexWhere(f) 35 | } 36 | 37 | def count(f: ExecutionUnit => Boolean) = { 38 | exe_units.count(f) 39 | } 40 | 41 | lazy val memory_units = { 42 | exe_units.filter(_.hasMem) 43 | } 44 | 45 | lazy val alu_units = { 46 | exe_units.filter(_.hasAlu) 47 | } 48 | 49 | lazy val csr_unit = { 50 | require(exe_units.count(_.hasCSR) == 1) 51 | exe_units.find(_.hasCSR).get 52 | } 53 | 54 | lazy val jmp_unit_idx = { 55 | exe_units.indexWhere(_.hasJmpUnit) 56 | } 57 | 58 | val int_width = issueParams.find(_.iqType == IQT_INT.litValue).get.issueWidth 59 | val mem_width = issueParams.find(_.iqType == IQT_MEM.litValue).get.issueWidth 60 | 61 | for (w <- 0 until mem_width) { 62 | val memExeUnit = Module(new ALUExeUnit( 63 | hasAlu = false, 64 | hasMem = true 65 | )) 66 | exe_units += memExeUnit 67 | } 68 | for (w <- 0 until int_width) { 69 | def is_nth(n: Int): Boolean = w == (n % int_width) 70 | 71 | val alu_exe_unit = Module(new ALUExeUnit( 72 | hasJmpUnit = is_nth(0), 73 | hasCSR = is_nth(1), 74 | hasCnt = is_nth(1), 75 | hasMul = is_nth(2), 76 | hasDiv = is_nth(2), 77 | )) 78 | exe_units += alu_exe_unit 79 | } 80 | 81 | val numWritePorts = exe_units.count(_.writesIrf) 82 | val numTotalBypassPorts = exe_units.withFilter(_.bypassable).map(_.numStages).sum 83 | 84 | val bypassable_write_port_mask = exe_units.withFilter(_.writesIrf).map(_.bypassable) 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/Missarbiter.scala: -------------------------------------------------------------------------------- 1 | 2 | package iFu.backend 3 | 4 | import chisel3._ 5 | import chisel3.util._ 6 | 7 | import iFu.common._ 8 | import iFu.common.Consts._ 9 | import iFu.util._ 10 | 11 | class Missarbiter extends CoreModule with HasDcacheParameters { 12 | val 
io = IO(new Bundle{ 13 | val req = Input(Vec(memWidth, new DCacheReq)) 14 | val alive = Input(Vec(memWidth, Bool())) 15 | val miss = Input(Vec(memWidth, Bool())) 16 | 17 | val sendNack = Output(Vec(memWidth , Bool())) 18 | val sendResp = Output(Vec(memWidth , Bool())) 19 | val store_enter_mshr = Output(Bool()) 20 | val storeFailed = Output(Bool()) 21 | 22 | val mshrReq = Decoupled(new DCacheReq) 23 | }) 24 | if (!FPGAPlatform) dontTouch(io) 25 | 26 | io.sendResp := 0.U.asTypeOf(Vec(memWidth, Bool())) 27 | io.sendNack := 0.U.asTypeOf(Vec(memWidth, Bool())) 28 | io.storeFailed := false.B 29 | 30 | io.mshrReq.valid := false.B 31 | io.mshrReq.bits := DontCare 32 | 33 | val store_enter_mshr = WireInit(false.B) 34 | io.store_enter_mshr := store_enter_mshr 35 | 36 | val wantaccess = Wire(Vec(memWidth, Bool())) 37 | for (i <- 0 until memWidth) { 38 | wantaccess(i) := io.miss(i) && io.alive(i) // a pipeline competes for the MSHR only if its request missed and is still alive 39 | } 40 | 41 | when (wantaccess(0) && !wantaccess(1)) { 42 | // A store entered the MSHR in the previous cycle, so another missing store must not enter this cycle (hasStore is RegNext'ed, so ready has not had time to change); this effectively forwards hasStore 43 | // The store is rejected and must raise both nack and storeFailed; this is achieved by forcing valid low so that the handshake fails 44 | val do_st_as_miss = isStore(io.req(0)) && RegNext(store_enter_mshr) 45 | io.sendResp(0) := false.B 46 | io.sendNack(0) := !io.mshrReq.fire 47 | io.storeFailed := !io.mshrReq.fire && isStore(io.req(0)) 48 | 49 | store_enter_mshr := io.mshrReq.fire && isStore(io.req(0)) 50 | 51 | io.mshrReq.valid := true.B && !do_st_as_miss 52 | io.mshrReq.bits := io.req(0) 53 | } .elsewhen(!wantaccess(0) && wantaccess(1)) { 54 | io.sendResp(1) := false.B 55 | io.sendNack(1) := !io.mshrReq.ready 56 | 57 | io.mshrReq.valid := true.B 58 | io.mshrReq.bits := io.req(1) 59 | } .elsewhen(wantaccess(0) && wantaccess(1)) { // priority: 1 > 0 60 | // pipeline 0 61 | io.sendResp(0) := false.B 62 | io.sendNack(0) := true.B 63 | io.storeFailed := isStore(io.req(0)) 64 | 65 | // pipeline 1 66 | io.sendResp(1) := false.B 67 | io.sendNack(1) := !io.mshrReq.ready 68 | 69 | // send request to mshr 70 | 
io.mshrReq.valid := true.B 71 | io.mshrReq.bits := io.req(1) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/backend/decode/PreDecode.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.isa.Instructions._ 7 | import iFu.common._ 8 | import iFu.common.Consts._ 9 | import iFu.util.ImplicitCast.uintToBitPat 10 | 11 | trait PreDecodeTable { 12 | val default = List[BitPat](N, N, N) 13 | val table:Array[(BitPat, List[BitPat])] = Array[(BitPat, List[BitPat])]( 14 | //// is br? 15 | //// | is bl? 16 | //// | | is jirl? 17 | //// | | | 18 | //// | | | 19 | JIRL -> List(N, N, Y), 20 | B -> List(N, Y, N), 21 | BL -> List(N, Y, N), 22 | BEQ -> List(Y, N, N), 23 | BNE -> List(Y, N, N), 24 | BLT -> List(Y, N, N), 25 | BLTU -> List(Y, N, N), 26 | BGE -> List(Y, N, N), 27 | BGEU -> List(Y, N, N) 28 | ) 29 | } 30 | 31 | class PreDecodeSignals extends CoreBundle { 32 | val isRet = Bool() 33 | val isCall = Bool() 34 | val target = UInt(vaddrBits.W) 35 | val cfiType = UInt(CFI_SZ.W) 36 | } 37 | 38 | class PreDecode extends CoreModule with PreDecodeTable { 39 | val io = IO(new Bundle{ 40 | val instr = Input(UInt(coreInstrBits.W)) 41 | val pc = Input(UInt(vaddrBits.W)) 42 | val out = Output(new PreDecodeSignals) 43 | }) 44 | 45 | //TODO switch to asBool 46 | val bpdSignals = DecodeLogic(io.instr, default, table) 47 | 48 | val isBr = bpdSignals(0)(0) 49 | val isBl = bpdSignals(1)(0) 50 | val isJirl = bpdSignals(2)(0) 51 | 52 | /** 53 | * isRet holds when: 54 | * 1. the instruction is JIRL 55 | * 2. rd=0 rj=1 56 | * 3. 
the immediate value is 0 57 | */ 58 | io.out.isRet := (isJirl && io.instr(4,0) === 0.U && io.instr(9,5) === 1.U && io.instr(25,10) === 0.U) 59 | /** 60 | * isCall holds when: the instruction is BL, or a JIRL that links to ra 61 | */ 62 | io.out.isCall := (isBl && io.instr(26)) || (isJirl && io.instr(4, 0) === 1.U) 63 | 64 | // target outputs a 32-bit address 65 | io.out.target := ( 66 | Mux(isBr, 67 | Cat(Fill(14, io.instr(25)), io.instr(25, 10), 0.U(2.W)), 68 | Cat(Fill(4, io.instr(9)), io.instr(9, 0), io.instr(25, 10), 0.U(2.W)) 69 | ).asSInt + io.pc.asSInt).asUInt 70 | 71 | io.out.cfiType := Mux(isBr, CFI_BR, 72 | Mux(isBl, CFI_BL, 73 | Mux(isJirl, CFI_JIRL, 74 | CFI_X))) 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/sma/AXI3.scala: -------------------------------------------------------------------------------- 1 | package iFu.axi3 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | object AXI3Parameters { 7 | val idBits = 4 8 | val addrBits = 32 9 | val lenBits = 8 10 | val sizeBits = 3 11 | val burstBits = 2 12 | val cacheBits = 4 13 | val protBits = 3 14 | val dataBits = 32 15 | val respBits = 2 16 | 17 | def MLEN1 = 0x0.U(lenBits.W) 18 | def MLEN2 = 0x1.U(lenBits.W) 19 | def MLEN4 = 0x3.U(lenBits.W) 20 | def MLEN8 = 0x7.U(lenBits.W) 21 | def MLEN16 = 0xF.U(lenBits.W) // max supported length 22 | def MLEN32 = 0x1F.U(lenBits.W) 23 | def MLEN64 = 0x3F.U(lenBits.W) 24 | def MLEN128 = 0x7F.U(lenBits.W) 25 | def MLEN256 = 0xFF.U(lenBits.W) 26 | 27 | def MSIZE1 = 0.U(sizeBits.W) 28 | def MSIZE2 = 1.U(sizeBits.W) 29 | def MSIZE4 = 2.U(sizeBits.W) // max supported size 30 | def MSIZE8 = 3.U(sizeBits.W) 31 | def MSIZE16 = 4.U(sizeBits.W) 32 | def MSIZE32 = 5.U(sizeBits.W) 33 | def MSIZE64 = 6.U(sizeBits.W) 34 | def MSIZE128 = 7.U(sizeBits.W) 35 | 36 | def BURST_FIXED = 0.U(burstBits.W) 37 | def BURST_INCR = 1.U(burstBits.W) 38 | def BURST_WRAP = 2.U(burstBits.W) 39 | def BURST_RESERVED = 3.U(burstBits.W) 40 | } 41 | 42 | trait AXI3ID { 43 | def idBits = 
AXI3Parameters.idBits 44 | val id = Output(UInt(idBits.W)) 45 | } 46 | 47 | trait AXI3Data { 48 | def dataBits = AXI3Parameters.dataBits 49 | val data = Output(UInt(dataBits.W)) 50 | } 51 | 52 | trait AXI3Last { 53 | val last = Output(Bool()) 54 | } 55 | 56 | class AXI3BundleA extends Bundle with AXI3ID { 57 | val addr = Output(UInt(AXI3Parameters.addrBits.W)) 58 | val len = Output(UInt(AXI3Parameters.lenBits.W)) 59 | val size = Output(UInt(AXI3Parameters.sizeBits.W)) 60 | val burst = Output(UInt(AXI3Parameters.burstBits.W)) 61 | val lock = Output(Bool()) 62 | val cache = Output(UInt(AXI3Parameters.cacheBits.W)) 63 | val prot = Output(UInt(AXI3Parameters.protBits.W)) 64 | } 65 | 66 | class AXI3BundleB extends Bundle with AXI3ID { 67 | val resp = Output(UInt(AXI3Parameters.respBits.W)) 68 | } 69 | 70 | class AXI3BundleR extends AXI3BundleB with AXI3ID with AXI3Data with AXI3Last 71 | 72 | class AXI3BundleW extends Bundle with AXI3ID with AXI3Data with AXI3Last { 73 | val strb = Output(UInt((AXI3Parameters.dataBits / 8).W)) 74 | } 75 | 76 | class AXI3 extends Bundle { 77 | val ar = Decoupled(new AXI3BundleA) 78 | val r = Flipped(Decoupled(new AXI3BundleR)) 79 | val aw = Decoupled(new AXI3BundleA) 80 | val w = Decoupled(new AXI3BundleW) 81 | val b = Flipped(Decoupled(new AXI3BundleB)) 82 | } 83 | -------------------------------------------------------------------------------- /src/test/scala/AluTester.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import scala.util.Random 4 | 5 | import chisel3._ 6 | import chiseltest._ 7 | import org.scalatest.flatspec.AnyFlatSpec 8 | 9 | trait AluTestFunc { 10 | val FN_ADD = 0 11 | val FN_SUB = 1 12 | val FN_AND = 2 13 | val FN_NOR = 3 14 | val FN_OR = 4 15 | val FN_XOR = 5 16 | val FN_SL = 6 17 | val FN_SRA = 7 18 | val FN_SRL = 8 19 | val FN_ANDN = 9 20 | val FN_ORN = 10 21 | val FN_SLT = 11 22 | val FN_SLTU = 13 23 | // val funcs = Array(FN_ADD, FN_SUB, 
FN_AND, FN_NOR, FN_OR, FN_XOR, FN_SL, FN_SRA, FN_SRL, FN_SLT, FN_SLTU) 24 | val funcs = Array(FN_ADD, FN_SUB, FN_AND, FN_NOR, FN_OR, FN_XOR, FN_SL, FN_SRA, FN_SRL, FN_SLT, FN_SLTU, FN_ANDN, FN_ORN) 25 | 26 | def alu(fn: Int, op1: Int, op2: Int): Int = { 27 | fn match { 28 | case FN_ADD => op1 + op2 29 | case FN_SUB => op1 - op2 30 | case FN_AND => op1 & op2 31 | case FN_NOR => ~(op1 | op2) 32 | case FN_OR => op1 | op2 33 | case FN_XOR => op1 ^ op2 34 | case FN_SL => op1 << op2 35 | case FN_SRA => op1 >> op2 36 | case FN_SRL => (op1 >>> op2) 37 | case FN_SLT => if (op1 < op2) 1 else 0 38 | case FN_SLTU => if ((op1 & 0xFFFFFFFFL) < (op2 & 0xFFFFFFFFL)) 1 else 0 39 | case FN_ANDN => op1 & ~op2 40 | case FN_ORN => op1 | ~op2 41 | } 42 | } 43 | 44 | def testOne(dut: Alu, fn: Int, op1: Int, op2: Int): Unit = { 45 | val refResult = alu(fn, op1, op2) 46 | dut.io.fn.poke(fn.U) 47 | dut.io.op1.poke(BigInt(op1 & 0xFFFFFFFFL).U) 48 | dut.io.op2.poke(BigInt(op2 & 0xFFFFFFFFL).U) 49 | dut.clock.step(1) 50 | dut.io.out.expect(BigInt(refResult & 0xFFFFFFFFL).U) 51 | } 52 | 53 | def testFn(dut: Alu): Unit = { 54 | val times = 25 55 | val random = new Random() 56 | val op1 = Array.fill(times)(random.nextInt()) 57 | val op2 = Array.fill(times)(random.nextInt()) 58 | for (a <- op1) { 59 | for (b <- op2) { 60 | for (fn <- funcs) { 61 | testOne(dut, fn, a, a) 62 | testOne(dut, fn, a, b) 63 | testOne(dut, fn, a, -b) 64 | testOne(dut, fn, -a, b) 65 | } 66 | } 67 | } 68 | } 69 | 70 | } 71 | 72 | class AluTester extends AnyFlatSpec with ChiselScalatestTester with AluTestFunc { 73 | "Alu" should "pass" in { 74 | test(new Alu()) { dut => 75 | println("Testing Alu") 76 | testFn(dut) 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/scala/frontend/bpu/common.scala: -------------------------------------------------------------------------------- 1 | package iFu.frontend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | 
trait HasBPUParameters { 7 | val vaddrBits = 32 8 | val fetchWidth = 4 9 | val fetchBytes = fetchWidth * 4 10 | 11 | 12 | 13 | val mixSize = 24 14 | // def mixHILO(pc: UInt): UInt = Cat(pc(vaddrBits - 1 , mixSize) , pc(mixSize - 1, 0) ^ pc(vaddrBits - 1 , vaddrBits - mixSize)) 15 | def mixHILO(pc: UInt): UInt = pc 16 | 17 | val targetSz = 15 18 | 19 | def getTargetPC(pc: UInt , target : UInt): UInt = { 20 | Cat(pc(vaddrBits - 1, targetSz + 2) , target(targetSz - 1 , 0) , 0.U(2.W)) 21 | } 22 | 23 | def getTarget(tgtpc : UInt): UInt = tgtpc(targetSz + 2 - 1 , 2) 24 | } 25 | 26 | trait HasUbtbParameters extends HasBPUParameters { 27 | // val nWays = 16 28 | val nWays = 4 29 | /* def tagSz = vaddrBits - log2Ceil(fetchBytes) */ 30 | // def tagSz = 8 31 | 32 | // tag视野大小 33 | val tagView = 11 34 | // val offsetSz = 6 35 | def tagSz = tagView - log2Ceil(fetchBytes) + 1 36 | def getTag(pc: UInt): UInt = pc(tagView , log2Ceil(fetchBytes)) 37 | } 38 | 39 | trait HasBimParameters extends HasBPUParameters { 40 | val nSets = 512 41 | val nWrBypassEntries = 2 42 | 43 | def bimWrite(v: UInt, taken: Bool): UInt = { 44 | val oldBimSatTaken = v === 3.U 45 | val oldBimSatNtaken = v === 0.U 46 | Mux(oldBimSatTaken && taken, 3.U, 47 | Mux(oldBimSatNtaken && !taken, 0.U, 48 | Mux(taken, v + 1.U, v - 1.U))) 49 | } 50 | } 51 | 52 | trait HasBtbParameters extends HasBPUParameters { 53 | val nWays = 2 54 | // def tagSz = vaddrBits - log2Ceil(nSets) - log2Ceil(fetchBytes) 55 | val nSets = 64 56 | // val lowBitSz = 16 57 | 58 | def nIdxBits = log2Ceil(nSets) 59 | def getIdx(pc: UInt): UInt = pc(nIdxBits + log2Ceil(fetchBytes) - 1, log2Ceil(fetchBytes)) 60 | val tagView = 16 61 | def tagSz = tagView - nIdxBits - log2Ceil(fetchBytes) + 1 62 | def getTag(pc: UInt): UInt = pc(tagView , nIdxBits + log2Ceil(fetchBytes)) 63 | } 64 | 65 | trait HasLocalHistoryParameters extends HasBPUParameters { 66 | val localHistoryLength = 13 67 | val nLHRs = 64 68 | val nCounters = 8192 69 | val nLHRBits = 
log2Ceil(nLHRs) 70 | val nCounterBits = log2Ceil(nCounters) 71 | val nCacheCounters = 64 72 | val nCacheCounterBits = log2Ceil(nCacheCounters) 73 | 74 | def update(v: UInt, taken: Bool): UInt = { 75 | val extended = Cat(0.U(1.W), v) 76 | val newCnt = Mux(taken, extended + 1.U, extended - 1.U) 77 | Mux(newCnt(2), v, newCnt(1, 0)) 78 | } 79 | 80 | def idxHash(pc: UInt, hist: UInt): UInt = { 81 | hist 82 | } 83 | 84 | def cacheIdxHash(hist: UInt): UInt = { 85 | hist(nCacheCounterBits - 1, 0) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/scala/backend/decode/DecodeLogic.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import chisel3.util.experimental.decode._ 6 | 7 | object DecodeLogic 8 | { 9 | // TODO This should be a method on BitPat 10 | private def hasDontCare(bp: BitPat): Boolean = bp.mask.bitCount != bp.width 11 | // Pads BitPats that are safe to pad (no don't cares), errors otherwise 12 | private def padBP(bp: BitPat, width: Int): BitPat = { 13 | if (bp.width == width) bp 14 | else { 15 | require(!hasDontCare(bp), s"Cannot pad '$bp' to '$width' bits because it has don't cares") 16 | val diff = width - bp.width 17 | require(diff > 0, s"Cannot pad '$bp' to '$width' because it is already '${bp.width}' bits wide!") 18 | BitPat(0.U(diff.W)) ## bp 19 | } 20 | } 21 | 22 | def apply(addr: UInt, default: BitPat, mapping: Iterable[(BitPat, BitPat)]): UInt = 23 | chisel3.util.experimental.decode.decoder(QMCMinimizer, addr, TruthTable(mapping, default)) 24 | def apply(addr: UInt, default: Seq[BitPat], mappingIn: Iterable[(BitPat, Seq[BitPat])]): Seq[UInt] = { 25 | val nElts = default.size 26 | require(mappingIn.forall(_._2.size == nElts), 27 | s"All Seq[BitPat] must be of the same length, got $nElts vs. 
${mappingIn.find(_._2.size != nElts).get}" 28 | ) 29 | //思路:将default转化为BitPat的形式,然后调用前面的apply函数来生成decoder 30 | val elementsGrouped = mappingIn.map(_._2).transpose 31 | val elementWidths = elementsGrouped.zip(default).map { case (elts, default) => 32 | (default :: elts.toList).map(_.getWidth).max //将default添加到列表的开头,最终得到一行元素中最大宽度,即每一个信号对应的宽度 33 | } 34 | val resultWidth = elementWidths.sum 35 | 36 | val elementIndices = elementWidths.scan(resultWidth - 1) { case (l, r) => l - r } 37 | 38 | // All BitPats that correspond to a given element in the result must have the same width in the 39 | // chisel3 decoder. We will zero pad any BitPats that are too small so long as they dont have 40 | // any don't cares. If there are don't cares, it is an error and the user needs to pad the 41 | // BitPat themselves 42 | val defaultsPadded = default.zip(elementWidths).map { case (bp, w) => padBP(bp, w) } 43 | val mappingInPadded = mappingIn.map { case (in, elts) => 44 | in -> elts.zip(elementWidths).map { case (bp, w) => padBP(bp, w) } 45 | } 46 | val decoded = apply(addr, defaultsPadded.reduce(_ ## _), mappingInPadded.map { case (in, out) => (in, out.reduce(_ ## _)) }) 47 | //返回译码结果 48 | elementIndices.zip(elementIndices.tail).map { case (msb, lsb) => decoded(msb, lsb + 1) }.toList 49 | } 50 | def apply(addr: UInt, default: Seq[BitPat], mappingIn: List[(UInt, Seq[BitPat])]): Seq[UInt] = 51 | apply(addr, default, mappingIn.map(m => (BitPat(m._1), m._2)).asInstanceOf[Iterable[(BitPat, Seq[BitPat])]]) 52 | def apply(addr: UInt, trues: Iterable[UInt], falses: Iterable[UInt]): Bool = 53 | apply(addr, BitPat.dontCare(1), trues.map(BitPat(_) -> BitPat("b1")) ++ falses.map(BitPat(_) -> BitPat("b0"))).asBool 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/MMIOUnit.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import 
scala.collection.View.Fill 4 | 5 | import chisel3._ 6 | import chisel3.util._ 7 | 8 | import iFu.sma._ 9 | import iFu.axi3._ 10 | 11 | import iFu.common._ 12 | import iFu.common.Consts._ 13 | import iFu.lsu.utils._ 14 | 15 | class MMIOUnit extends Module with HasDcacheParameters { 16 | val io = IO(new CoreBundle{ 17 | val mmioReq = Flipped(Decoupled(new DCacheReq)) 18 | // mmioResp is a replay request 19 | val mmioResp = Decoupled(new DCacheReq) 20 | 21 | val smar = new SMAR 22 | val smaw = new SMAW 23 | }) 24 | if (!FPGAPlatform) dontTouch (io) 25 | 26 | val s_ready :: s_fetch :: s_wb :: s_resp :: Nil = Enum(4) 27 | val state = RegInit(s_ready) 28 | 29 | val mmioReq = RegInit(0.U.asTypeOf(new DCacheReq)) 30 | if (!FPGAPlatform) dontTouch(mmioReq) 31 | 32 | io.mmioReq.ready := state === s_ready 33 | 34 | io.mmioResp.valid := state === s_resp 35 | io.mmioResp.bits := mmioReq 36 | io.mmioResp.bits.data := loadDataGen( 37 | mmioReq.addr(1, 0), 38 | mmioReq.data, 39 | mmioReq.uop.mem_size, 40 | mmioReq.uop.mem_signed 41 | ) 42 | 43 | io.smar.req.arvalid := state === s_fetch 44 | io.smaw.req.awvalid := state === s_wb 45 | io.smaw.req.wvalid := state === s_wb 46 | 47 | io.smaw.req.wstrb := mmioReq.mask 48 | 49 | io.smar.req.arlen := AXI3Parameters.MLEN1 50 | io.smaw.req.awlen := AXI3Parameters.MLEN1 51 | 52 | io.smar.req.arburst := AXI3Parameters.BURST_FIXED 53 | io.smaw.req.awburst := AXI3Parameters.BURST_FIXED 54 | 55 | io.smar.req.arsize := mmioReq.uop.mem_size 56 | io.smaw.req.awsize := mmioReq.uop.mem_size 57 | 58 | io.smar.req.araddr := mmioReq.addr 59 | io.smaw.req.awaddr := mmioReq.addr 60 | 61 | io.smaw.req.wdata := 62 | Mux(mmioReq.uop.mem_size === 0.U, Fill(4, mmioReq.data( 7, 0)), 63 | Mux(mmioReq.uop.mem_size === 1.U, Fill(2, mmioReq.data(15, 0)), 64 | mmioReq.data)) 65 | 66 | io.smaw.req.wlast := state === s_wb 67 | 68 | when (state === s_ready) { 69 | when (io.mmioReq.fire) { 70 | state := Mux(isStore(io.mmioReq.bits), s_wb, s_fetch) 71 | mmioReq := 
io.mmioReq.bits 72 | } 73 | } .elsewhen (state === s_fetch) { 74 | state := Mux(io.smar.resp.rvalid, s_resp, s_fetch) 75 | mmioReq.data := io.smar.resp.rdata 76 | } .elsewhen (state === s_wb){ 77 | if(!FPGAPlatform){ 78 | val debug_lo_byte = mmioReq.data(7, 0) 79 | val debug_lo_half = mmioReq.data(15, 0) 80 | dontTouch(debug_lo_byte) 81 | dontTouch(debug_lo_half) 82 | } 83 | 84 | 85 | state := Mux(io.smaw.resp.wready, s_resp, s_wb) 86 | } .elsewhen(state === s_resp) { 87 | state := Mux(io.mmioResp.fire, s_ready, s_resp) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/scala/backend/decode/DecodeStage.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | 8 | import iFu.frontend.FetchBufferResp 9 | 10 | class DecodeStageIO extends CoreBundle { 11 | val enq = Flipped(Decoupled(new FetchBufferResp)) 12 | val deq = Vec(coreWidth, Valid(new MicroOp)) 13 | val intrpt = Input(Bool()) 14 | val flush = Input(Bool()) 15 | val clear = Input(Bool()) 16 | val rollback = Input(Bool()) 17 | val dis_ready = Input(Bool()) 18 | val b1_mispred = Input(Bool()) 19 | val br_update = Input(new BrUpdateInfo) 20 | val xcpt_ftqIdx = Decoupled(UInt(log2Ceil(frontendParams.numFTQEntries).W)) 21 | } 22 | 23 | class DecodeStage extends CoreModule { 24 | val io = IO(new DecodeStageIO) 25 | 26 | private val finished = RegInit(0.U(coreWidth.W)) 27 | 28 | private val valids = Wire(Vec(coreWidth, Bool())) 29 | private val uops = Wire(Vec(coreWidth, new MicroOp)) 30 | for (w <- 0 until coreWidth) { 31 | val dec_unit = Module(new DecodeUnit) 32 | dec_unit.io.enq.uop := io.enq.bits.uops(w).bits 33 | dec_unit.io.interrupt := io.intrpt 34 | 35 | valids(w) := io.enq.valid && io.enq.bits.uops(w).valid && !finished(w) 36 | uops(w) := dec_unit.io.deq.uop 37 | } 38 | 39 | private val brmask_unit = Module(new 
BrMaskUnit) 40 | brmask_unit.io.flush := io.flush 41 | brmask_unit.io.br_update := io.br_update 42 | 43 | private val deq_valid = Wire(Vec(coreWidth, Bool())) 44 | for (w <- 0 until coreWidth) { 45 | brmask_unit.io.is_branch(w) := uops(w).allocate_brtag && !finished(w) 46 | brmask_unit.io.will_fire(w) := uops(w).allocate_brtag && deq_valid(w) 47 | 48 | uops(w).brTag := brmask_unit.io.br_tag(w) 49 | uops(w).brMask := brmask_unit.io.br_mask(w) 50 | } 51 | 52 | private val dec_xcpts = uops zip valids map { case (u, v) => u.xcpt_valid && v } 53 | private val xcpt_idx = PriorityEncoder(dec_xcpts) 54 | io.xcpt_ftqIdx.valid := dec_xcpts.reduce(_||_) 55 | io.xcpt_ftqIdx.bits := uops(xcpt_idx).ftqIdx 56 | 57 | private val brmask_full = brmask_unit.io.is_full 58 | private val xcpt_stall = dec_xcpts.reduce(_||_) && !io.xcpt_ftqIdx.ready 59 | private val hazards = (0 until coreWidth).map(w => 60 | valids(w) && ( 61 | !io.dis_ready || 62 | io.rollback || 63 | brmask_full(w) || 64 | xcpt_stall || 65 | io.b1_mispred || 66 | io.br_update.b2.mispredict || 67 | io.clear 68 | ) 69 | ) 70 | private val stalls = hazards.scanLeft(false.B)((s, h) => s || h).takeRight(coreWidth) 71 | deq_valid := (0 until coreWidth).map { w => valids(w) && !stalls(w) } 72 | 73 | io.enq.ready := deq_valid.last 74 | (deq_valid zip uops).zipWithIndex foreach { case((v, u), i) => 75 | io.deq(i).valid := v 76 | io.deq(i).bits := u 77 | } 78 | 79 | when (deq_valid.last || io.clear) { 80 | finished := 0.U 81 | } .otherwise { 82 | finished := deq_valid.asUInt | finished 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/backend/rename/FreeList.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import iFu.common._ 6 | import iFu.util._ 7 | 8 | class FreeList extends CoreModule { 9 | val pregSize = log2Ceil(numPRegs) 10 | 11 | val io = 
IO(new Bundle { 12 | val reqs = Input(Vec(coreWidth, Bool())) 13 | val alloc_pregs = Vec(coreWidth, Valid(UInt(pregSize.W))) 14 | 15 | // from ROB 16 | val dealloc_pregs = Input(Vec(coreWidth, Valid(UInt(pregSize.W)))) 17 | 18 | val ren_br_tags = Input(Vec(coreWidth, Valid(UInt(brTagSz.W)))) 19 | 20 | val brupdate = Input(new BrUpdateInfo) 21 | }) 22 | 23 | // all free except x0 24 | val freeList = RegInit(UInt(numPRegs.W), (~1.U(numPRegs.W)).asUInt) 25 | val allocsAfterBr = Reg(Vec(maxBrCount, UInt(numPRegs.W))) 26 | 27 | // allocate 28 | val selPregs = Wire(Vec(coreWidth, UInt(numPRegs.W))) 29 | val selPregsValid = VecInit(selPregs.map(_.orR)) 30 | var mask = freeList 31 | for (i <- 0 until coreWidth) { 32 | selPregs(i) := PriorityEncoderOH(mask) 33 | mask = mask & (~selPregs(i)).asUInt 34 | } 35 | 36 | // maintain stored allocated regs 37 | val regValids = Seq.fill(coreWidth) {RegInit(false.B)} 38 | val regIndices = Seq.fill(coreWidth) {Reg(UInt(pregSize.W))} 39 | // if selected reg is valid, always valid, otherwise may be used by req 40 | regValids zip selPregsValid zip io.reqs foreach { case ((regValid, selValid), req) => 41 | regValid := selValid || (regValid && !req) 42 | } 43 | // whether should fill in new regs 44 | val selPregFire = VecInit(selPregsValid zip regValids zip io.reqs map { case ((selValid, regValid), req) => 45 | (!regValid || req) && selValid 46 | }) 47 | regIndices zip selPregs zip selPregFire map { case ((regNum, selPreg), fire) => 48 | when (fire) { 49 | regNum := OHToUInt(selPreg) 50 | } 51 | } 52 | 53 | io.alloc_pregs zip regValids zip regIndices foreach { case ((io, valid), num) => 54 | io.valid := valid 55 | io.bits := num 56 | } 57 | 58 | val allocOHs = regIndices map {UIntToOH(_)} 59 | val allocMasks = (allocOHs zip io.reqs).scanRight(0.U(numPRegs.W)) { case ((alloc, req), mask) => 60 | mask | Mux(req, alloc, 0.U) 61 | } 62 | 63 | val selMask = ((selPregs zip selPregFire) map { case (reg, fire) => Mux(fire, reg, 0.U) }).reduce(_|_) 
64 | // free pregs in mispredicted branch 65 | val brDeallocs = Mux(io.brupdate.b2.mispredict, allocsAfterBr(io.brupdate.b2.uop.brTag), 0.U) 66 | // pregs to free from rob 67 | val deallocMask = 68 | (io.dealloc_pregs map { de => Mux(de.valid, UIntToOH(de.bits)(numPRegs - 1, 0), 0.U) }).reduce(_|_) | brDeallocs 69 | 70 | val brTagValids = VecInit(io.ren_br_tags map (_.valid)).asUInt 71 | 72 | for (i <- 0 until maxBrCount) { 73 | val updateList = VecInit(io.ren_br_tags.map(_.bits === i.U)).asUInt & brTagValids 74 | allocsAfterBr(i) := Mux( 75 | updateList.orR, 76 | Mux1H(updateList, allocMasks.slice(1, coreWidth + 1)), 77 | (allocsAfterBr(i) & (~brDeallocs).asUInt) | allocMasks.head 78 | ) 79 | } 80 | 81 | freeList := (freeList & (~selMask).asUInt | deallocMask) & (~1.U(numPRegs.W)).asUInt 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/backend/branch/BranchUnit.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.util._ 8 | 9 | case class BRUType ( 10 | normal: Boolean, 11 | to_exu: Boolean, 12 | to_lsu: Boolean 13 | ) 14 | 15 | class BRUIO(t: BRUType, brinfoWidth: Int) extends CoreBundle { 16 | val br_infos = Input(Vec(brinfoWidth, new BrResolutionInfo)) 17 | val rob_flush = Input(Bool()) 18 | val rob_head = if (t.normal || t.to_lsu) Input(UInt(robParameters.robAddrSz.W)) else null 19 | val jalr_tgt = if (t.normal) Input(UInt(vaddrBits.W)) else null 20 | 21 | val br_s1_mispredict = if (t.normal) Output(Bool()) else null 22 | val mis_br_ftqIdx = if (t.normal) Output(UInt(log2Ceil(frontendParams.numFTQEntries).W)) else null 23 | val br_update = Output(new BrUpdateInfo) 24 | } 25 | 26 | class BranchUnit(t: BRUType, brinfoWidth: Int) extends CoreModule { 27 | val io = IO(new BRUIO(t, brinfoWidth)) 28 | 29 | // 
----------------------------------------------------------------------- 30 | // clear branch mask and kill instructions 31 | val br_s1 = Reg(new BrUpdateMasks) 32 | br_s1.resolveMask := io.br_infos.map { i => 33 | (i.valid << i.uop.brTag).asUInt 34 | }.reduce(_ | _) 35 | br_s1.mispredictMask := io.br_infos.map { i => 36 | ((i.valid && !io.rob_flush && i.mispredict) << i.uop.brTag).asUInt 37 | }.reduce(_ | _) 38 | 39 | if (t.normal) { 40 | io.br_s1_mispredict := RegNext( 41 | io.br_infos.map { i => 42 | i.valid && !io.rob_flush && i.mispredict 43 | }.reduce(_ || _) 44 | ) 45 | } 46 | io.br_update.b1 := br_s1 47 | // ----------------------------------------------------------------------- 48 | if (t.normal || t.to_lsu) { 49 | // store branch resolution info for redirect 50 | val br_infos = Reg(Vec(brinfoWidth, new BrResolutionInfo)) // one register per io.br_infos port, so zip neither drops an input nor leaves an undriven entry in the scan below 51 | br_infos zip io.br_infos map { case (r, i) => { 52 | r := i 53 | r.valid := i.valid && !io.rob_flush // a ROB flush drops the resolution 54 | }} 55 | 56 | val br_s2 = Reg(new BrResolutionInfo) 57 | // find the oldest mispredicted branch 58 | var mis_found = false.B 59 | var oldest_mis_br = br_infos(0) 60 | for (br <- br_infos) { 61 | val mis = br.valid && br.mispredict 62 | val prio = ( // take this entry while no mispredict has been seen yet, or when it mispredicted and IsOlder ranks it before the current pick 63 | !mis_found || 64 | mis && IsOlder(br.uop.robIdx, oldest_mis_br.uop.robIdx, io.rob_head) 65 | ) 66 | mis_found = mis_found || mis 67 | oldest_mis_br = Mux(prio, br, oldest_mis_br) 68 | } 69 | br_s2.mispredict := mis_found 70 | br_s2.cfiType := (if (t.normal) oldest_mis_br.cfiType else DontCare) 71 | br_s2.taken := (if (t.normal) oldest_mis_br.taken else DontCare) 72 | br_s2.pcSel := (if (t.normal) oldest_mis_br.pcSel else DontCare) 73 | br_s2.uop := UpdateBrMask(io.br_update, oldest_mis_br.uop) 74 | br_s2.jalrTarget := (if (t.normal) RegNext(io.jalr_tgt) else DontCare) 75 | br_s2.targetOffset := (if (t.normal) oldest_mis_br.targetOffset else DontCare) 76 | 77 | if (t.normal) { 78 | io.mis_br_ftqIdx := oldest_mis_br.uop.ftqIdx 79 | } 80 | io.br_update.b2 := br_s2 81 | } else { 82 | 
io.br_update.b2 := DontCare 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/backend/register/Regfile.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import scala.collection.mutable.ArrayBuffer 7 | 8 | import iFu.common._ 9 | 10 | class RegisterFileReadPortIO ( 11 | val addrWidth: Int, val dataWidth: Int 12 | ) extends CoreBundle { 13 | val addr = Input(UInt(addrWidth.W)) 14 | val data = Output(UInt(dataWidth.W)) 15 | } 16 | 17 | class RegisterFileWritePort ( 18 | val addrWidth: Int, val dataWidth: Int 19 | ) extends CoreBundle { 20 | val addr = UInt(addrWidth.W) 21 | val data = UInt(dataWidth.W) 22 | } 23 | 24 | object WritePort { 25 | def apply( 26 | enq: DecoupledIO[ExeUnitResp], addrWidth: Int, dataWidth: Int, rtype: UInt 27 | ): Valid[RegisterFileWritePort] = { 28 | val wport = Wire(Valid(new RegisterFileWritePort(addrWidth, dataWidth))) 29 | 30 | enq.ready := true.B 31 | wport.valid := enq.valid && enq.bits.uop.dst_rtype === rtype 32 | wport.bits.addr := enq.bits.uop.pdst 33 | wport.bits.data := enq.bits.data 34 | wport 35 | } 36 | } 37 | 38 | abstract class RegisterFile ( 39 | numRegisters: Int, 40 | numReadPorts: Int, 41 | numWritePorts: Int, 42 | registerWidth: Int, 43 | bypassableArray: Seq[Boolean] 44 | ) extends CoreModule { 45 | val io = IO(new CoreBundle{ 46 | val read_ports = Vec( 47 | numReadPorts, new RegisterFileReadPortIO(pregSz, registerWidth) 48 | ) 49 | val write_ports = Flipped(Vec( 50 | numWritePorts, Valid(new RegisterFileWritePort(pregSz, registerWidth)) 51 | )) 52 | }) 53 | } 54 | 55 | class RegisterFileSynthesizable ( 56 | numRegisters : Int, 57 | numReadPorts : Int, 58 | numWritePorts: Int, 59 | registerWidth: Int, 60 | bypassableArray: Seq[Boolean] 61 | ) extends RegisterFile( 62 | numRegisters, 63 | numReadPorts, 64 | numWritePorts, 65 | 
registerWidth, 66 | bypassableArray 67 | ) { 68 | val regfile = Mem(numRegisters,UInt(registerWidth.W)) 69 | 70 | val read_addrs = io.read_ports.map(p => RegNext(p.addr)) // delay 1 cycle 71 | val read_data = Wire(Vec(numReadPorts,UInt(registerWidth.W))) 72 | 73 | for (i <- 0 until numReadPorts) { 74 | read_data(i) := regfile(read_addrs(i)) 75 | } 76 | 77 | if (bypassableArray.reduce(_||_)) { 78 | val bypassableWports = ArrayBuffer[Valid[RegisterFileWritePort]]() 79 | io.write_ports zip bypassableArray map { 80 | case (wport,b) => if (b) { bypassableWports += wport } 81 | } 82 | 83 | for ( i <- 0 until numReadPorts){ 84 | val bypassEns = bypassableWports.map( 85 | x => x.valid && x.bits.addr === read_addrs(i) 86 | ) 87 | val bypassData = Mux1H( 88 | VecInit(bypassEns.toSeq), 89 | VecInit(bypassableWports.map(_.bits.data).toSeq) 90 | ) 91 | 92 | io.read_ports(i).data := Mux(bypassEns.reduce(_|_), bypassData, read_data(i)) 93 | } 94 | } else { 95 | for (i <- 0 until numReadPorts) { 96 | io.read_ports(i).data := read_data(i) 97 | } 98 | } 99 | 100 | for (wport <- io.write_ports) { 101 | when (wport.valid) { 102 | regfile(wport.bits.addr) := wport.bits.data 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /doc/lsu_learn.md: -------------------------------------------------------------------------------- 1 | # LSU 异常处理机制初探 2 | 3 | ## 1. Introduction 4 | 本文将从 LSU 对外表现的异常情况(也就是`io.lsu_xcpt`)出发,阐述 LSU 处理访存阶段发生的异常的方法。 5 | 首先,我们先介绍一些基础背景: 6 | - LSU 所处理的异常,只会有两种来源:一是 LSU 自身乱序行为带来的写后读异常(此时称为`MINI_EXCEPTION_MEM_ORDERING`),二是 TLB 在进行地址翻译时检测到的异常。 7 | 8 | ## 2. 相关变量溯源 9 | 10 | ### 2.1 Part 1 11 | 1. 首先,我们从最顶层的`io.lsu_xcpt`开始,其直接来源于一个名为`r_xcpt`的寄存器(当然,还有一个名为`r_xcpt_valid`的寄存器,用于标记`r_xcpt`是否有效)。但是,在对外传递异常时,还需要额外检测当前 ROB 是否正在处理另外的异常,以及发生异常的指令有没有被分支杀掉。 12 | 2.
接下来,我们先看`r_xcpt_valid`,我们观察其代码: 13 | ```scala 14 | r_xcpt_valid := 15 | (ld_xcpt_valid || tlb_xcpt_valid) && 16 | !io.core.exception && 17 | !IsKilledByBranch(io.core.brupdate, xcpt_uop) 18 | ``` 19 | 我们逐项进行分析解释: 20 | 1. 第一部分表示是否有异常发生,而二者正是我们上文提到的两种异常来源。 21 | 2. 第二部分检测当前 ROB 是否正在处理另外的异常,注意,这和上面的检查并不重复,因为`r_xcpt_valid`是一个寄存器,因此此处的检查相较于上文中的检查,提前了一个周期。 22 | 3. 检查发生异常的指令有没有被分支杀掉,同样的,这个检查也是提前了一个周期。 23 | 3. 在看完`r_xcpt_valid`之后,不妨先点到为止,把目光转向`r_xcpt`。同样的,我们直接给出其代码: 24 | ```scala 25 | r_xcpt.uop := xcpt_uop 26 | r_xcpt.uop.brMask := GetNewBrMask(io.core.brupdate, xcpt_uop) 27 | r_xcpt.cause := Mux(use_tlb_xcpt, tlb_xcpt_cause, MINI_EXCEPTION_MEM_ORDERING) 28 | r_xcpt.badvaddr := tlb_xcpt_vaddr 29 | ``` 30 | 1. 首先需要明确的是,`r_xcpt`会直接存储发生异常的指令,以及异常的原因,对应的地址信息。 31 | 2. 异常指令直接来自于`xcpt_uop`,但是需要在当前周期更新其分支掩码。 32 | 3. 根据`use_tlb_xcpt`的值,来选择异常的原因。 33 | 4. 截至目前,我们分析了`r_xcpt_valid`和`r_xcpt`,也即依赖深度为 1 的变量。二者都是作为寄存器存在,因此,我们可以知道,检测到异常后,并非立刻抛出,而是等到下一个周期再抛出。这也符合我们通常的处理方式。 34 | 35 | ### 2.2 Part 2 36 | 在第一部分中,我们实际上留下了不少坑,例如,`ld_xcpt_valid`和`tlb_xcpt_valid`的来源是什么,在哪个阶段产生?`xcpt_uop`、`use_tlb_xcpt`、`tlb_xcpt_cause`、`tlb_xcpt_vaddr`这类变量的来源又是什么? 37 | 因此,在这一部分,我们开始分析这些依赖深度为 2 的变量。 38 | 1. 首先,我们先看`ld_xcpt_valid`,其代码如下: 39 | ```scala 40 | val ld_xcpt_valid = failed_loads.reduce(_|_) 41 | ``` 42 | 从这里可以看出,`ld_xcpt_valid`检测所有`load entry`,只要存在访存违例的情况,就会被置为`true`。 43 | 2. 接下来,我们看`tlb_xcpt_valid`,其代码如下: 44 | ```scala 45 | val tlb_xcpt_valids = RegNext(widthMap(w => 46 | exe_tlb_valid(w) && 47 | dtlb.io.resp(w).exception.valid && 48 | !io.core.exception && 49 | !IsKilledByBranch(io.core.brupdate, exe_tlb_uop(w)) 50 | )) 51 | tlb_xcpt_valid := tlb_xcpt_valids.reduce(_||_) 52 | ``` 53 | 1. `exe_tlb_valid(w)`表示当前周期是否发送了一条 TLB 请求。请求都没有,何谈异常? 54 | 2. `dtlb.io.resp(w).exception.valid`表示 DTLB 是否返回了一个异常。 55 | 3. 之后的两项检查和之前的类似,不再赘述。 56 | 4. 最后,`tlb_xcpt_valid`汇总当前周期所有的异常情况。 57 | 3. 
先讨论`use_tlb_xcpt`,代码如下: 58 | ```scala 59 | val use_tlb_xcpt = 60 | ( 61 | tlb_xcpt_valid && IsOlder(tlb_xcpt_uop.robIdx, ld_xcpt_uop.robIdx, io.core.rob_head_idx) 62 | ) || 63 | !ld_xcpt_valid 64 | ``` 65 | 1. 如果当前没有访存违例,那么直接使用 TLB 的异常(可能此时 TLB 也没有异常,但是无妨,因为相关的`valid`位会被置为`false`)。 66 | 2. 如果二者同时存在,会选取更早的那条指令。这是通过比较其在 ROB 中的位置来实现的(ROB 中的指令是顺序排列的)。 67 | 4. 然后是`xcpt_uop`,同样找到其代码: 68 | ```scala 69 | val xcpt_uop = Mux(use_tlb_xcpt, tlb_xcpt_uop, ld_xcpt_uop) 70 | ``` 71 | 在已经有了`use_tlb_xcpt`的情况下,便可以通过一个`Mux`选择异常指令。 72 | 73 | ### 2.3 Part 3 74 | 同样的,在上面的分析中,新引入的坑有哪些呢?`failed_loads`、`tlb_xcpt_uop`、`ld_xcpt_uop`。其余的内容都比较明显,不过多的赘述。 75 | 1. `failed_loads`,根据存储的信息,检查每个槽位的`load`指令是否有访存违例。检查的过程在这里暂时不做展开。 76 | 2. `tlb_xcpt_uop`,记录s0 阶段发起 TLB 请求的指令,依据 TLB 的返回结果,取出发生异常的指令。 77 | 3. `ld_xcpt_uop`,依据检查的结果,直接取出最早的一条发生访存违例的指令(这里检测到后会马上抛出异常,除非其被更前面的异常抑制,那么此时,会发生回滚,也就会取消掉这条指令,访存违例也就不会发生,所以就算没有被处理,也没有什么问题)。 78 | 79 | ## 3. 总结 80 | 经过上述的分析,我们可以得到如下的一个大致的异常处理流程: 81 | 1. s0 阶段:将指令的地址送到 TLB 进行地址翻译 82 | 2. s1 阶段:处理 TLB 的返回结果,以及查看访存违例的情况(实际上每周期都会检查,为了便于理解,我们将其放在这里) 83 | 3. s2 阶段:根据检测到的异常情况,将异常信息送往上层处理 84 | -------------------------------------------------------------------------------- /doc/issue.md: -------------------------------------------------------------------------------- 1 | # iFu-IssueStage 2 | 3 | [toc] 4 | 5 | ## 架构设计 6 | 7 | ### 基类 -- `AbsIssueUnit` 8 | 9 | #### IO 接口 10 | - `disUop` [input] 从dispatch阶段发送过来的`uop`。 11 | - `wakeupPorts` [input] 从外界传入的`wakeup`端口。 12 | - `specLdWakeupPorts` [input] load指令推测唤醒端口。 13 | - `ldMiss` [input] load指令发生miss,不能及时进行数据前递。该信号未指定具体发生miss的指令,此处我们简单处理,视为所有load指令均发生miss。 14 | - `fuTypes` [input] 下一阶段(执行阶段)能接收哪些类型的指令,因为不同的功能单元能执行的功能不完全相同。 15 | - `brUpdate` [input] 分支预测更新。 16 | - `flushPipeline` [input] 清空流水线。 17 | - `issueValids` [output] 发射出去的指令的有效位。 18 | - `issueUops` [output] 发射出去的指令。 19 | 20 | #### 内部元件 21 | - `numIssueSlots`个`IssueSlot`组成的`slots`。 22 | 作为顶层,`AbsIssueUnit`会将唤醒信息、分支更新信息、flush信息等传递给`slots`。 23 | 24 | #### 内部逻辑 25 | 1.
[组合逻辑] `dispatch`阶段发来的`uop`并不带有`issue window state`,此时需要计算`iwState`。一般都是`s_valid_1`,但是对于写内存指令(store指令等)需要被标记为`s_valid_2`(因为其可能会分为两个阶段被发射:地址计算和数据计算)。 26 | 2. [组合逻辑] `iw_p1_poisoned`和`iw_p2_poisoned`被默认置为`false`。 27 | 3. [组合逻辑] 将发过来的`uop`连同上述信号统一放到`disUops`中。 28 | 29 | ### `IssueSlot` 30 | 31 | #### IO 接口 32 | - `valid` [output] 该槽位中的指令是否有效。 33 | - `willBeValid` [output] 当前周期的指令虽然被选中发射了,但是由于store指令分两次发射、load推测唤醒错误等情况,导致未能发射出去。 34 | - `request` [output] 数据已就绪,请求发射。 35 | - `grant` [input] 该槽位被选中,当前周期发射。 36 | - `brUpdate` [input] 分支预测更新。 37 | - `kill` [input] 清空该槽位,直接来自`flushPipeline`。 38 | - `clear` [input] 清空该槽位。该槽位指令被移出,且当前周期没有指令移入。 39 | - `ldSpecMiss` [input] load指令发生miss。直接来自`ldMiss`。 40 | - `wakeupPorts` [input] wakeup端口。 41 | - `specLdWakeupPorts` [input] load指令推测唤醒端口。 42 | - `inUop` [input] 当前周期输入的`uop`。 43 | - `outUop` [output] 从当前槽位输出的`uop`。用于构成压缩队列。 44 | - `uop` [output] 被发射出去的`uop`。 45 | 46 | #### 内部元件 47 | - `state` 记录当前槽位存储的指令的状态。 48 | - `slot_uop` 槽中存储的指令。 49 | - `p1` 指令的第一个操作数是否就绪。 50 | `p1`与`slot_uop`是处于不同时序的。具体来说,对于同一条指令,当指令位于`slot_uop`中时,`p1`表达的是该指令的第一个操作数在上一个周期时(包括上一个周期)是否就绪。也就是说,指令会经历至少一周期的唤醒(即使数据已经就位),才会被发射。 51 | - `p2` 指令的第二个操作数是否就绪。 52 | - `p1_poisoned` 第一个操作数就绪的来源是load指令的推测唤醒。 53 | - `p2_poisoned` 第二个操作数就绪的来源是load指令的推测唤醒。 54 | 55 | #### 内部逻辑 56 | 1. [组合逻辑] 会依据当前的`state`和`slot_uop`以及传入的信息计算出`next_state`和`next_uop`。 57 | 1. `next_state`主要是会受到`brUpdate`、`kill`、`clear`、指令发射这几类情况的影响。 58 | 2. `next_uop`同样也是会跟着更新。另外,`next_uop`会将`p1`等信息记录到`uop`中。 59 | 2. [组合逻辑] 根据`in_uop`和传入的`wakeup`信息,更新`p1`和`p2`。 60 | 1. 这也就是`p1`等信号早一周期的原因。 61 | 3. [组合逻辑] 根据`p1`等信号,以及`state`,决定是否发射。 62 | 1. 发射时可能会修改`uopcode`和`lrs1_rtype`等信息。(主要是因为store可能会分两次发射) 63 | 4. [组合逻辑] 会计算当前槽位下一周期是否还会有效。 64 | 1. 有没有新的有效指令进来。 65 | 2. 当前的指令是否被发射出去。 66 | 5.
[时序逻辑] 由于是一个压缩队列,所以数据来源有两个,一是`in_uop`,二是上一个周期的`next_uop`。因此这里会有一个环。 67 | 68 | ### 具体实现 -- `IssueUnitAgeOrdered` 69 | - 继承自`AbsIssueUnit`,并采用了`AgeOrder`的发射策略。底层的实现方法是将`IssueSlot`组织成一个压缩队列。每周期至多前移4个槽位。 70 | 71 | #### 内部逻辑 72 | - 主要分为两块:压缩队列逻辑和发射逻辑。 73 | 74 | ##### 压缩队列的实现 75 | - 压缩通过计算移位的偏移来实现。其中,偏移量使用one hot的格式来表示。通过两层`for`循环(外层遍历所有槽位,内层遍历所有偏移量)来做压缩。 76 | - `maxShift`为`dispatchWidth`,这里做shift最主要的目的是要接收被派遣过来的几条指令,因此只需要至多移位`dispatchWidth`。这样可以避免出现过大的组合逻辑电路。 77 | - `vacnats`取的是每个槽位的`valid`信号的反,即表示该槽位是否为空,再在后面拼接上新派遣过来的指令的`valid`信号的反。 78 | - `shamtOH`表示的是每个槽位需要往前移动的位数,用one hot的形式表示。 79 | - `getShamtOH`的逻辑是:当前槽的移位量至少等于前一个槽位,如果前一个槽位是空的,则比前一个槽位多移一个位置。但是,如果前一个槽位的移位量已经是`maxShift`,则不再多移动。另外,移位量从`0`到`1`需要显式的检查,后续的移位量则可以通过左移一位来实现。 80 | - 移位是通过把后面的槽位的`outUop`赋值给前面的槽位的`inUop`来实现的。在移位时,遍历所有的槽位,然后遍历这个槽位往后的`maxShift`个槽位,检查其`shamtOH`是否等于`1 << (j - 1)`,如果是,即表示其后的第`j`个槽位需要移动到当前槽位中。 81 | - 最后,如果发生了移位,需要`clear`掉发生移位的槽位。不然就会有重复的指令。 82 | - 存在反压现象,当队列不足以接收所有`dispatch_uop`时,会通过`ready`信号,实现只接受一部分指令,并与上一阶段完成消息的传递。此处计数用的是直接对空槽位进行`popcount`。 83 | 84 | ##### 选择发射的指令 85 | - 使用两层`for`循环实现,此处会用到`chisel`中的变量。 86 | - 外层循环遍历所有槽位,内层循环遍历所有功能单元。 87 | - 如果指令和执行单元的功能标志位匹配,则称之为可分配。 88 | - 如果可分配、该槽位发起了发射请求、该槽位的指令此前没有被发射过、该功能单元此前没有接受过指令,则发射该指令。 89 | 90 | ## 一些实现细节 91 | - 被发射出去的`uop`并没有使用`brUpdate`来更新,在后续的`rrd`阶段会进行更新。(具体来说是`rrd-1`阶段) 92 | 93 | ## 最后修改日期 94 | - 2024/04/17 95 | - 2023/07/01 96 | -------------------------------------------------------------------------------- /src/main/scala/frontend/bpu/Bim.scala: -------------------------------------------------------------------------------- 1 | package iFu.frontend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.util._ 7 | import iFu.frontend.FrontendUtils._ 8 | 9 | class BIMPredictMeta extends Bundle with HasBimParameters { 10 | val bim = UInt(2.W) 11 | } 12 | 13 | class BIMIO extends Bundle with HasBimParameters { 14 | val s0valid = Input(Bool()) 15 | val s0pc = Input(UInt(vaddrBits.W)) 16 | 17 | val s2taken = Output(Vec(fetchWidth,
Bool())) 18 | 19 | val s3meta = Output(Vec(fetchWidth, new BIMPredictMeta)) 20 | 21 | val s1update = Input(Valid(new BranchPredictionUpdate)) 22 | } 23 | 24 | class BimPredictor extends Module with HasBimParameters { 25 | val io = IO(new BIMIO) 26 | 27 | val bim_ram = SyncReadMem(nSets, Vec(fetchWidth, UInt(2.W))) 28 | 29 | // --------------------------------------------- 30 | // Reset Logic 31 | val reset_en = RegInit(false.B) 32 | val reset_idx = RegInit(0.U(log2Ceil(nSets).W)) 33 | when (reset_en) { 34 | reset_idx := reset_idx + 1.U 35 | } 36 | when (reset_idx === (nSets - 1).U) { 37 | reset_en := false.B 38 | } 39 | // --------------------------------------------- 40 | 41 | // --------------------------------------------- 42 | // Predict Logic 43 | val s0_valid = io.s0valid 44 | val s0_idx = fetchIdx(io.s0pc) 45 | 46 | val s2_bim = RegNext(VecInit( 47 | bim_ram.read(s0_idx.asUInt, s0_valid).map(_.asTypeOf(UInt(2.W))) 48 | )) 49 | 50 | // val s2_valid = RegNext(RegNext(io.s0valid)) 51 | 52 | for (w <- 0 until fetchWidth) { 53 | // val resp_valid = !reset_en && s2_valid 54 | // io.s2taken(w) := resp_valid && s2_bim(w)(1) 55 | io.s2taken(w) := s2_bim(w)(1) 56 | } 57 | // --------------------------------------------- 58 | 59 | // --------------------------------------------- 60 | // Prepare Meta for Update 61 | val s2_meta = Wire(Vec(fetchWidth, new BIMPredictMeta)) 62 | for (w <- 0 until fetchWidth) { 63 | s2_meta(w).bim := s2_bim(w) 64 | } 65 | io.s3meta := RegNext(s2_meta) 66 | // --------------------------------------------- 67 | 68 | // --------------------------------------------- 69 | // Update Logic 70 | val s1_update = io.s1update 71 | val s1_update_idx = fetchIdx(io.s1update.bits.pc) 72 | val s1_update_meta = VecInit(s1_update.bits.meta.map(_.bimMeta)) 73 | 74 | val s1_update_mask = Wire(Vec(fetchWidth,Bool())) 75 | val s1_update_data = Wire(Vec(fetchWidth, UInt(2.W))) 76 | 77 | for (w <- 0 until fetchWidth) { 78 | s1_update_mask(w) := false.B 79 | 
s1_update_data(w) := s1_update_meta(w).asUInt 80 | 81 | when (s1_update.valid && (s1_update.bits.brMask(w) ||(s1_update.bits.cfiIdx.valid && s1_update.bits.cfiIdx.bits === w.U))) { 82 | val was_taken = ( 83 | (s1_update.bits.cfiIdx.valid) && 84 | (s1_update.bits.cfiIdx.bits === w.U) && 85 | ( 86 | (s1_update.bits.cfiIsBr && s1_update.bits.brMask(w) && s1_update.bits.cfiTaken) || 87 | s1_update.bits.cfiIsJal 88 | ) 89 | ) 90 | val old_bim = s1_update_meta(w).asUInt 91 | 92 | s1_update_mask(w) := true.B 93 | s1_update_data(w) := bimWrite(old_bim, was_taken) 94 | } 95 | } 96 | 97 | bim_ram.write( 98 | Mux(reset_en, reset_idx, s1_update_idx), 99 | Mux(reset_en, VecInit(Seq.fill(fetchWidth){ 2.U(2.W) }), s1_update_data), 100 | Mux(reset_en, (~(0.U(fetchWidth.W))), s1_update_mask.asUInt).asBools 101 | ) 102 | 103 | // --------------------------------------------- 104 | } 105 | -------------------------------------------------------------------------------- /src/test/scala/MultTester.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import scala.util.Random 4 | 5 | import chisel3._ 6 | import chiseltest._ 7 | import org.scalatest.flatspec.AnyFlatSpec 8 | 9 | trait MultTestFunc { 10 | val FN_MUL = 3 11 | val FN_MULH = 1 12 | val FN_MULHU= 0 13 | val funcs = Array(FN_MUL, FN_MULH, FN_MULHU) 14 | 15 | def mult(fn: Int, op1: Int, op2: Int): Int = { 16 | fn match { 17 | case FN_MUL => op1 * op2 18 | case FN_MULH => ((op1.toLong * op2.toLong) >> 32).toInt 19 | case FN_MULHU => (((op1 & 0xFFFFFFFFL) * (op2 & 0xFFFFFFFFL)) >> 32).toInt 20 | } 21 | } 22 | 23 | def testOne(dut: MultStar, fn: Int, op1: Int, op2: Int): Unit = { 24 | val refResult = mult(fn, op1, op2) 25 | dut.io.req.valid.poke(true.B) 26 | dut.io.req.bits.fn.poke(fn.U) 27 | dut.io.req.bits.op1.poke(BigInt(op1 & 0xFFFFFFFFL).U) 28 | dut.io.req.bits.op2.poke(BigInt(op2 & 0xFFFFFFFFL).U) 29 | dut.clock.step(1) 30 | dut.io.req.valid.poke(false.B) 31 | 
dut.clock.step(dut.latency - 1) 32 | dut.io.resp.valid.expect(true.B) 33 | dut.io.resp.bits.data.expect(BigInt(refResult & 0xFFFFFFFFL).U) 34 | dut.clock.step(1) 35 | } 36 | 37 | def testFn(dut: MultStar): Unit = { 38 | val times = 4 39 | val random = new Random() 40 | val op1 = Array.fill(times)(random.nextInt()) 41 | val op2 = Array.fill(times)(random.nextInt()) 42 | for (a <- op1) { 43 | for (b <- op2) { 44 | for (fn <- funcs) { 45 | // println(s"Testing $fn $a $a") 46 | testOne(dut, fn, a, a) 47 | // println(s"Testing $fn $a $b") 48 | testOne(dut, fn, a, b) 49 | // println(s"Testing $fn $a ${-b}") 50 | testOne(dut, fn, a, -b) 51 | // println(s"Testing $fn ${-a} $b") 52 | testOne(dut, fn, -a, b) 53 | } 54 | } 55 | } 56 | } 57 | 58 | def testPipe(dut: MultStar, fn: Int): Unit = { 59 | val times = 4 60 | val random = new Random() 61 | val op1 = Array.fill(times)(random.nextInt()) 62 | val op2 = Array.fill(times)(random.nextInt()) 63 | var cnt = 0 64 | for (i <- 0 until times) { 65 | for (j <- 0 until times) { 66 | dut.io.req.valid.poke(true.B) 67 | dut.io.req.bits.fn.poke(fn.U) 68 | dut.io.req.bits.op1.poke(BigInt(op1(i) & 0xFFFFFFFFL).U) 69 | dut.io.req.bits.op2.poke(BigInt(op2(j) & 0xFFFFFFFFL).U) 70 | dut.clock.step(1) 71 | 72 | if (dut.io.resp.valid.peek().litToBoolean) { 73 | val refResult = mult(fn, op1(cnt / times), op2(cnt % times)) 74 | dut.io.resp.bits.data.expect(BigInt(refResult & 0xFFFFFFFFL).U) 75 | cnt += 1 76 | } 77 | } 78 | } 79 | dut.io.req.valid.poke(false.B) 80 | for (i <- 0 until dut.latency - 1) { 81 | dut.clock.step(1) 82 | 83 | val refResult = mult(fn, op1(cnt / times), op2(cnt % times)) 84 | dut.io.resp.bits.data.expect(BigInt(refResult & 0xFFFFFFFFL).U) 85 | cnt += 1 86 | } 87 | } 88 | 89 | def testFnPipe(dut: MultStar): Unit = { 90 | for (fn <- funcs) { 91 | testPipe(dut, fn) 92 | } 93 | } 94 | } 95 | 96 | class MultTester extends AnyFlatSpec with ChiselScalatestTester with MultTestFunc { 97 | "MultStar" should "pass" in { 98 | test(new 
MultStar()) { dut => 99 | println("Testing MultStar") 100 | testFn(dut) 101 | testFnPipe(dut) 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/scala/common/MicroOp.scala: -------------------------------------------------------------------------------- 1 | package iFu.common 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.backend.AluFuncCode 7 | import iFu.common.Consts._ 8 | 9 | class MicroOp extends CoreBundle { 10 | // --------------------------------------------------------- 11 | val numFTQEntries: Int = frontendParams.numFTQEntries 12 | val fetchBytes: Int = frontendParams.fetchBytes 13 | val robAddrSz: Int = robParameters.robAddrSz 14 | val ldqAddrSz: Int = lsuParameters.ldqAddrSz 15 | val stqAddrSz: Int = lsuParameters.stqAddrSz 16 | // --------------------------------------------------------- 17 | val pcLowBits: UInt = UInt(log2Ceil(fetchBytes).W) 18 | val instr: UInt = UInt(coreInstrBits.W) 19 | val uopc: UInt = UInt(UOPC_SZ.W) 20 | val iqType: UInt = UInt(IQT_SZ.W) 21 | val fuCode: UInt = UInt(FUC_SZ.W) 22 | 23 | val ftqIdx: UInt = UInt(log2Ceil(frontendParams.numFTQEntries).W) 24 | 25 | val isBr: Bool = Bool() 26 | val isJal: Bool = Bool() 27 | val isJalr: Bool = Bool() 28 | val taken: Bool = Bool() 29 | 30 | val brMask: UInt = UInt(maxBrCount.W) 31 | val brTag: UInt = UInt(brTagSz.W) 32 | 33 | val ldst: UInt = UInt(lregSz.W) 34 | val lrs1: UInt = UInt(lregSz.W) 35 | val lrs2: UInt = UInt(lregSz.W) 36 | val ldst_val: Bool = Bool() 37 | val dst_rtype: UInt = UInt(RT_X.getWidth.W) 38 | val lrs1_rtype: UInt = UInt(RT_X.getWidth.W) 39 | val lrs2_rtype: UInt = UInt(RT_X.getWidth.W) 40 | val pdst: UInt = UInt(pregSz.W) 41 | val prs1: UInt = UInt(pregSz.W) 42 | val prs2: UInt = UInt(pregSz.W) 43 | val stale_pdst: UInt = UInt(pregSz.W) 44 | 45 | val bypassable: Bool = Bool() 46 | 47 | val immPacked: UInt = UInt(26.W) 48 | 49 | val robIdx: UInt = UInt(robAddrSz.W) 50 | 
val ldqIdx: UInt = UInt(ldqAddrSz.W) 51 | val stqIdx: UInt = UInt(stqAddrSz.W) 52 | 53 | val iwState: UInt = UInt(2.W) 54 | val iw_p1_poisoned: Bool = Bool() 55 | val iw_p2_poisoned: Bool = Bool() 56 | 57 | val prs1_busy: Bool = Bool() 58 | val prs2_busy: Bool = Bool() 59 | 60 | val xcpt_valid: Bool = Bool() 61 | val xcpt_cause: UInt = UInt(CauseCode.microCauseBits.W) 62 | val vaddrWriteEnable: Bool = Bool() 63 | val ctrl = new CtrlSignals 64 | 65 | val tlb_op: UInt = UInt(5.W) 66 | 67 | val use_ldq: Bool = Bool() 68 | val use_stq: Bool = Bool() 69 | 70 | val mem_size: UInt = UInt(2.W) 71 | val mem_signed: Bool = Bool() 72 | 73 | val is_dbar: Bool = Bool() 74 | val is_ibar: Bool = Bool() 75 | val is_ll: Bool = Bool() 76 | val is_sc: Bool = Bool() 77 | 78 | val is_unique: Bool = Bool() 79 | val flush_on_commit: Bool = Bool() 80 | 81 | val debug_inst: UInt = if (!FPGAPlatform) UInt(coreInstrBits.W) else null 82 | val debug_pc: UInt = if (!FPGAPlatform) UInt(vaddrBits.W) else null 83 | val debug_mispred: Bool = if (!FPGAPlatform) Bool() else null 84 | val debug_load_uncacheable: Bool = if (!FPGAPlatform) Bool() else null 85 | 86 | def allocate_brtag: Bool = isBr || isJalr 87 | def rf_wen: Bool = dst_rtype =/= RT_X 88 | def fu_code_is(_fu: UInt): Bool = (fuCode & _fu) =/= 0.U 89 | def is_nop: Bool = uopc === uopNOP 90 | } 91 | 92 | class CtrlSignals extends CoreBundle { 93 | val br_type: UInt = UInt(BR_N.getWidth.W) 94 | val op1_sel: UInt = UInt(OP1_X.getWidth.W) 95 | val op2_sel: UInt = UInt(OP2_X.getWidth.W) 96 | val imm_sel: UInt = UInt(immX.getWidth.W) 97 | val op_fcn: UInt = UInt(AluFuncCode().SZ_ALU_FN.W) 98 | val csr_cmd: UInt = UInt(CSR_SZ.W) 99 | val is_load: Bool = Bool() 100 | val is_sta: Bool = Bool() 101 | val is_std: Bool = Bool() 102 | } 103 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Utils.scala: -------------------------------------------------------------------------------- 1 | 
package iFu.lsu.utils 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.common.Consts._ 8 | 9 | object GenByteMask { 10 | def apply(addr: UInt, size: UInt): UInt = { 11 | val mask = MuxCase(0.U(4.W), Seq( 12 | (size === 0.U) -> (1.U << addr(1, 0)), 13 | (size === 1.U) -> (3.U << (addr(1) << 1)), 14 | (size === 2.U) -> 15.U(4.W) 15 | )) 16 | mask 17 | } 18 | } 19 | 20 | class ForwardingAgeLogic(num_entries: Int) extends CoreModule{ 21 | /** *********************************** */ 22 | val numStqEntries = lsuParameters.numSTQEntries 23 | val numLdqEntries = lsuParameters.numLDQEntries 24 | val stqAddrSz = lsuParameters.stqAddrSz 25 | val ldqAddrSz = lsuParameters.ldqAddrSz 26 | /** ************************************ */ 27 | val io = IO(new Bundle { 28 | val addr_matches = Input(UInt(num_entries.W)) // bit vector of addresses that match 29 | // between the load and the SAQ 30 | val youngest_st_idx = Input(UInt(stqAddrSz.W)) // needed to get "age" 31 | 32 | val forwarding_val = Output(Bool()) 33 | val forwarding_idx = Output(UInt(stqAddrSz.W)) 34 | }) 35 | 36 | // generating mask that zeroes out anything younger than tail 37 | val age_mask = Wire(Vec(num_entries, Bool())) 38 | for (i <- 0 until num_entries) { 39 | age_mask(i) := true.B 40 | when(i.U >= io.youngest_st_idx) // currently the tail points PAST last store, so use >= 41 | { 42 | age_mask(i) := false.B 43 | } 44 | } 45 | 46 | // Priority encoder with moving tail: double length 47 | val matches = Wire(UInt((2 * num_entries).W)) 48 | matches := Cat(io.addr_matches & age_mask.asUInt, 49 | io.addr_matches) 50 | 51 | val found_match = Wire(Bool()) 52 | found_match := false.B 53 | io.forwarding_idx := 0.U 54 | 55 | // look for youngest, approach from the oldest side, let the last one found stick 56 | for (i <- 0 until (2 * num_entries)) { 57 | when(matches(i)) { 58 | found_match := true.B 59 | io.forwarding_idx := (i % num_entries).U 60 | } 61 | } 62 | 63 | 
io.forwarding_val := found_match 64 | } 65 | object storeMaskGen{ 66 | def apply(addr: UInt, memSize: UInt): UInt = { 67 | val mask = WireInit(0.U(4.W)) 68 | when(memSize === 0.U){ 69 | when(addr === 0.U){mask := "b0001".U} 70 | .elsewhen(addr === 1.U){mask := "b0010".U} 71 | .elsewhen(addr === 2.U){mask := "b0100".U} 72 | .elsewhen(addr === 3.U){mask := "b1000".U} 73 | }.elsewhen(memSize === 1.U){ 74 | when(addr(1) === 0.U){mask := "b0011".U} 75 | .elsewhen(addr(1) === 1.U){mask := "b1100".U} 76 | }.elsewhen(memSize === 2.U){ 77 | mask := "b1111".U 78 | } 79 | mask 80 | } 81 | 82 | } 83 | 84 | object loadDataGen { 85 | def apply(addr: UInt, data: UInt, mem_size: UInt, mem_signed: Bool): UInt = { 86 | var res = data 87 | for (i <- (2 - 1) to 0 by -1) { 88 | val pos = 8 << i 89 | val shifted = Mux(addr(i), res(2 * pos - 1, pos), res(pos - 1, 0)) 90 | res = Cat( 91 | Mux( 92 | mem_size === i.U, 93 | Fill(8 * 4 - pos, mem_signed && shifted(pos - 1)), // sign/zero extend 94 | res(8 * 4 - 1, pos) // keep the upper bits 95 | ), 96 | shifted 97 | ) 98 | } 99 | res.asUInt 100 | } 101 | } 102 | 103 | object storeDataGen { 104 | def apply(addr: UInt, data: UInt, memSize: UInt): UInt = { 105 | (data << (addr << 3.U))(31, 0).asUInt 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/main/scala/backend/execute/FunctionalUnit/Multiplier.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import iFu.common._ 6 | 7 | class MultFuncCode { 8 | val SZ_MUL_FN = 4 9 | def FN_X = BitPat("b????") 10 | def FN_MUL = 3.U(SZ_MUL_FN.W) // 0b11 11 | def FN_MULH = 1.U(SZ_MUL_FN.W) // 0b01 12 | def FN_MULHU= 0.U(SZ_MUL_FN.W) // 0b00 13 | 14 | def LorH(cmd: UInt) = cmd(1) 15 | def SorU(cmd: UInt) = cmd(0) 16 | } 17 | 18 | object MultFuncCode { 19 | def apply() = new MultFuncCode() 20 | } 21 | 22 | class MultReq[T <: 
MultFuncCode](val mulFn: T) extends CoreBundle { 23 | val fn = UInt(mulFn.SZ_MUL_FN.W) 24 | val op1 = UInt(xLen.W) 25 | val op2 = UInt(xLen.W) 26 | } 27 | 28 | class MultResp extends CoreBundle { 29 | val data = UInt(xLen.W) 30 | } 31 | 32 | abstract class AbstractMult[T <: MultFuncCode](val mulFn: T) extends CoreModule { 33 | val io = IO(new Bundle { 34 | val req = Flipped(Valid(new MultReq(mulFn))) 35 | val resp = Valid(new MultResp()) 36 | }) 37 | } 38 | 39 | class MultStar(val debug: Boolean = false, val latency: Int = 3) extends AbstractMult(MultFuncCode()) { 40 | val req = Pipe(io.req) 41 | val lhs = Cat(mulFn.SorU(req.bits.fn) & req.bits.op1(xLen - 1), req.bits.op1).asSInt 42 | val rhs = Cat(mulFn.SorU(req.bits.fn) & req.bits.op2(xLen - 1), req.bits.op2).asSInt 43 | val prod = lhs * rhs 44 | val res = Mux(mulFn.LorH(req.bits.fn), prod(xLen - 1, 0), prod(2 * xLen - 1, xLen)) 45 | val resp = Pipe(req, latency - 1) 46 | io.resp.valid := resp.valid 47 | io.resp.bits.data := Pipe(req.valid, res, latency - 1).bits 48 | 49 | if (debug) { 50 | val cnt = RegInit(0.U(32.W)) 51 | cnt := cnt + 1.U 52 | printf(p"------------clock: ${cnt}------------\n") 53 | when (io.resp.valid) { 54 | printf(p"req: ${resp}\n") 55 | printf(p"resp: ${io.resp}\n") 56 | printf("\n") 57 | } .otherwise { 58 | printf("no response\n") 59 | } 60 | } 61 | } 62 | 63 | class MultDSP48E1(val debug: Boolean = false, val latency: Int = 2) extends AbstractMult(MultFuncCode()) { 64 | // stage 1: calculate partial products 65 | val op1 = Mux(mulFn.SorU(io.req.bits.fn) & io.req.bits.op1(xLen - 1), -io.req.bits.op1, io.req.bits.op1) 66 | val op2 = Mux(mulFn.SorU(io.req.bits.fn) & io.req.bits.op2(xLen - 1), -io.req.bits.op2, io.req.bits.op2) 67 | val pp = RegInit(VecInit(Seq.fill(4)(0.U(xLen.W)))) 68 | for (i <- 0 until 2) { 69 | for (j <- 0 until 2) { 70 | val lhs = Cat( 71 | 0.U((25 - 1 - (xLen / 2).toInt).W), // 8 bits 72 | op1((i + 1) * (xLen / 2).toInt - 1, i * (xLen / 2).toInt) 73 | ).asUInt 74 | val 
rhs = Cat( 75 | 0.U((18 - 1 - (xLen / 2).toInt).W), // 1 bits 76 | op2((j + 1) * (xLen / 2).toInt - 1, j * (xLen / 2).toInt) 77 | ).asUInt 78 | pp(i * 2 + j) := (lhs * rhs)(xLen - 1, 0) // 32 bits 79 | } 80 | } 81 | // stage 2: calculate sum 82 | var res = 0.U((2 * xLen).W) 83 | for (i <- 0 until 2) { 84 | for (j <- 0 until 2) { 85 | res = res + Cat(pp(i * 2 + j), 0.U((((i + j) * (xLen / 2).toInt).W))).asUInt 86 | } 87 | } 88 | val sign = RegNext( 89 | mulFn.SorU(io.req.bits.fn) & (io.req.bits.op1(xLen - 1) ^ io.req.bits.op2(xLen - 1)) 90 | ) 91 | val isL = RegNext(mulFn.LorH(io.req.bits.fn)) 92 | val data = Mux(sign, -res, res) 93 | val result = Mux(isL, data(xLen - 1, 0), data(2 * xLen - 1, xLen)) 94 | 95 | io.resp.valid := RegNext(RegNext(io.req.valid)) 96 | io.resp.bits.data := RegNext(result) 97 | } 98 | 99 | class MultWallace extends AbstractMult(MultFuncCode()) { 100 | // TODO: implement Wallace tree multiplier 101 | } 102 | -------------------------------------------------------------------------------- /doc/ROB_PC.md: -------------------------------------------------------------------------------- 1 | # ROB 重定向PC选取 2 | 3 | ## 1. Introduction 4 | 本文主要关注 ROB 产生重定向信号时,如何选出重定向的 PC。 5 | 6 | ## 2. 
选择信号 7 | 8 | ### 2.1 产生选择信号的方法 9 | 在 ROB 中,如果有重定向的需求,最终会通过一个名为`FlushTypes`的类来产生重定向信号,其定义如下: 10 | ```scala 11 | object FlushTypes { 12 | def SZ =3 13 | def apply() = UInt(SZ.W) 14 | def none = 0.U 15 | def xcpt = 1.U 16 | def eret = (2+1).U 17 | def refetch = 2.U 18 | def next = 4.U 19 | 20 | def useCsrEvec(typ: UInt): Bool = typ === eret || typ === xcpt 21 | def useSamePC(typ: UInt): Bool = typ === refetch 22 | def usePCplus4(typ: UInt): Bool = typ === next 23 | 24 | def getType(valid: Bool, i_xcpt: Bool, i_eret: Bool, i_refetch: Bool): UInt = { 25 | val ret = Mux(!valid , none , 26 | Mux( i_xcpt , xcpt , 27 | Mux( i_eret , eret , 28 | Mux( i_refetch, refetch, 29 | next )))) 30 | ret 31 | } 32 | } 33 | ``` 34 | 可以看到,`FlushTypes`定义了 5 种重定向类型,分别是`none`、`xcpt`、`eret`、`refetch`和`next`。接下来,我们逐个解释其含义: 35 | - `none`:无需重定向,即不产生重定向信号;当传入的`valid`为`false`时,产生此信号。 36 | - `xcpt`:异常重定向,当 ROB 检测到异常时,会使用此信号。 37 | - `eret`:当 ROB 检测到一条`eret`指令被提交时,产生此信号。 38 | - `refetch`:如果触发的异常是 CPU 内部自定义的异常,需要重新从异常指令处开始执行,则采用此信号。 39 | - `next`:默认情况下,PC 会递增 4,即下一条指令的地址。该信号用于处理`flush on commit`的情况。 40 | 41 | 另外,由于是采用`Mux`实现,实际上这几种信号存在优先级关系,即`xcpt` > `eret` > `refetch` > `next`。 42 | 实际上存在冲突的情况有: 43 | - `xcpt`和`refetch`:异常包括 ISA 中定义的异常和 CPU 内部自定义的异常,优先处理 ISA 中定义的异常。 44 | - `eret`和`next`:`eret`用于处理`eret`指令,`next`用于处理其余设置了`flush on commit`的指令。 45 | 46 | ### 2.2 ROB 中传递的控制信号 47 | 在 ROB 中,采用上述的`FlushTypes.getType`来获取重定向类型,实现如下: 48 | ```scala 49 | io.flush.bits.flush_typ := FlushTypes.getType( 50 | flushVal , 51 | exceptionThrown && !isMiniException , 52 | flushCommit && flushUop.uopc === uopERET, 53 | refetchInst 54 | ) 55 | ``` 56 | 同样的,我们逐个解释其含义: 57 | 1. `flushVal`:ROB 是否要产生重定向信号。对应的代码如下: 58 | ```scala 59 | val flushVal = exceptionThrown || flushCommit 60 | ``` 61 | 从中可以看出,当检测到异常或提交`flush on commit`的指令时,会将`flushVal`置为`true`。 62 | 2. `exceptionThrown && !isMiniException`:异常重定向。当检测到异常且不是 CPU 内部自定义的异常时,会产生此信号。目前的`isMiniException`用于处理访存违例的情况。此时,重新执行一遍就可以解决此问题,不需要进入异常处理流程。 63 | 3.
`flushCommit && flushUop.uopc === uopERET`:`eret`重定向。 64 | 4. `refetchInst`:需要从异常处重新执行,对应的代码如下: 65 | ```scala 66 | val insnSysPc2epc = 67 | robHeadVals.reduce(_||_) && 68 | PriorityMux(robHeadVals, io.commit.uops.map{u => u.is_sys_pc2epc}) 69 | val refetchInst = exceptionThrown || insnSysPc2epc 70 | ``` 71 | 当检测到异常(`exceptionThrown`),或者 ROB 头部第一条有效指令带有`is_sys_pc2epc`标志(`insnSysPc2epc`)时,会将`refetchInst`置为`true`。 72 | 实际上,`insnSysPc2epc`并没有意义,因为拥有`is_sys_pc2epc`标志的指令只有`syscall`和`break`,但是这两条指令同时会带有异常标志,依据上文的优先级关系,会直接进入异常处理流程。 73 | 综上,`refetchInst`等效于`exceptionThrown`。但是由于优先级的关系,其实际上生效的情况只有访存违例。 74 | 75 | ## 3. 选择 PC 76 | 当 core 中检测到 ROB 抛出重定向信号后,会选取重定向的 PC。具体实现如下: 77 | ```scala 78 | when (RegNext(rob.io.flush.valid)) { 79 | ... 80 | val flush_type = RegNext(rob.io.flush.bits.flush_typ) 81 | ... 82 | when (FlushTypes.useCsrEvec(flush_type)) { 83 | ifu.io.core.redirect_pc := csr.io.redirect_pc 84 | } .otherwise { 85 | val flush_pc = ( 86 | AlignPCToBoundary(ifu.io.core.getFtqPc(0).pc, iCacheLineBytes) + 87 | RegNext(rob.io.flush.bits.pc_lob) 88 | ) 89 | val flush_pc_next = flush_pc + coreInstrBytes.U 90 | ifu.io.core.redirect_pc := Mux( 91 | FlushTypes.useSamePC(flush_type), 92 | flush_pc, flush_pc_next 93 | ) 94 | } 95 | ... 96 | } ... 97 | ``` 98 | 可以看到,当 ROB 抛出重定向信号后,下个周期会根据上述方法计算出的`flush_type`来选择 PC。具体来说,有以下几种情况: 99 | 1. `FlushTypes.useCsrEvec`:对应的是`eret`和`xcpt`。此时,使用 CSR 模块中的`redirect_pc`作为重定向的 PC。 100 | 2.
/** Debug information for every retire slot, as handed to the difftest glue.
  *
  * Each field is a vector with one element per retire slot
  * (`robParameters.retireWidth` elements); `valids(i)` qualifies slot `i`.
  */
class InstrCommit extends CoreBundle {
    val debug_uopc  = Vec(robParameters.retireWidth, UInt(UOPC_SZ.W))
    val debug_insts = Vec(robParameters.retireWidth, UInt(32.W))
    val debug_wdata = Vec(robParameters.retireWidth, UInt(xLen.W))
    val debug_ldst  = Vec(robParameters.retireWidth, UInt(lregSz.W))
    val debug_pc    = Vec(robParameters.retireWidth, UInt(32.W))
    val debug_wen   = Vec(robParameters.retireWidth, Bool())
    val debug_load_uncacheable = Vec(robParameters.retireWidth, Bool())

    val valids      = Vec(robParameters.retireWidth, Bool())
}

/** Registers the raw commit bundle for one cycle, compacts the valid slots
  * towards index 0 and drives one `DifftestInstrCommit` and one
  * `DifftestLoadEvent` black box per retire slot.
  *
  * io.rawCommit : per-slot commit information (registered internally).
  * io.exception : when high, no slot is reported as valid.
  * io.fill_idx  : only present when `FPGAPlatform` is false; forwarded
  *                (minus one) as the TLBFILL index.
  */
class InstrCommits extends CoreModule {
    val io = IO(new Bundle{
        val rawCommit = Input(new InstrCommit)
        val exception = Input(Bool())
        val fill_idx  = if (!FPGAPlatform) Input(UInt(5.W)) else null
    })

    //-------------------------------------
    val cmtsz = robParameters.retireWidth
    //-------------------------------------

    // Width of a compacted slot index and its all-ones value. All-ones is the
    // "nothing committed yet" sentinel: adding 1 to it wraps to index 0.
    // (The original hard-coded 3.U, which is only right for a 2-bit index.)
    private val idxW       = log2Ceil(cmtsz)
    private val idxAllOnes = ((BigInt(1) << idxW) - 1).U(idxW.W)

    val rawCommit = RegNext(io.rawCommit)

    val idxs = Wire(Vec(cmtsz, Valid(UInt(idxW.W))))
    idxs := 0.U.asTypeOf(Vec(cmtsz, Valid(UInt(idxW.W))))

    // NOTE(review): io.exception is used un-registered while rawCommit is
    // delayed by one cycle - confirm the caller already aligns the two.
    when (rawCommit.valids.reduce(_|_)) {
        // Head element: index 0 if valid, otherwise the wrap-around sentinel.
        idxs(0).valid := rawCommit.valids(0) && !io.exception
        idxs(0).bits  := Mux(rawCommit.valids(0), 0.U, idxAllOnes)

        // Running count: a valid slot takes the next free index, an invalid
        // slot just passes the previous index on.
        for (i <- 1 until cmtsz) {
            idxs(i).valid := rawCommit.valids(i) && !io.exception
            idxs(i).bits  := Mux(rawCommit.valids(i), idxs(i-1).bits + 1.U, idxs(i-1).bits)
        }
    }

    val zippedCommit = WireInit(0.U.asTypeOf(new InstrCommit))

    // Scatter every valid raw slot to its compacted position.
    for (i <- 0 until cmtsz) {
        val valid = idxs(i).valid
        val idx   = idxs(i).bits

        when (valid) {
            zippedCommit.valids(idx)      := true.B
            zippedCommit.debug_pc(idx)    := rawCommit.debug_pc(i)
            zippedCommit.debug_ldst(idx)  := rawCommit.debug_ldst(i)
            zippedCommit.debug_insts(idx) := rawCommit.debug_insts(i)
            zippedCommit.debug_wdata(idx) := rawCommit.debug_wdata(i)
            zippedCommit.debug_wen(idx)   := rawCommit.debug_wen(i)
            zippedCommit.debug_uopc(idx)  := rawCommit.debug_uopc(i)
            zippedCommit.debug_load_uncacheable(idx) := rawCommit.debug_load_uncacheable(i)
        }
    }

    // io.fill_idx is null when FPGAPlatform is set; dereferencing it would
    // throw during elaboration, so fall back to a constant in that case.
    private val fillIdx = if (!FPGAPlatform) io.fill_idx else 0.U(5.W)

    for (i <- 0 until cmtsz) {
        val dic = Module(new DifftestInstrCommit)
        dic.io.clock  := clock
        dic.io.coreid := 0.U   // only support 1 core now

        dic.io.index          := i.U
        dic.io.valid          := zippedCommit.valids(i)
        dic.io.pc             := zippedCommit.debug_pc(i)
        dic.io.instr          := zippedCommit.debug_insts(i)
        dic.io.skip           := false.B
        dic.io.is_TLBFILL     := zippedCommit.debug_uopc(i) === uopTLBFILL
        dic.io.TLBFILL_index  := Cat(0.U(3.W), fillIdx - 1.U)
        dic.io.is_CNTinst     := false.B
        dic.io.timer_64_value := 0.U
        dic.io.wen            := zippedCommit.debug_wen(i)
        dic.io.wdest          := zippedCommit.debug_ldst(i)
        dic.io.wdata          := zippedCommit.debug_wdata(i)
        dic.io.csr_rstat      := false.B
        dic.io.csr_data       := 0.U

        // A load event is raised only for slots flagged as uncacheable loads;
        // the reported physical address is a fixed constant.
        val dle = Module(new DifftestLoadEvent)
        dle.io.clock  := clock
        dle.io.coreid := 0.U
        dle.io.vaddr  := 0.U

        dle.io.valid  := zippedCommit.debug_load_uncacheable(i)
        dle.io.index  := i.U
        dle.io.paddr  := 0xf8000000L.asUInt
    }
}
| //read reqest 7 | output [ 3:0] arid , 8 | output [31:0] araddr , 9 | output [ 7:0] arlen , 10 | output [ 2:0] arsize , 11 | output [ 1:0] arburst , 12 | output [ 1:0] arlock , 13 | output [ 3:0] arcache , 14 | output [ 2:0] arprot , 15 | output arvalid , 16 | input arready , 17 | //read back 18 | input [ 3:0] rid , 19 | input [31:0] rdata , 20 | input [ 1:0] rresp , 21 | input rlast , 22 | input rvalid , 23 | output rready , 24 | //write request 25 | output [ 3:0] awid , 26 | output [31:0] awaddr , 27 | output [ 7:0] awlen , 28 | output [ 2:0] awsize , 29 | output [ 1:0] awburst , 30 | output [ 1:0] awlock , 31 | output [ 3:0] awcache , 32 | output [ 2:0] awprot , 33 | output awvalid , 34 | input awready , 35 | //write data 36 | output [ 3:0] wid , 37 | output [31:0] wdata , 38 | output [ 3:0] wstrb , 39 | output wlast , 40 | output wvalid , 41 | input wready , 42 | //write back 43 | input [ 3:0] bid , 44 | input [ 1:0] bresp , 45 | input bvalid , 46 | output bready , 47 | 48 | // //debug 49 | input break_point , 50 | input infor_flag , 51 | input [ 4:0] reg_num , 52 | output ws_valid , 53 | output [31:0] rf_rdata , 54 | 55 | output [31:0] debug0_wb_pc , 56 | output [ 3:0] debug0_wb_rf_wen , 57 | output [ 4:0] debug0_wb_rf_wnum , 58 | output [31:0] debug0_wb_rf_wdata 59 | ); 60 | reg reset; 61 | always @(posedge aclk) begin 62 | reset <= ~aresetn; 63 | end 64 | 65 | iFuCore core( 66 | .clock(aclk) , 67 | .reset(reset) , 68 | .io_ext_int(intrpt) , 69 | .io_axi3_ar_ready(arready) , 70 | .io_axi3_ar_valid(arvalid) , 71 | .io_axi3_ar_bits_id(arid) , 72 | .io_axi3_ar_bits_addr(araddr) , 73 | .io_axi3_ar_bits_len(arlen) , 74 | .io_axi3_ar_bits_size(arsize) , 75 | .io_axi3_ar_bits_burst(arburst), 76 | .io_axi3_ar_bits_lock(arlock) , 77 | .io_axi3_ar_bits_cache(arcache), 78 | .io_axi3_ar_bits_prot(arprot) , 79 | .io_axi3_r_ready(rready) , 80 | .io_axi3_r_valid(rvalid) , 81 | .io_axi3_r_bits_id(rid) , 82 | .io_axi3_r_bits_resp(rresp) , 83 | .io_axi3_r_bits_data(rdata) , 
/** Mix-in adding the `clock` input shared by the difftest port bundles. */
trait DifftestWithClock {
    val clock = Input(Clock())
}

/** Mix-in adding the 8-bit `coreid` input. */
trait DifftestWithCoreid {
    val coreid = Input(UInt(8.W))
}

/** Mix-in adding the 8-bit `index` input used by per-slot events. */
trait DifftestWithIndex {
    val index = Input(UInt(8.W))
}

/** Base class of the difftest port bundles: a Bundle carrying `clock` and
  * `coreid`. These bundles are used as BlackBox IO in this file, so field
  * names and widths are part of the interface.
  */
abstract class DifftestBundle extends Bundle
    with DifftestWithClock
    with DifftestWithCoreid

/** Ports of the instruction-commit probe (one instance per retire slot). */
class DiffInstrCommitIO extends DifftestBundle with DifftestWithIndex {
    val valid          = Input(Bool())
    val pc             = Input(UInt(64.W))
    val instr          = Input(UInt(32.W))
    val skip           = Input(Bool())
    val is_TLBFILL     = Input(Bool())
    val TLBFILL_index  = Input(UInt(8.W))
    val is_CNTinst     = Input(Bool())
    val timer_64_value = Input(UInt(64.W))
    val wen            = Input(Bool())
    val wdest          = Input(UInt(8.W))
    val wdata          = Input(UInt(64.W))
    val csr_rstat      = Input(Bool())
    val csr_data       = Input(UInt(32.W))
}

/** Ports of the trap-event probe. */
class DiffTrapEventIO extends DifftestBundle {
    val valid    = Input(Bool())
    val code     = Input(UInt(3.W))
    val pc       = Input(UInt(64.W))
    val cycleCnt = Input(UInt(64.W))
    val instrCnt = Input(UInt(64.W))
}

/** Ports of the CSR-state probe; every CSR is passed as a 64-bit value. */
class DiffCSRRegStateIO extends DifftestBundle {
    val crmd      = Input(UInt(64.W))
    val prmd      = Input(UInt(64.W))
    val euen      = Input(UInt(64.W))
    val ecfg      = Input(UInt(64.W))
    val estat     = Input(UInt(64.W))
    val era       = Input(UInt(64.W))
    val badv      = Input(UInt(64.W))
    val eentry    = Input(UInt(64.W))
    val tlbidx    = Input(UInt(64.W))
    val tlbehi    = Input(UInt(64.W))
    val tlbelo0   = Input(UInt(64.W))
    val tlbelo1   = Input(UInt(64.W))
    val asid      = Input(UInt(64.W))
    val pgdl      = Input(UInt(64.W))
    val pgdh      = Input(UInt(64.W))
    val save0     = Input(UInt(64.W))
    val save1     = Input(UInt(64.W))
    val save2     = Input(UInt(64.W))
    val save3     = Input(UInt(64.W))
    val tid       = Input(UInt(64.W))
    val tcfg      = Input(UInt(64.W))
    val tval      = Input(UInt(64.W))
    val ticlr     = Input(UInt(64.W))
    val llbctl    = Input(UInt(64.W))
    val tlbrentry = Input(UInt(64.W))
    val dmw0      = Input(UInt(64.W))
    val dmw1      = Input(UInt(64.W))
}

/** Ports of the exception-event probe. */
class DifftestExcpEventIO extends DifftestBundle {
    val excp_valid    = Input(Bool())
    val eret          = Input(Bool())
    val intrNo        = Input(UInt(11.W))
    val cause         = Input(UInt(15.W))
    val exceptionPC   = Input(UInt(32.W))
    val exceptionInst = Input(UInt(32.W))
}

/** Ports of the general-register-state probe: 32 registers, 64 bits each. */
class DiffGRegStateIO extends DifftestBundle {
    val gpr = Input(Vec(32, UInt(64.W)))
}

/** Ports of the store-event probe. `valid` is an 8-bit field, not a Bool;
  * its bit layout is given on the field itself.
  */
class DiffStoreEventIO extends DifftestBundle with DifftestWithIndex {
    val valid      = Input(UInt(8.W)) // {4'b0, llbit && sc_w, st_w, st_h, st_b}
    val storePAddr = Input(UInt(64.W))
    val storeVAddr = Input(UInt(64.W))
    val storeData  = Input(UInt(64.W))
}
/** ALU function encodings (4-bit) plus predicates that decode properties
  * directly from individual encoding bits.
  *
  * The numeric values are load-bearing: `isSub` reads bit 0, `cmpUnsigned`
  * reads bit 2 and `isCmp` is a magnitude compare against FN_SLT, so the
  * codes must not be renumbered without revisiting those helpers.
  */
class AluFuncCode {
    val SZ_ALU_FN = 4
    def FN_X    = BitPat("b????")
    def FN_ADD  = 0.U(SZ_ALU_FN.W)  // 0b0000
    def FN_SUB  = 1.U(SZ_ALU_FN.W)  // 0b0001
    def FN_AND  = 2.U(SZ_ALU_FN.W)  // 0b0010
    def FN_NOR  = 3.U(SZ_ALU_FN.W)  // 0b0011
    def FN_OR   = 4.U(SZ_ALU_FN.W)  // 0b0100
    def FN_XOR  = 5.U(SZ_ALU_FN.W)  // 0b0101
    def FN_SL   = 6.U(SZ_ALU_FN.W)  // 0b0110
    def FN_SRA  = 7.U(SZ_ALU_FN.W)  // 0b0111
    def FN_SRL  = 8.U(SZ_ALU_FN.W)  // 0b1000
    def FN_ANDN = 9.U(SZ_ALU_FN.W)  // 0b1001
    def FN_ORN  = 10.U(SZ_ALU_FN.W) // 0b1010
    def FN_SLT  = 11.U(SZ_ALU_FN.W) // 0b1011
    def FN_SLTU = 13.U(SZ_ALU_FN.W) // 0b1101

    // Bit 0 of the encoding. Set for SUB, SLT and SLTU, but also for
    // NOR, XOR, SRA and ANDN, so it only means "subtract" where the
    // consumer actually uses the adder result.
    def isSub(cmd: UInt) = cmd(0)
    // True for every code >= FN_SLT (11), i.e. SLT and SLTU as well as the
    // unassigned codes 12, 14 and 15.
    def isCmp(cmd: UInt) = cmd >= FN_SLT
    // Bit 2 of the encoding: 0 for SLT (0b1011), 1 for SLTU (0b1101).
    def cmpUnsigned(cmd: UInt) = cmd(2)
}
/** Common port list of an ALU, parameterised by the function-code table.
  *
  * io.fn  : function code, `aluFn.SZ_ALU_FN` bits wide
  * io.op1 : first operand  (xLen bits)
  * io.op2 : second operand (xLen bits)
  * io.out : result         (xLen bits)
  */
abstract class AbstractAlu[T <: AluFuncCode](val aluFn: T) extends CoreModule {
    val io = IO(new Bundle {
        val fn = Input(UInt(aluFn.SZ_ALU_FN.W))
        val op1 = Input(UInt(xLen.W))
        val op2 = Input(UInt(xLen.W))
        val out = Output(UInt(xLen.W))
    })
}

/** Combinational integer ALU: add/sub, set-less-than, shifts and bitwise
  * logic, selected by `io.fn`.
  *
  * @param debug when true, a printf describing fn/op1/op2/out is elaborated.
  */
class Alu(val debug: Boolean = false) extends AbstractAlu(AluFuncCode()) {
    // ADD, SUB
    // Subtraction is done as op1 + ~op2 + 1; isSub (bit 0 of fn) supplies both
    // the inversion and the carry-in.
    val op2Inverse = Mux(aluFn.isSub(io.fn), ~io.op2, io.op2)
    val op1XorOp2 = io.op1 ^ io.op2
    val sum = io.op1 + op2Inverse + aluFn.isSub(io.fn)

    // SLT, SLTU
    // Same sign bits: the sign of (op1 - op2) decides.
    // Different sign bits: unsigned -> op2's top bit set means op1 < op2;
    //                      signed   -> op1's top bit set means op1 < op2.
    val slt = Mux(io.op1(xLen - 1) === io.op2(xLen - 1),
        sum(xLen - 1),  // in this case, sum = op1 - op2
        Mux(aluFn.cmpUnsigned(io.fn), io.op2(xLen - 1), io.op1(xLen - 1))
    )

    // SLL, SRL, SRA
    // One right shifter serves all three: a left shift bit-reverses the
    // operand, shifts right and bit-reverses the result.
    val (shamt, shin_r) = (io.op2(4, 0), io.op1)    // TODO: add support for xLen = 64
    val shin = Mux(io.fn === aluFn.FN_SRL || io.fn === aluFn.FN_SRA, shin_r, Reverse(shin_r))
    // The prepended bit is the sign fill: it is shin's top bit gated by bit 0
    // of fn, which is 1 for FN_SRA (7) and 0 for FN_SRL (8) and FN_SL (6).
    val shout_r = (Cat(aluFn.isSub(io.fn) & shin(xLen - 1), shin).asSInt >> shamt)(xLen - 1, 0)
    val shout_l = Reverse(shout_r)
    val shout = Mux(io.fn === aluFn.FN_SRL || io.fn === aluFn.FN_SRA, shout_r, 0.U) |
        Mux(io.fn === aluFn.FN_SL, shout_l, 0.U)

    // AND / OR / XOR share two terms: OR is formed as (a ^ b) | (a & b).
    val logicTmp1 = Mux(io.fn === aluFn.FN_XOR || io.fn === aluFn.FN_OR, op1XorOp2, 0.U) |
        Mux(io.fn === aluFn.FN_OR || io.fn === aluFn.FN_AND, io.op1 & io.op2, 0.U)
    val logicTmp2 = Mux(io.fn === aluFn.FN_ANDN, io.op1 & ~io.op2, 0.U) |
        Mux(io.fn === aluFn.FN_ORN, io.op1 | ~io.op2, 0.U) |
        Mux(io.fn === aluFn.FN_NOR, ~(io.op1 | io.op2), 0.U)
    val logic = logicTmp1 | logicTmp2

    // The three partial results are OR-ed together; for any given fn the
    // non-selected contributions are forced to zero above.
    val shift_logic = (aluFn.isCmp(io.fn) && slt) | logic | shout

    io.out := Mux(io.fn === aluFn.FN_ADD || io.fn === aluFn.FN_SUB, sum, shift_logic)

    // Optional simulation trace, elaborated only when `debug` is set.
    if (debug) {
        when(io.fn === aluFn.FN_ADD) {
            printf(p"alu: fn=ADD, ")
        }.elsewhen(io.fn === aluFn.FN_SUB) {
            printf(p"alu: fn=SUB, ")
        }.elsewhen(io.fn === aluFn.FN_AND) {
            printf(p"alu: fn=AND, ")
        }.elsewhen(io.fn === aluFn.FN_NOR) {
            printf(p"alu: fn=NOR, ")
        }.elsewhen(io.fn === aluFn.FN_OR) {
            printf(p"alu: fn=OR, ")
        }.elsewhen(io.fn === aluFn.FN_XOR) {
            printf(p"alu: fn=XOR, ")
        }.elsewhen(io.fn === aluFn.FN_SL) {
            printf(p"alu: fn=SL, ")
        }.elsewhen(io.fn === aluFn.FN_SRA) {
            printf(p"alu: fn=SRA, ")
        }.elsewhen(io.fn === aluFn.FN_SRL) {
            printf(p"alu: fn=SRL, ")
        }.elsewhen(io.fn === aluFn.FN_SLT) {
            printf(p"alu: fn=SLT, ")
        }.elsewhen(io.fn === aluFn.FN_SLTU) {
            printf(p"alu: fn=SLTU, ")
        }.elsewhen(io.fn === aluFn.FN_ANDN) {
            printf(p"alu: fn=ANDN, ")
        }.elsewhen(io.fn === aluFn.FN_ORN) {
            printf(p"alu: fn=ORN, ")
        }.otherwise {
            printf(p"alu: fn=UNKNOWN, ")
        }
        printf(p"op1=${io.op1.asSInt}, op2=${io.op2.asSInt}, out=${io.out.asSInt}\n")
    }

}
/** Branch update broadcast: `b1` carries the resolve/mispredict masks,
  * `b2` the resolution details of the branch.
  */
class BrUpdateInfo extends CoreBundle {
    val b1 = new BrUpdateMasks()
    val b2 = new BrResolutionInfo()
}

/** Request handed to a functional unit: the micro-op, its two source
  * operand values and a kill flag.
  */
class FuncUnitReq extends CoreBundle {
    val kill = Bool()
    val uop = new MicroOp()
    val rs1Data = UInt(xLen.W)
    val rs2Data = UInt(xLen.W)
}

/** Response produced by a functional unit. */
class FuncUnitResp extends CoreBundle {
    val uop = new MicroOp()
    val data = UInt(xLen.W)
    val addr = UInt(xLen.W)
    val r1 = UInt(xLen.W)
    val r2 = UInt(xLen.W)
}

/** Response leaving an execution unit.
  *
  * @param len width of `data` in bits (default 32)
  */
class ExeUnitResp(val len: Int = 32) extends CoreBundle {
    val uop = new MicroOp
    val data = Bits(len.W)
    val csr_cmd = UInt(CSR_SZ.W)
    val csr_addr = UInt(14.W)
    val tlb_op = UInt(5.W)
    val csr_r2 = UInt(xLen.W)
    val csr_r1 = UInt(xLen.W)
}

/** D-cache request payload. `mask` is 4 bits wide.
  * NOTE(review): presumably one mask bit per byte of the 32-bit word -
  * confirm against the D-cache implementation.
  */
class DCacheReq extends CoreBundle {
    val mask = UInt(4.W)
    val addr = UInt(vaddrBits.W)
    val data = Bits(xLen.W)
    val uop  = new MicroOp()
    val is_uncacheable = Bool()
}

/** D-cache response payload: the data word and the micro-op it belongs to. */
class DCacheResp extends CoreBundle {
    val data = Bits(xLen.W)
    val uop = new MicroOp()
}

/** Interface between the LSU and the data memory system.
  *
  * `req` is a DecoupledIO carrying `memWidth` individually-valid requests;
  * `resp` and `nack` are flipped relative to it. The remaining fields are
  * plain Output/Input signals as declared below.
  */
class LSUDMemIO extends CoreBundle {
    /**************************************/
    // Scala-level constant copied from the ROB parameters (not a hardware port).
    val robAddrSz = robParameters.robAddrSz
    /***************************************/
    val req = new DecoupledIO(Vec(memWidth,Valid(new DCacheReq)))
    val s1_kill = Output(Vec(memWidth, Bool()))
    //val s1_paddr = Output(Vec(memWidth, UInt(paddrBits.W)))
    val s2_hit = Input(Vec(memWidth, Bool()))
    val resp = Flipped(Vec(memWidth, new Valid(new DCacheResp)))

    val nack = Flipped(Vec(memWidth, new Valid(new DCacheReq)))

    val brupdate = Output(new BrUpdateInfo)
    val exception = Output(Bool())

    val llbit = Output(Bool())
    val fence_dmem = Output(Bool())
    val ordered = Input(Bool())
}
/** Capability flags for an execution unit; each flag says whether that
  * class of operation is supported. Every flag defaults to `false`, so
  * callers only name the capabilities they enable.
  */
case class SupportedFuncs(
    alu:    Boolean = false,
    jmp:    Boolean = false,
    mem:    Boolean = false,
    muldiv: Boolean = false,
    csr:    Boolean = false,
    cnt:    Boolean = false
    // tlb: Boolean = false
)
/** Per-slot prediction metadata: the counter index that was read and the
  * 2-bit counter value found there.
  */
class LocalHistoryPredictMeta extends Bundle with HasLocalHistoryParameters {
    val cntIdx = UInt(nCounterBits.W)
    val counter = UInt(2.W)
}

/** Ports of the local-history predictor. The s0/s1/s2/s3 prefixes name the
  * pipeline stage at which a signal is consumed or produced.
  */
class LocalHistoryIO extends Bundle with HasLocalHistoryParameters {
    val s0pc = Input(UInt(vaddrBits.W))

    /* val s2taken = Output(Vec(fetchWidth, Valid(Bool()))) */
    val s2_high_taken = Output(Vec(fetchWidth, Valid(Bool())))

    val s3meta = Output(Vec(fetchWidth, new LocalHistoryPredictMeta))

    val s1update = Input(Valid(new BranchPredictionUpdate))
}

/** Two-level local-history predictor: a table of per-fetch-slot history
  * registers indexes one table of 2-bit counters per fetch slot.
  *
  * `idxHash` and `update` come from HasLocalHistoryParameters, which is not
  * part of this file.
  */
class LocalHistoryPredictor extends Module with HasLocalHistoryParameters {
    val io = IO(new LocalHistoryIO)

    val localHistories = SyncReadMem(nLHRs, Vec(fetchWidth, UInt(localHistoryLength.W)))
    val counters = Seq.fill(fetchWidth) {SyncReadMem(nCounters, UInt(2.W))}
    /* val cacheCounters = Seq.fill(fetchWidth) {Module(new SDPRam(nCacheCounters, UInt(2.W)))} */

    // ---------------------------------------------
    //   Reset
    // While reset_en is high, reset_idx walks the counter tables and the
    // write port below stores 2 into every entry.
    // NOTE(review): reset_en is initialised to false.B and nothing in this
    // module ever sets it, so the sweep never runs and the counters keep
    // whatever the memories power up with. Confirm whether RegInit(true.B)
    // was intended.
    val reset_en = RegInit(false.B)
    val reset_idx = RegInit(0.U(nCounterBits.W))
    when(reset_en) {
        reset_idx := reset_idx + 1.U
    }
    when(reset_idx.andR) {
        reset_en := false.B
    }
    // ---------------------------------------------
    //   Predict
    // s0: issue the history read. s1: hash history with each slot's PC and
    // issue the counter read. s2: counter values are available.
    val s1pc = RegNext(io.s0pc)
    val s1hist = localHistories.read(fetchIdx(io.s0pc)(nLHRBits - 1, 0))
    val s1idx = VecInit(s1hist.zipWithIndex.map({case (hist, w) => idxHash(getPc(s1pc, w.U), hist)}))
    val s2cnt = VecInit(counters.zip(s1idx).map({case (ram, idx) => ram.read(idx)}))
    /* io.s2taken := s1hist.zip(cacheCounters).map({ case (hist, cnt) =>
        val idx = cacheIdxHash(hist)
        cnt.io.raddr := idx
        val cnt_val = cnt.io.rdata.head
        val taken = Wire(Valid(Bool()))
        taken.valid := !(cnt_val(0) ^ cnt_val(1))
        taken.bits := cnt_val(1)
        taken
    }) */

    // A counter value of 2 (the value written by the reset sweep) is treated
    // as "no prediction"; otherwise the counter's top bit is the direction.
    io.s2_high_taken := VecInit(s2cnt.map(cnt => {
        val taken = Wire(Valid(Bool()))
        taken.valid := cnt =/= 2.U && !reset_en
        taken.bits := cnt(1)
        taken
    }))
    /* val s3hist = RegNext(RegNext(s1hist))
    val s3cnt = RegNext(s2cnt)
    s3hist.zip(cacheCounters).zip(s3cnt).foreach({
        case ((hist, cnt), cnt_val) =>
            val idx = cacheIdxHash(hist)
            cnt.io.wen := true.B
            cnt.io.waddr := idx
            cnt.io.wdata.head := cnt_val
            cnt.io.wstrobe := 1.U
    }) */
    // ---------------------------------------------
    //   Meta
    // Index and counter value are registered once more and exported at s3.
    val s2idx = RegNext(s1idx)
    val s2meta = VecInit(s2idx.zip(s2cnt).map({case (idx, cnt) =>
        val meta = Wire(new LocalHistoryPredictMeta)
        meta.cntIdx := idx
        meta.counter := cnt
        meta
    }))
    io.s3meta := RegNext(s2meta)
    // ---------------------------------------------
    //   Update at s2
    // s1: read the old history and old counters. s2: compute and write back.
    val s1update = io.s1update.bits
    val s1updatepc = io.s1update.bits.pc
    val s2update = RegNext(s1update)
    val s2oldHist = localHistories.read(fetchIdx(s1updatepc)(nLHRBits - 1, 0))
    val s2newHist = Wire(Vec(fetchWidth, UInt(localHistoryLength.W)))
    val s2newCounter = Wire(Vec(fetchWidth, UInt(2.W)))
    for (w <- 0 until fetchWidth) {
        // A slot counts as taken only if it is the CFI slot of this update.
        val s1taken = Mux(s1update.cfiIdx.valid && s1update.cfiIdx.bits === w.U, s1update.cfiTaken, false.B)
        val s2taken = RegNext(s1taken)
        val s2oldCounter = counters(w).read(s1update.meta(w).localHistoryMeta.cntIdx)
        // Only slots flagged in brMask shift their history / move their counter.
        s2newHist(w) := Mux(s2update.brMask(w), Cat(s2oldHist(w)(localHistoryLength - 2, 0), s2taken.asUInt), s2oldHist(w))
        s2newCounter(w) := Mux(s2update.brMask(w), update(s2oldCounter, s2taken), s2oldCounter)
    }
    when (RegNext(io.s1update.valid)) {
        localHistories.write(fetchIdx(s2update.pc)(nLHRBits - 1, 0), s2newHist)
    }
    when (RegNext(io.s1update.valid) || reset_en) {  // only counter needs resetting
        for (w <- 0 until fetchWidth) {
            // The reset sweep shares the write port and takes priority.
            counters(w).write(Mux(reset_en, reset_idx, s2update.meta(w).localHistoryMeta.cntIdx),
                Mux(reset_en, 2.U(2.W), s2newCounter(w)))
        }
    }
    // ---------------------------------------------
}
/** XOR of the three pairwise comparisons between `i0`, `i1` and `head`.
  * NOTE(review): presumably the usual "is i0 older than i1" test for a
  * circular buffer whose oldest entry sits at `head` - confirm at call sites.
  */
object IsOlder {
    def apply(i0: UInt, i1: UInt, head: UInt): Bool = {
        val i0BelowI1   = i0 < i1
        val i0BelowHead = i0 < head
        val i1BelowHead = i1 < head
        i0BelowI1 ^ i0BelowHead ^ i1BelowHead
    }
}

/** True when the two masks have at least one set bit in common. */
object maskMatch {
    def apply(msk1: UInt, msk2: UInt): Bool = {
        val common = msk1 & msk2
        common.orR
    }
}

/** True when a branch mask overlaps the mispredict mask of `brupdate`. */
object IsKilledByBranch {
    // Micro-op flavour: forwards the uop's own branch mask.
    def apply(brupdate: BrUpdateInfo, uop: MicroOp): Bool =
        apply(brupdate, uop.brMask)

    def apply(brupdate: BrUpdateInfo, uop_mask: UInt): Bool =
        maskMatch(brupdate.b1.mispredictMask, uop_mask)
}

/** Branch mask with every bit set in `brupdate.b1.resolveMask` cleared. */
object GetNewBrMask {
    // Micro-op flavour: forwards the uop's own branch mask.
    def apply(brupdate: BrUpdateInfo, uop: MicroOp): UInt =
        apply(brupdate, uop.brMask)

    def apply(brupdate: BrUpdateInfo, br_mask: UInt): UInt = {
        val stillPending = (~brupdate.b1.resolveMask).asUInt
        br_mask & stillPending
    }
}

/** Copies of the input with the branch mask refreshed against `brupdate`. */
object UpdateBrMask {
    /** Copy of `uop` whose brMask has the resolved bits cleared. */
    def apply(brupdate: BrUpdateInfo, uop: MicroOp): MicroOp = {
        val refreshed = WireInit(uop)
        refreshed.brMask := GetNewBrMask(brupdate, uop)
        refreshed
    }

    /** Copy of `bundle` whose uop brMask has the resolved bits cleared and
      * whose valid is dropped when the uop sits under a mispredicted branch.
      */
    def apply[T <: HasUop](brupdate: BrUpdateInfo, bundle: Valid[T]): Valid[T] = {
        val oldMask   = bundle.bits.uop.brMask
        val squashed  = IsKilledByBranch(brupdate, oldMask)
        val refreshed = WireInit(bundle)
        refreshed.bits.uop.brMask := GetNewBrMask(brupdate, oldMask)
        refreshed.valid := bundle.valid && !squashed
        refreshed
    }
}
/** Expands the packed immediate field of an instruction into a 32-bit
  * signed value according to `immType`.
  *
  * Every case below is built to exactly 32 bits; an unknown `immType`
  * yields 0. The result is the same 32 bits reinterpreted as SInt.
  */
object immGen
{
    def apply(immPacked: UInt, immType: UInt): SInt = {
        val imm = WireInit(0.U(32.W))
        imm := MuxLookup(immType, 0.U)(Seq(
            // bits [14:10], zero-extended
            immU5  -> Cat(0.U(27.W),immPacked(14,10)),
            // bits [21:10], zero-extended
            immU12 -> Cat(0.U(20.W),immPacked(21,10)),
            // bits [21:10], sign-extended from bit 21
            immS12 -> Cat(Fill(20,immPacked(21)),immPacked(21,10)),
            // bits [23:10], sign-extended from bit 23, then shifted left by 2
            immS14 -> Cat(Fill(16,immPacked(23)),immPacked(23,10),0.U(2.W)),
            // bits [25:10], sign-extended from bit 25, then shifted left by 2
            immS16 -> Cat(Fill(14,immPacked(25)),immPacked(25,10),0.U(2.W)),
            // bits [24:5] placed in the upper 20 bits, low 12 bits zero
            immU20 -> Cat(immPacked(24,5),0.U(12.W)),
            // bits [24:5], sign-extended from bit 24, then shifted left by 2
            immS20 -> Cat(Fill(10,immPacked(24)),immPacked(24,5),0.U(2.W)),
            // {bits [9:0], bits [25:10]}, sign-extended from bit 9, shifted left by 2
            immS26 -> Cat(Fill(4,immPacked(9)),immPacked(9,0),immPacked(25,10),0.U(2.W)),
            // bits [23:10], zero-extended
            immCSR -> Cat(0.U(18.W),immPacked(23,10))
        ))
        imm.asSInt
    }
}
/** Sizing of the reorder buffer.
  *
  * The ROB is organised as `numRobRows` rows of `coreWidth` entries, and an
  * entry address is the concatenation of a row index and a column index.
  *
  * @param coreWidth number of entries per row; also used as the retire width.
  *                  Must be positive and divide `numRobEntries` exactly.
  */
class ROBParameters(coreWidth: Int) {
    val numRobEntries: Int = 36

    // The integer division below would otherwise silently drop entries.
    require(coreWidth > 0,
        s"coreWidth must be positive, got $coreWidth")
    require(numRobEntries % coreWidth == 0,
        s"numRobEntries ($numRobEntries) must be a multiple of coreWidth ($coreWidth)")

    val retireWidth: Int = coreWidth
    val numRobRows: Int = numRobEntries / coreWidth
    // Row-index bits plus column-index bits.
    val robAddrSz: Int = log2Ceil(numRobRows) + log2Ceil(coreWidth)
}
/** Core-wide configuration constants, mixed into CoreModule / CoreBundle.
  *
  * Derived values (`*Sz`, `*Bits`) are computed from the primary constants
  * here, so changing a primary constant propagates automatically.
  */
trait HasCoreParameters {
    val resetPC: Int = 0x1c000000
    val xLen: Int = 32
    // Virtual and physical addresses are both xLen bits wide.
    val vaddrBits: Int = xLen
    val paddrBits: Int = xLen
    val coreInstrBytes: Int = 4
    val coreInstrBits: Int = coreInstrBytes * 8
    // coreWidth is also passed to ROBParameters below.
    val coreWidth = 3
    val memWidth = 2
    val maxBrCount: Int = 8
    val brTagSz: Int = log2Ceil(maxBrCount)
    val numLRegs: Int = 32
    val lregSz: Int = log2Ceil(numLRegs)
    // val numPRegs: Int = 108
    val numPRegs: Int = 63
    val pregSz: Int = log2Ceil(numPRegs)
    val frontendParams: FrontendParameters = new FrontendParameters
    val robParameters: ROBParameters = new ROBParameters(coreWidth)
    val lsuParameters: LSUParameters = new LSUParameters
    val dcacheParameters: DcacheParameters = new DcacheParameters
    // One entry per issue queue: a memory queue and an integer queue.
    val issueParams: Seq[IssueParams] = Seq(
        IssueParams(issueWidth = 2, numIssueSlots = 6, iqType = IQT_MEM.litValue.toInt, dispatchWidth = 3),
        IssueParams(issueWidth = 3, numIssueSlots = 10, iqType = IQT_INT.litValue.toInt, dispatchWidth = 3)
    )
}
RegisterRead ( 11 | issueWidth: Int, 12 | supportedUnitsArray: Seq[SupportedFuncs], 13 | numReadPortsArray: Seq[Int], 14 | numTotalBypassPorts: Int, 15 | registerWidth: Int 16 | ) extends CoreModule { 17 | val io = IO(new Bundle{ 18 | val iss_valids = Input(Vec(issueWidth, Bool())) 19 | val iss_uops = Input(Vec(issueWidth, new MicroOp)) 20 | 21 | val rf_read_ports = Flipped(Vec(numReadPortsArray.sum, new RegisterFileReadPortIO(pregSz, registerWidth))) 22 | 23 | val bypass = Input(Vec(numTotalBypassPorts, Valid(new ExeUnitResp(registerWidth)))) 24 | 25 | val exe_reqs = Vec(issueWidth, Decoupled(new FuncUnitReq)) 26 | 27 | val kill = Input(Bool()) 28 | val brupdate = Input(new BrUpdateInfo()) 29 | }) 30 | io.exe_reqs.foreach(_.bits.kill := io.kill) 31 | 32 | var idx = 0 33 | for (w <- 0 until issueWidth) { 34 | val numReadPorts = numReadPortsArray(w) 35 | 36 | val rrd_valid = Reg(Bool()) 37 | val rrd_uop = Reg(new MicroOp) 38 | // clock 0: func decode 39 | val dec_unit = Module(new RegisterReadDecode(supportedUnitsArray(w))) 40 | dec_unit.io.iss_valid := io.iss_valids(w) 41 | dec_unit.io.iss_uop := io.iss_uops(w) 42 | 43 | rrd_valid := ( 44 | dec_unit.io.rrd_valid && !IsKilledByBranch(io.brupdate,dec_unit.io.rrd_uop) 45 | ) 46 | rrd_uop := GetNewUopAndBrMask(dec_unit.io.rrd_uop, io.brupdate) 47 | 48 | val rrd_rs1 = Wire(UInt(registerWidth.W)) 49 | val rrd_rs2 = if (numReadPorts == 2) Wire(UInt(registerWidth.W)) else null 50 | val rs1_addr = if (numReadPorts == 2) { 51 | io.iss_uops(w).prs1 52 | } else { 53 | Mux(io.iss_uops(w).lrs2_rtype === RT_FIX, io.iss_uops(w).prs2, io.iss_uops(w).prs1) 54 | } 55 | val rs2_addr = if (numReadPorts == 2) io.iss_uops(w).prs2 else null 56 | // clock 0: send read request to register file 57 | io.rf_read_ports(idx + 0).addr := rs1_addr 58 | if (numReadPorts == 2) io.rf_read_ports(idx + 1).addr := rs2_addr 59 | // clock 1: read data from register file 60 | rrd_rs1 := Mux( 61 | RegNext(rs1_addr === 0.U), 0.U, io.rf_read_ports(idx + 
0).data 62 | ) 63 | if (numReadPorts == 2) rrd_rs2 := Mux( 64 | RegNext(rs2_addr === 0.U), 0.U, io.rf_read_ports(idx + 1).data 65 | ) 66 | idx += numReadPorts 67 | 68 | // clock 1: do bypass 69 | val bypassed_rs1 = Wire(UInt(registerWidth.W)) 70 | val bypassed_rs2 = if (numReadPorts == 2) Wire(UInt(registerWidth.W)) else null 71 | val prs1 = if (numReadPorts == 2) { 72 | rrd_uop.prs1 73 | } else { 74 | Mux(rrd_uop.lrs2_rtype === RT_FIX, rrd_uop.prs2, rrd_uop.prs1) 75 | } 76 | val lrs1Rtype = if (numReadPorts == 2) { 77 | rrd_uop.lrs1_rtype 78 | } else { 79 | Mux(rrd_uop.lrs2_rtype === RT_FIX, rrd_uop.lrs2_rtype, rrd_uop.lrs1_rtype) 80 | } 81 | val prs2 = if (numReadPorts == 2) rrd_uop.prs2 else null 82 | val lrs2Rtype = if (numReadPorts == 2) rrd_uop.lrs2_rtype else null 83 | var rs1_cases = Seq((false.B, 0.U(registerWidth.W))) 84 | var rs2_cases = if (numReadPorts == 2) Seq((false.B, 0.U(registerWidth.W))) else null 85 | io.bypass foreach { bypass => 86 | rs1_cases ++= Seq(( 87 | bypass.valid && (prs1 === bypass.bits.uop.pdst) && bypass.bits.uop.rf_wen && 88 | bypass.bits.uop.dst_rtype === RT_FIX && lrs1Rtype === RT_FIX && (prs1 =/= 0.U), 89 | bypass.bits.data 90 | )) 91 | if (numReadPorts == 2) { 92 | rs2_cases ++= Seq(( 93 | bypass.valid && (prs2 === bypass.bits.uop.pdst) && bypass.bits.uop.rf_wen && 94 | bypass.bits.uop.dst_rtype === RT_FIX && lrs2Rtype === RT_FIX && (prs2 =/= 0.U), 95 | bypass.bits.data 96 | )) 97 | } 98 | } 99 | bypassed_rs1 := MuxCase(rrd_rs1, rs1_cases) 100 | if (numReadPorts == 2) bypassed_rs2 := MuxCase(rrd_rs2, rs2_cases) 101 | 102 | // clock 2: send request to execution units 103 | val exe_req_valid = RegInit(false.B) 104 | val exe_req_uop = Reg(new MicroOp) 105 | val exe_req_rs1 = Reg(UInt(registerWidth.W)) 106 | val exe_req_rs2 = if (numReadPorts == 2) Reg(UInt(registerWidth.W)) else null 107 | 108 | val killed = io.kill || IsKilledByBranch(io.brupdate, rrd_uop) 109 | exe_req_valid := Mux(killed, false.B, rrd_valid) 110 | exe_req_uop 
:= rrd_uop 111 | exe_req_uop.brMask := GetNewBrMask(io.brupdate, rrd_uop) 112 | 113 | exe_req_rs1 := bypassed_rs1 114 | if (numReadPorts == 2) exe_req_rs2 := bypassed_rs2 115 | 116 | io.exe_reqs(w).valid := exe_req_valid 117 | io.exe_reqs(w).bits.uop := exe_req_uop 118 | io.exe_reqs(w).bits.rs1Data := exe_req_rs1 119 | io.exe_reqs(w).bits.rs2Data := (if (numReadPorts == 2) exe_req_rs2 else DontCare) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/test/scala/DivTester.scala: -------------------------------------------------------------------------------- 1 | package iFu.backend 2 | 3 | import scala.util.Random 4 | 5 | import chisel3._ 6 | import chiseltest._ 7 | import org.scalatest.flatspec.AnyFlatSpec 8 | 9 | trait DivTestFunc { 10 | val FN_DIV = 1 11 | val FN_DIVU = 0 12 | val FN_REM = 3 13 | val FN_REMU = 2 14 | def funcs = Array(FN_DIV, FN_REM, FN_REMU) 15 | 16 | def divider(fn: Int, op1: Int, op2: Int): Int = { 17 | if (op2 == 0) { 18 | return fn match { 19 | case FN_DIV => 0xFFFFFFFF 20 | case FN_DIVU => 0xFFFFFFFF 21 | case FN_REM => op1 22 | case FN_REMU => op1 23 | } 24 | } 25 | return fn match { 26 | case FN_DIV => op1 / op2 27 | case FN_DIVU => ((op1 & 0xFFFFFFFFL) / (op2 & 0xFFFFFFFFL)).toInt 28 | case FN_REM => op1 % op2 29 | case FN_REMU => ((op1 & 0xFFFFFFFFL) % (op2 & 0xFFFFFFFFL)).toInt 30 | } 31 | } 32 | 33 | def testOne(dut: SRT16Divider, fn: Int, op1: Int, op2: Int): Unit = { 34 | val op = fn match { 35 | case FN_DIV => "/" 36 | case FN_DIVU => "/u" 37 | case FN_REM => "%" 38 | case FN_REMU => "%u" 39 | } 40 | // println(s"<><><><><><><><><><> Testing $op1 $op $op2 <><><><><><><><><><>") 41 | val refResult = divider(fn, op1, op2) 42 | dut.io.resp.ready.poke(true.B) 43 | dut.io.req.valid.poke(true.B) 44 | dut.io.req.bits.fn.poke(fn.U) 45 | dut.io.req.bits.op1.poke(BigInt(op1 & 0xFFFFFFFFL).U) 46 | dut.io.req.bits.op2.poke(BigInt(op2 & 0xFFFFFFFFL).U) 47 | dut.clock.step(1) 48 | 
dut.io.req.valid.poke(false.B) 49 | 50 | while (dut.io.resp.valid.peek().litToBoolean == false) { 51 | dut.clock.step(1) 52 | } 53 | 54 | dut.io.resp.bits.data.expect(BigInt(refResult & 0xFFFFFFFFL).U) 55 | dut.clock.step(1) 56 | // println(s"<><><><><><><><><><> Testing $op1 $op $op2 <><><><><><><><><><>") 57 | // println("========================================================\n") 58 | } 59 | def getOneRef(dut: SRT16DividerRef, fn: Int, op1: Int, op2: Int): Unit = { 60 | val op = fn match { 61 | case FN_DIV => "/" 62 | case FN_DIVU => "/u" 63 | case FN_REM => "%" 64 | case FN_REMU => "%u" 65 | } 66 | // println(s"<><><><><><><><><><> Testing $op1 $op $op2 <><><><><><><><><><>") 67 | val refResult = divider(fn, op1, op2) 68 | dut.io.resp.ready.poke(true.B) 69 | dut.io.req.valid.poke(true.B) 70 | dut.io.req.bits.fn.poke(fn.U) 71 | dut.io.req.bits.op1.poke(BigInt(op1 & 0xFFFFFFFFL).U) 72 | dut.io.req.bits.op2.poke(BigInt(op2 & 0xFFFFFFFFL).U) 73 | dut.clock.step(1) 74 | dut.io.req.valid.poke(false.B) 75 | 76 | while (dut.io.resp.valid.peek().litToBoolean == false) { 77 | dut.clock.step(1) 78 | } 79 | dut.clock.step(2) 80 | // println(s"<><><><><><><><><><> Testing $op1 $op $op2 <><><><><><><><><><>") 81 | // println("========================================================\n") 82 | } 83 | 84 | def testFn(dut: SRT16Divider): Unit = { 85 | val times = 4 86 | val random = new Random() 87 | val op1 = Array.fill(times)(random.nextInt()) 88 | val op2 = Array.fill(times)(random.nextInt()) 89 | // val op1 = Array(2, 4) 90 | // val op2 = Array(1, 2) 91 | for (a <- op1) { 92 | for (b <- op2) { 93 | // val fn = FN_DIV 94 | for (fn <- funcs) { 95 | // testOne(dut, fn, a, a) 96 | // testOne(dut, fn, a, b) 97 | // testOne(dut, fn, a, -b) 98 | // testOne(dut, fn, -a, b) 99 | testOne(dut, fn, a, 0) 100 | testOne(dut, fn, 0, b) 101 | testOne(dut, fn, 0, 0) 102 | testOne(dut, fn, a, -1) 103 | testOne(dut, fn, 1, b) 104 | testOne(dut, fn, 1, -1) 105 | testOne(dut, fn, 1, 0) 106 | 
testOne(dut, fn, 0, -1) 107 | } 108 | } 109 | } 110 | } 111 | 112 | def getRef(dut: SRT16DividerRef): Unit = { 113 | val times = 4 114 | val random = new Random() 115 | // val op1 = Array.fill(times)(random.nextInt()) 116 | // val op2 = Array.fill(times)(random.nextInt()) 117 | val op1 = Array(2, 4) 118 | val op2 = Array(1, 2) 119 | for (a <- op1) { 120 | for (b <- op2) { 121 | val fn = FN_DIV 122 | // for (fn <- funcs) { 123 | // testOne(dut, fn, a, a) 124 | getOneRef(dut, fn, a, b) 125 | // testOne(dut, fn, a, -b) 126 | // testOne(dut, fn, -a, b) 127 | // } 128 | } 129 | } 130 | } 131 | } 132 | 133 | class DivTester extends AnyFlatSpec with ChiselScalatestTester with DivTestFunc { 134 | "SRT16Divider" should "pass" in { 135 | // test(new SRT16DividerRef(true)) { dut => 136 | // println("Testing SRT16Divider") 137 | // getRef(dut) 138 | // } 139 | test(new SRT16Divider()) { dut => 140 | println("Testing SRT16Divider") 141 | testFn(dut) 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/main/scala/tlb/ITLB.scala: -------------------------------------------------------------------------------- 1 | package iFu.tlb 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.common.Consts._ 8 | 9 | class ITLBReq extends CoreBundle { 10 | val vaddr = UInt(vaddrBits.W) 11 | } 12 | 13 | // PIF PPI ADEF TLBR 14 | class ITLBException extends CoreBundle { 15 | val xcpt_cause = UInt(CauseCode.microCauseBits.W) 16 | } 17 | 18 | class ITLBResp extends CoreBundle { 19 | val paddr = UInt(paddrBits.W) 20 | val exception = Valid(new ITLBException) 21 | } 22 | 23 | class ITLBIO extends CoreBundle { 24 | val itlb_csr_cxt = Input(new TLBCsrContext) 25 | val req = Flipped(Valid(new ITLBReq)) 26 | val resp = Output(new ITLBResp) 27 | val r_req = Valid(new TLBDataRReq) 28 | val r_resp = Flipped(Valid(new TLBDataRResp)) 29 | } 30 | 31 | class ITLB(num_l0_itlb_entries: Int = 2) extends 
CoreModule with L0TLBState { 32 | require(isPow2(num_l0_itlb_entries)) 33 | 34 | val io = IO(new ITLBIO) 35 | 36 | val state = RegInit(s_ready) 37 | val state_nxt = WireInit(state) 38 | state := state_nxt 39 | 40 | // L0 ITLB 41 | val l0_entry = RegInit(VecInit( 42 | Seq.fill(num_l0_itlb_entries)(0.U.asTypeOf(new L0ITLBEntry)) 43 | )) 44 | 45 | val csr_regs = WireInit(io.itlb_csr_cxt) 46 | // csr_regs.da_mode := io.itlb_csr_cxt.da_mode 47 | // csr_regs.pg_mode := io.itlb_csr_cxt.pg_mode 48 | 49 | val vaddr = io.req.bits.vaddr 50 | val l0_hit_oh = VecInit(l0_entry.map( 51 | e => e.entry.matches(vaddr(vaddrBits - 1, 13), (csr_regs.asid_asid)) 52 | )) 53 | val l0_hit = l0_hit_oh.asUInt.orR 54 | val l0_hit_idx = OHToUInt(l0_hit_oh) 55 | val l0_hit_entry = l0_entry(l0_hit_idx) 56 | 57 | 58 | val dmw0_en = ( 59 | (csr_regs.dmw0_plv0 && csr_regs.crmd_plv === 0.U) || 60 | (csr_regs.dmw0_plv3 && csr_regs.crmd_plv === 3.U) 61 | ) && (vaddr(31, 29) === (csr_regs.dmw0_vseg)) 62 | val dmw1_en = ( 63 | (csr_regs.dmw1_plv0 && csr_regs.crmd_plv === 0.U) || 64 | (csr_regs.dmw1_plv3 && csr_regs.crmd_plv === 3.U) 65 | ) && (vaddr(31, 29) === (csr_regs.dmw1_vseg)) 66 | if (!FPGAPlatform) dontTouch(dmw0_en) 67 | if (!FPGAPlatform) dontTouch(dmw1_en) 68 | 69 | // addr translation 70 | val use_page_table = WireInit(false.B) 71 | io.resp := 0.U.asTypeOf(new ITLBResp) 72 | when (vaddr(1, 0) =/= 0.U) { 73 | io.resp.exception.valid := true.B 74 | io.resp.exception.bits.xcpt_cause := CauseCode.ADEF 75 | } .elsewhen (csr_regs.da_mode) { 76 | io.resp.paddr := vaddr 77 | } .elsewhen (csr_regs.pg_mode) { 78 | when (dmw0_en || dmw1_en) { 79 | io.resp.paddr := Cat( 80 | Mux(dmw0_en, (csr_regs.dmw0_pseg), (csr_regs.dmw1_pseg)), vaddr(28, 0) 81 | ) 82 | io.resp.exception.valid := false.B 83 | } .otherwise { 84 | use_page_table := true.B 85 | val entry = l0_hit_entry.entry 86 | val odd_even_page = Mux(entry.meta.ps === 12.U, vaddr(12), vaddr(21)) 87 | val data = entry.data(odd_even_page) 88 | switch 
(state) { 89 | is (s_ready) { 90 | when (!l0_hit) { 91 | state_nxt := Mux(io.req.valid, s_refill, s_ready) 92 | io.resp.exception.valid := true.B 93 | io.resp.exception.bits.xcpt_cause := CauseCode.MINI_EXCEPTION_L0TLB_MISS 94 | } .elsewhen (!l0_hit_entry.exist) { 95 | io.resp.exception.valid := true.B 96 | io.resp.exception.bits.xcpt_cause := CauseCode.TLBR 97 | } .otherwise { 98 | when (!data.v) { 99 | io.resp.exception.valid := true.B 100 | io.resp.exception.bits.xcpt_cause := CauseCode.PIF 101 | } .elsewhen((csr_regs.crmd_plv) > data.plv) { 102 | io.resp.exception.valid := true.B 103 | io.resp.exception.bits.xcpt_cause := CauseCode.PPI 104 | } 105 | } 106 | } 107 | is (s_refill) { 108 | io.resp.exception.valid := true.B 109 | io.resp.exception.bits.xcpt_cause := CauseCode.MINI_EXCEPTION_L0TLB_MISS 110 | } 111 | } 112 | io.resp.paddr := Mux( 113 | entry.meta.ps === 12.U, 114 | Cat(data.ppn, vaddr(11, 0)), 115 | Cat(data.ppn(paddrBits - 13, 9), vaddr(20, 0)) 116 | ) 117 | } 118 | } 119 | 120 | // access L1 TLB 121 | io.r_req.valid := RegNext(io.req.valid && !l0_hit) 122 | io.r_req.bits.vaddr := RegNext(vaddr) 123 | val r_resp = RegNext(io.r_resp) 124 | 125 | val refill_vppn = RegNext(RegNext(RegNext(vaddr(vaddrBits - 1, 13)))) 126 | val refill_en = RegNext(RegNext(RegNext(io.req.valid && !l0_hit && use_page_table))) && (state === s_refill) 127 | val refill_idx = RegInit(0.U(log2Ceil(num_l0_itlb_entries).W)) 128 | refill_idx := refill_idx + refill_en 129 | if (!FPGAPlatform) dontTouch(refill_idx) 130 | 131 | when (refill_en) { 132 | when (r_resp.valid) { 133 | l0_entry(refill_idx) := Mux( 134 | r_resp.bits.found, 135 | L0ITLBEntry.new_entry(r_resp.bits.entry), 136 | L0ITLBEntry.fake_entry(refill_vppn, (csr_regs.asid_asid)) 137 | ) 138 | } 139 | state_nxt := s_ready 140 | } 141 | when ((csr_regs.inv_l0_tlb)) { 142 | l0_entry map { e => e.entry.meta.e := false.B } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- 
/src/main/scala/backend/issue/IssueSlot.scala:
--------------------------------------------------------------------------------
package iFu.backend

import chisel3._
import chisel3.util._

import iFu.common._
import iFu.common.Consts._
import iFu.util._

class IssueSlotIO(val numWakeupPorts: Int) extends CoreBundle {
    val valid = Output(Bool())
    val willBeValid = Output(Bool())

    val request = Output(Bool())
    val grant = Input(Bool())

    val brUpdate = Input(new BrUpdateInfo)
    val kill = Input(Bool())
    val clear = Input(Bool())
    val ldSpecMiss = Input(Bool())

    val wakeupPorts = Vec(numWakeupPorts, Flipped(Valid(new IssueWakeup(pregSz))))
    val specLdWakeupPorts = Vec(memWidth, Flipped(Valid(UInt(pregSz.W))))

    val inUop = Flipped(Valid(new MicroOp))
    val outUop = Output(new MicroOp) // passed to next slot uop
    val uop = Output(new MicroOp) // issued uop
}

/** One entry of an issue queue.
  *
  * Holds a uop, tracks readiness of its two source operands (p1 / p2) through the normal
  * and speculative-load wakeup ports, and raises `request` once it can issue. A slot in
  * s_valid_2 issues in two steps and moves to s_valid_1 after the first grant.
  */
class IssueSlot(val numWakeupPorts: Int) extends CoreModule with IssueState {
    val io = IO(new IssueSlotIO(numWakeupPorts))

    val state = RegInit(s_invalid)
    val next_state = WireInit(state)
    // Priority: pipeline kill > incoming uop > clear (uop shifted away) > own transition.
    when (io.kill) {
        state := s_invalid
    } .elsewhen (io.inUop.valid) {
        state := io.inUop.bits.iwState
    } .elsewhen (io.clear) {
        state := s_invalid
    } .otherwise {
        state := next_state
    }

    val slot_uop = Reg(new MicroOp)
    val next_uop = WireInit(UpdateBrMask(io.brUpdate, slot_uop))
    when (io.inUop.valid) {
        slot_uop := io.inUop.bits
    } .otherwise {
        slot_uop := next_uop
    }

    io.valid := isValid(state)
    io.outUop := next_uop

    val p1 = RegInit(false.B)
    val p2 = RegInit(false.B)
    val p1_poisoned = RegInit(false.B)
    val p2_poisoned = RegInit(false.B)

    next_uop.iwState := next_state
    next_uop.prs1_busy := !p1
    next_uop.prs2_busy := !p2
    next_uop.iw_p1_poisoned := p1_poisoned
    next_uop.iw_p2_poisoned := p2_poisoned

    val killed = IsKilledByBranch(io.brUpdate, slot_uop)
    when (io.kill || killed) {
        next_state := s_invalid
    } .elsewhen (io.grant && (state === s_valid_1)) {
        // A grant is squashed when a speculative load missed and an operand depended on it.
        when (!(io.ldSpecMiss && (p1_poisoned || p2_poisoned))) {
            next_state := s_invalid
        }
    } .elsewhen (io.grant && (state === s_valid_2)) {
        when (!(io.ldSpecMiss && (p1_poisoned || p2_poisoned))) {
            next_state := s_valid_1
            // The half that was just issued is removed from the remaining uop.
            when (p1) {
                next_uop.uopc := uopSTD
                next_uop.lrs1_rtype := RT_X
            } .otherwise {
                next_uop.lrs2_rtype := RT_X
            }
        }
    }

    when (io.inUop.valid) {
        p1 := !io.inUop.bits.prs1_busy
        p2 := !io.inUop.bits.prs2_busy
    }
    // Poison lasts one cycle unless a speculative wakeup sets it again below.
    p1_poisoned := false.B
    p2_poisoned := false.B

    val in_uop = Mux(io.inUop.valid, io.inUop.bits, slot_uop)
    val in_uop_p1_poisoned = Mux(
        io.inUop.valid, io.inUop.bits.iw_p1_poisoned, p1_poisoned
    )
    val in_uop_p2_poisoned = Mux(
        io.inUop.valid, io.inUop.bits.iw_p2_poisoned, p2_poisoned
    )
    when (io.ldSpecMiss && in_uop_p1_poisoned) { p1 := false.B }
    when (io.ldSpecMiss && in_uop_p2_poisoned) { p2 := false.B }

    val prs1_matches = io.wakeupPorts.map {
        w => w.bits.pdst === in_uop.prs1
    }
    val prs2_matches = io.wakeupPorts.map {
        w => w.bits.pdst === in_uop.prs2
    }
    val prs1_specmatchs = io.specLdWakeupPorts.map {
        w => w.bits === in_uop.prs1 && in_uop.lrs1_rtype === RT_FIX
    }
    val prs2_specmatchs = io.specLdWakeupPorts.map {
        w => w.bits === in_uop.prs2 && in_uop.lrs2_rtype === RT_FIX
    }
    val prs1_wakeups = (io.wakeupPorts zip prs1_matches).map {
        case (w, m) => w.valid && m
    }
    val prs2_wakeups = (io.wakeupPorts zip prs2_matches).map {
        case (w, m) => w.valid && m
    }
    val prs1_specwakeups = (io.specLdWakeupPorts zip prs1_specmatchs).map {
        case (w, m) => w.valid && m
    }
    val prs2_specwakeups = (io.specLdWakeupPorts zip prs2_specmatchs).map {
        case (w, m) => w.valid && m
    }
    val prs1_normalwakeup = prs1_wakeups.reduce(_||_)
    val prs2_normalwakeup = prs2_wakeups.reduce(_||_)
    val prs1_specwakeup = prs1_specwakeups.reduce(_||_)
    val prs2_specwakeup = prs2_specwakeups.reduce(_||_)
    // These connections come last, so a wakeup overrides the clears above in the same cycle.
    when (prs1_normalwakeup || prs1_specwakeup) { p1 := true.B }
    when (prs2_normalwakeup || prs2_specwakeup) { p2 := true.B }
    when (prs1_specwakeup) { p1_poisoned := true.B }
    when (prs2_specwakeup) { p2_poisoned := true.B }

    when (state === s_valid_1) {
        io.request := p1 && p2 && !io.kill
    } .elsewhen (state === s_valid_2) {
        io.request := (p1 || p2) && !io.kill
    } .otherwise {
        io.request := false.B
    }

    io.uop := slot_uop
    io.uop.iw_p1_poisoned := p1_poisoned
    io.uop.iw_p2_poisoned := p2_poisoned
    when (state === s_valid_2) {
        // Issue only the half whose operand is ready; p1 takes priority when both are.
        when (p1) {
            io.uop.lrs2_rtype := RT_X
        } .elsewhen (p2) {
            io.uop.uopc := uopSTD
            io.uop.lrs1_rtype := RT_X
        }
    }

    val squash_grant = io.ldSpecMiss && (p1_poisoned || p2_poisoned)
    io.willBeValid := isValid(state) && !(io.grant && (state === s_valid_1) && !squash_grant)
}
--------------------------------------------------------------------------------
/src/main/scala/backend/memSystem/IndexAllocator.scala:
--------------------------------------------------------------------------------
package iFu.backend

import chisel3._
import chisel3.util._
import iFu.common._
import iFu.common.Consts._
import iFu.common.CauseCode._
import iFu.tlb._
import iFu.util._

/** Assigns load-queue and store-queue indices to the uops dispatched in one cycle.
  *
  * Dispatch lane w receives the old tail advanced by the number of valid loads (stores)
  * in lanes 0 until w; the new tail is the old tail advanced by the total count.
  * A uop counts only if it is valid, uses the queue and carries no exception.
  *
  * The allocation is derived from coreWidth instead of a hand-written table, so it is
  * no longer restricted to coreWidth == 3. For coreWidth == 3 the outputs are identical
  * to the previous table.
  */
class IndexAllocator extends CoreModule {
    val stqAddrSz = lsuParameters.stqAddrSz
    val ldqAddrSz = lsuParameters.ldqAddrSz
    val numStqEntries = lsuParameters.numSTQEntries
    val numLdqEntries = lsuParameters.numLDQEntries
    val io = IO(new CoreBundle {
        val dis_uops = Input(Vec(coreWidth, Valid(new MicroOp)))
        val old_ldq_tail = Input(UInt(ldqAddrSz.W))
        val old_stq_tail = Input(UInt(stqAddrSz.W))
        val ldq_enq_idxs = Output(Vec(coreWidth, UInt(ldqAddrSz.W)))
        val stq_enq_idxs = Output(Vec(coreWidth, UInt(stqAddrSz.W)))
        val new_ldq_tail = Output(UInt(ldqAddrSz.W))
        val new_stq_tail = Output(UInt(stqAddrSz.W))
    })

    private val ld_valid_seq = io.dis_uops.map(x => x.valid && x.bits.use_ldq && !x.bits.xcpt_valid)
    private val st_valid_seq = io.dis_uops.map(x => x.valid && x.bits.use_stq && !x.bits.xcpt_valid)
    val dis_ld_valids = VecInit(ld_valid_seq)
    val dis_st_valids = VecInit(st_valid_seq)

    /** Returns the per-lane enqueue indices and the new tail for one queue.
      *
      * All wrapped candidates (oldTail + 1 .. oldTail + width) are computed in parallel
      * from oldTail and then selected by a population count, so no adder is chained
      * behind another one.
      */
    private def allocate(valids: Seq[Bool], oldTail: UInt, numEntries: Int): (Seq[UInt], UInt) = {
        val width = valids.length
        // advanced(k) is the tail after k allocations; advanced(0) is the tail itself.
        val advanced = oldTail +: (1 to width).map(k => WrapAdd(oldTail, k.U, numEntries))
        def select(count: UInt, maxCount: Int): UInt =
            (1 to maxCount).foldLeft(oldTail)((acc, k) => Mux(count === k.U, advanced(k), acc))
        val idxs = (0 until width).map { w =>
            if (w == 0) oldTail else select(PopCount(valids.take(w)), w)
        }
        (idxs, select(PopCount(valids), width))
    }

    private val (ldIdxs, ldTail) = allocate(ld_valid_seq, io.old_ldq_tail, numLdqEntries)
    private val (stIdxs, stTail) = allocate(st_valid_seq, io.old_stq_tail, numStqEntries)

    for (w <- 0 until coreWidth) {
        io.ldq_enq_idxs(w) := ldIdxs(w)
        io.stq_enq_idxs(w) := stIdxs(w)
    }
    io.new_ldq_tail := ldTail
    io.new_stq_tail := stTail
}
--------------------------------------------------------------------------------
/src/main/scala/backend/issue/IssueUnit.scala:
--------------------------------------------------------------------------------
package iFu.backend

import chisel3._
import chisel3.util._

import iFu.common._
import iFu.common.Consts._

trait IssueState {
    // s_invalid -> no valid instruction
    // s_valid_1 -> normal valid instruction
    // s_valid_2 -> store-like instruction
    val s_invalid :: s_valid_1 :: s_valid_2 :: Nil = Enum(3)

    def isValid(s: UInt): Bool = s =/= s_invalid
}

class IssueWakeup(val pregSz: Int) extends Bundle {
    val pdst = UInt(pregSz.W) // physical destination register
}

class IssueUnitIO (
    val dispatchWidth: Int,
    val numWakeupPorts: Int,
    val issueWidth: Int
) extends CoreBundle {
    val disUops = Vec(dispatchWidth, Flipped(Decoupled(new MicroOp)))

    // normal wakeup
    val wakeupPorts = Vec(numWakeupPorts, Flipped(Valid(new IssueWakeup(pregSz))))
    // speculative load wakeup
    val specLdWakeupPorts = Vec(memWidth, Flipped(Valid(UInt(pregSz.W))))
    val ldMiss = Input(Bool()) // load miss happened

    val fuTypes = Input(Vec(issueWidth, Bits(FUC_SZ.W)))

    val brUpdate = Input(new BrUpdateInfo)
    val flushPipeline = Input(Bool())

    val issueValids = Output(Vec(issueWidth, Bool()))
    val issueUops = Output(Vec(issueWidth, new MicroOp))
}

/** Collapsing issue queue.
  *
  * Dispatched uops enter behind the last slot and every uop shifts towards slot 0 by up
  * to `dispatchWidth` positions per cycle to fill vacancies. Issue selection scans the
  * slots from index 0 upwards and gives each requesting slot the first free issue port
  * that accepts it.
  */
class IssueUnit (
    issParams: IssueParams,
    numWakeupPorts: Int
) extends CoreModule with IssueState {
    val numIssueSlots = issParams.numIssueSlots
    val dispatchWidth = issParams.dispatchWidth
    val issueWidth = issParams.issueWidth

    val io = IO(new IssueUnitIO(dispatchWidth, numWakeupPorts, issueWidth))

    val disUops = Wire(Vec(dispatchWidth, new MicroOp))

    for (w <- 0 until dispatchWidth) {
        disUops(w) := io.disUops(w).bits
        disUops(w).iwState := s_valid_1
        disUops(w).iw_p1_poisoned := false.B
        disUops(w).iw_p2_poisoned := false.B

        // STA and SC_AG uops start in the two-step state.
        when (
            (io.disUops(w).bits.uopc === uopSTA) ||
            (io.disUops(w).bits.uopc === uopSC_AG)
        ) {
            disUops(w).iwState := s_valid_2
        }
    }

    val slots = Seq.fill(numIssueSlots) { Module(new IssueSlot(numWakeupPorts)) }
    val issueSlots = VecInit(slots.map(_.io))

    issueSlots.foreach { slot =>
        slot.wakeupPorts := io.wakeupPorts
        slot.specLdWakeupPorts := io.specLdWakeupPorts
        slot.ldSpecMiss := io.ldMiss
        slot.brUpdate := io.brUpdate
        slot.kill := io.flushPipeline
    }

    val maxShift = dispatchWidth
    val vacants = issueSlots.map(_.valid).map(!_.asBool) ++ io.disUops.map(_.valid).map(!_.asBool)

    // One-hot count of the vacancies seen so far, saturating at maxShift.
    def getShamtOH(countOH: UInt, inc: Bool): UInt = {
        val next = Wire(UInt(maxShift.W))
        next := countOH
        when(countOH === 0.U && inc) {
            next := 1.U
        }.elsewhen(!countOH(maxShift - 1) && inc) {
            next := (countOH << 1.U)
        }
        next
    }

    // shamtOH(k): one-hot shift distance for position k (slots first, then dispatch lanes).
    val shamtOH = vacants.scanLeft(0.U)(getShamtOH)

    val willBeValid = issueSlots.map(_.willBeValid) ++
        io.disUops.map(dis => dis.valid && !dis.bits.xcpt_valid && !dis.bits.is_ibar && !dis.bits.is_nop)

    val uops = issueSlots.map(_.outUop) ++ disUops.map(uop => uop)

    for (i <- 0 until numIssueSlots) {
        issueSlots(i).inUop.valid := false.B
        issueSlots(i).inUop.bits := uops(i + 1)

        // Slot i takes the uop j positions behind it when that uop shifts by exactly j.
        for (j <- 1 to maxShift) {
            when (shamtOH(i + j) === (1 << (j - 1)).U) {
                issueSlots(i).inUop.valid := willBeValid(i + j)
                issueSlots(i).inUop.bits := uops(i + j)
            }
        }
        issueSlots(i).clear := shamtOH(i) =/= 0.U
    }

    val willBeAvailable = issueSlots.map{ i => (!i.willBeValid || i.clear) && !i.inUop.valid }
    val numAvailable = PopCount(willBeAvailable)
    io.disUops.zipWithIndex.foreach { case (uop, idx) => uop.ready := RegNext(numAvailable > idx.U) }

    for (w <- 0 until issueWidth) {
        io.issueValids(w) := false.B
        io.issueUops(w) := NullMicroOp
        io.issueUops(w).prs1 := 0.U
        io.issueUops(w).prs2 := 0.U
        io.issueUops(w).lrs1_rtype := RT_X
        io.issueUops(w).lrs2_rtype := RT_X
    }

    // io.issueValids.foreach {
    //     _ := false.B
    // }
    // io.issueUops := DontCare

    // choose which uops to issue
    val requests = issueSlots.map(_.request) // get request from each slot
    val portIssued = Array.fill(issueWidth) { false.B }

    // iss_mask(w)(i): whether slot i may issue on port w (integer queue only).
    val iss_mask = Seq(
        Seq(true,  true,  true,  true,  true,  true,  true,  false, true,  false),
        Seq(true,  true,  true,  true,  true,  true,  false, true,  false, true),
        Seq(true,  true,  true,  false, false, false, false, false, true,  true),
    )
    if (issParams.iqType == IQT_INT.litValue.toInt) {
        // The mask is hand-written; fail with a clear message instead of an
        // index-out-of-bounds error when the parameters outgrow it.
        require(
            issueWidth <= iss_mask.length && iss_mask.forall(_.length >= numIssueSlots),
            "iss_mask does not cover the configured issueWidth / numIssueSlots"
        )
    }

    for (i <- 0 until numIssueSlots) { // iterate through all slots
        issueSlots(i).grant := false.B
        var uopIssued = false.B

        for (w <- 0 until issueWidth) {
            val canAllocate = if (issParams.iqType == IQT_INT.litValue.toInt) {
                iss_mask(w)(i).B && (issueSlots(i).uop.fuCode & io.fuTypes(w)) =/= 0.U
            } else {
                true.B
            }
            when (canAllocate && requests(i) && !uopIssued && !portIssued(w)) {
                issueSlots(i).grant := true.B
                io.issueValids(w) := true.B
                io.issueUops(w) := issueSlots(i).uop
            }
            // Both updates use the values from before this (slot, port) pair was considered.
            val wasPortIssuedYet = portIssued(w)
            portIssued(w) = (canAllocate && requests(i) && !uopIssued) | portIssued(w)
            uopIssued = (canAllocate && requests(i) && !wasPortIssuedYet) | uopIssued
        }
    }
}
--------------------------------------------------------------------------------
/src/main/scala/backend/memSystem/Dcache/WFU.scala:
--------------------------------------------------------------------------------
package iFu.backend

import chisel3._
import chisel3.util._

import iFu.axi3._
import iFu.sma._

import iFu.common._

/** Write-back / fetch unit of the data cache.
  *
  * On a request it optionally writes the victim line back to memory (state wb) and then
  * fetches the requested line (state fetch), streaming the refill words into the arrays.
  */
class WriteFetchUnit extends Module with HasDcacheParameters {
    val io = IO(new CoreBundle {
        // can wfu accept new request
        val ready = Output(Bool())

        // request from MSHR or fence
        val req_valid = Input(Bool())
        val req_addr = Input(UInt(xLen.W))
        val req_wb_only = Input(Bool()) // only write back, no refill

        // meta response from Meta Array
        val meta_resp = Input(new DcacheMetaResp)

        // which way wfu is working on
        val pos = Output(UInt(log2Ceil(nWays).W))

        // finish fetch, ready to replay request in MSHR
        val fetch_ready = Output(Bool())
        val fetched_addr = Output(UInt(xLen.W))
        // new meta line to be written
        val new_meta = Output(new MetaLine)

        // send to Meta Array and Data Array
        val wfu_read_req = Output(Valid(new DCacheReq))
        val wfu_read_resp = Input(Valid(new DCacheResp))
        val wfu_write_req = Output(Valid(new DCacheReq))

        // clear a dirty line
        val line_clear_req = Output(Valid(new DCacheReq))

        val smar = new SMAR
        val smaw = new SMAW
    })

    // ready -> no thing to do, fetch -> fetch data from memory, wb -> write back dirty line to memory
    val ready :: fetch :: wb :: Nil = Enum(3)
    val state = RegInit(ready)

    // which position to write when receive data from Data Array
    val receive_head = RegInit(0.U(log2Ceil(nRowWords).W))
    // which position to read when send data to memory
    val execute_head = RegInit(0.U(log2Ceil(nRowWords).W))
    // one bit wider than the word index, otherwise it could never count up to nRowWords
    val tail = RegInit(0.U((log2Ceil(nRowWords) + 1).W))
    val refillIdx = RegInit(0.U(log2Ceil(nRowWords).W))
    val dataLineBuffer = RegInit(0.U.asTypeOf(Vec(nRowWords, UInt(xLen.W))))
    val replaceAddr = RegInit(0.U(xLen.W))
    val fetchAddr = RegInit(0.U(xLen.W))
    val wbOnly = RegInit(false.B)
    val getfirstWord = RegInit(false.B)
    val replaceWay = RegInit(0.U(log2Ceil(nWays).W))

    io.ready := state === ready

    io.pos := replaceWay

    io.fetch_ready := false.B
    io.fetched_addr := fetchAddr

    io.new_meta.valid := true.B
    io.new_meta.tag := getTag(fetchAddr)
    io.new_meta.dirty := false.B
    io.new_meta.readOnly := false.B
    io.new_meta.fixed := false.B

    io.wfu_read_req := DontCare
    io.wfu_read_req.valid := false.B
    io.wfu_read_req.bits.addr := replaceAddr | (tail << 2.U).asUInt

    io.wfu_write_req := DontCare
    io.wfu_write_req.valid := false.B
    io.wfu_write_req.bits.addr := fetchAddr | (refillIdx << 2.U).asUInt
    io.wfu_write_req.bits.data := io.smar.resp.rdata

    io.line_clear_req := DontCare
    io.line_clear_req.valid := false.B
    io.line_clear_req.bits.addr := replaceAddr

    io.smar.req.arvalid := state === fetch
    io.smar.req.arlen := AXI3Parameters.MLEN16
    io.smar.req.arburst := AXI3Parameters.BURST_WRAP
    io.smar.req.arsize := AXI3Parameters.MSIZE4
    io.smar.req.araddr := fetchAddr

    // Write-back only starts once the first word has been read out of the Data Array.
    io.smaw.req.awvalid := state === wb && getfirstWord
    io.smaw.req.wvalid := state === wb && getfirstWord
    io.smaw.req.wstrb := 0xf.U
    io.smaw.req.awlen := AXI3Parameters.MLEN16
    io.smaw.req.awburst := AXI3Parameters.BURST_WRAP
    io.smaw.req.awsize := AXI3Parameters.MSIZE4
    io.smaw.req.awaddr := replaceAddr
    io.smaw.req.wdata := dataLineBuffer(execute_head)
    // NOTE(review): 0xf and MLEN16 presumably assume nRowWords == 16 — confirm.
    io.smaw.req.wlast := execute_head === 0xf.U

    // A write beat is transferred only when valid and ready are both high; acting on
    // wready alone would skip buffer words if the slave raised wready before wvalid.
    val w_fire = io.smaw.req.wvalid && io.smaw.resp.wready

    when (state === ready) {
        when (io.req_valid) {
            replaceWay := io.meta_resp.pos
            replaceAddr := Cat(io.meta_resp.rmeta.tag, io.meta_resp.idx, 0.U(nOffsetBits.W))
            fetchAddr := Cat(io.req_addr >> nOffsetBits.U, 0.U(nOffsetBits.W))
            wbOnly := io.req_wb_only

            // init
            receive_head := 0.U
            execute_head := 0.U
            tail := 0.U
            refillIdx := 0.U

            // NOTE(review): a wb-only request for a line that is not valid and dirty goes
            // to fetch here — presumably callers never send one; verify against callers.
            state := Mux(io.meta_resp.rmeta.valid && io.meta_resp.rmeta.dirty, wb, fetch)
        }
    } .elsewhen (state === fetch) {
        when (io.smar.resp.rvalid) {
            state := Mux(io.smar.resp.rlast, ready, fetch)

            // refill Meta Array and Data Array(when first word comes, it will invalidate the old meta)
            io.wfu_write_req.valid := true.B
            refillIdx := refillIdx + 1.U

            when (io.smar.resp.rlast) { // finish fetch
                io.fetch_ready := true.B
            }
        }
    } .elsewhen (state === wb) {
        // send a request to Data Array
        when (tail < nRowWords.U) {
            io.wfu_read_req.valid := true.B
            tail := tail + 1.U
        }
        // receive data from Data Array
        when (io.wfu_read_resp.valid) {
            getfirstWord := true.B
            dataLineBuffer(receive_head) := io.wfu_read_resp.bits.data
            receive_head := receive_head + 1.U
        }

        // write back to memory
        when (w_fire) {
            state := Mux(!io.smaw.req.wlast, wb,
                     Mux(wbOnly            , ready, // do not fetch, only write back
                                             fetch))

            execute_head := execute_head + 1.U

            when (io.smaw.req.wlast) { // finish write back
                when (wbOnly) {
                    // clear a dirty line
                    io.line_clear_req.valid := true.B
                }
                // reset
                getfirstWord := false.B
            }
        }
    }
}
-------------------------------------------------------------------------------- /src/main/scala/frontend/bpu/BranchPredictor.scala: -------------------------------------------------------------------------------- 1 | package iFu.frontend 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.frontend.FrontendUtils._ 8 | 9 | class PredictionInfo extends Bundle with HasBPUParameters { 10 | val predicted_target = Valid(UInt(targetSz.W)) 11 | // 给f3用的 12 | val takens = Output(Vec(fetchWidth, Bool())) 13 | val tgts = Vec(fetchWidth, Valid(UInt(targetSz.W))) 14 | } 15 | 16 | class PredictionMeta extends Bundle with HasBPUParameters{ 17 | val bimMeta = Output(new BIMPredictMeta) 18 | val btbMeta = Output(new BTBPredictMeta) 19 | val uBTBMeta = Output(new UBTBPredictMeta) 20 | val localHistoryMeta = Output(new LocalHistoryPredictMeta) 21 | } 22 | 23 | class BranchPredictionBundle extends Bundle with HasBPUParameters{ 24 | val pc = UInt(vaddrBits.W) 25 | val predInfos = new PredictionInfo 26 | val meta = Vec(fetchWidth, new PredictionMeta) 27 | } 28 | 29 | class BranchPredictionUpdate extends Bundle with HasBPUParameters { 30 | val pc = UInt(vaddrBits.W) 31 | // Mask of instructions which are branches. 32 | // If these are not cfiIdx, then they were predicted not taken 33 | val brMask = UInt(fetchWidth.W) 34 | // Which CFI was taken/mispredicted (if any) 35 | val cfiIdx = Valid(UInt(log2Ceil(fetchWidth).W)) 36 | // Was the cfi taken? 37 | val cfiTaken = Bool() 38 | // Was the cfi mispredicted from the original prediction? 39 | val cfiMispredicted = Bool() 40 | // Was the cfi a br? 41 | val cfiIsBr = Bool() 42 | // Was the cfi a jal/jalr? 43 | val cfiIsJal = Bool() 44 | // Was the cfi a jalr 45 | val cfiIsJalr = Bool() 46 | 47 | // What did this CFI jump to? 
48 | val target = UInt(vaddrBits.W) 49 | 50 | val meta = Vec(fetchWidth, new PredictionMeta) 51 | } 52 | 53 | class BranchPredictionRequest extends CoreBundle { 54 | val pc = UInt(vaddrBits.W) 55 | } 56 | 57 | class BranchPredictor extends Module with HasBPUParameters { 58 | val io = IO(new Bundle { 59 | val f0req = Flipped(Valid(new BranchPredictionRequest)) 60 | val resp = Output(new Bundle{ 61 | val f1 = new BranchPredictionBundle 62 | val f2 = new BranchPredictionBundle 63 | val f3 = new BranchPredictionBundle 64 | }) 65 | val update = Input(Valid(new BranchPredictionUpdate)) 66 | }) 67 | 68 | val s0valid = io.f0req.valid 69 | val s1valid = RegNext(s0valid) 70 | val s2valid = RegNext(s1valid) 71 | val s3valid = RegNext(s2valid) 72 | 73 | val s0mask = fetchMask(io.f0req.bits.pc) 74 | val s1mask = RegNext(s0mask) 75 | val s2mask = RegNext(s1mask) 76 | val s3mask = RegNext(s2mask) 77 | 78 | val s0pc = io.f0req.bits.pc 79 | val s0_mixed_pc = mixHILO(io.f0req.bits.pc) 80 | val s1pc = RegNext(s0pc) 81 | val s1_mixed_pc = RegNext(s0_mixed_pc) 82 | val s2pc = RegNext(s1pc) 83 | val s3pc = RegNext(s2pc) 84 | 85 | val s0update = io.update 86 | val s1update = RegNext(s0update) 87 | 88 | val faubtb = Module(new FaUBtbPredictior) 89 | val btb = Module(new BTBPredictor) 90 | val bim = Module(new BimPredictor) 91 | val lh = Module(new LocalHistoryPredictor) 92 | 93 | faubtb.io.s1update := s1update 94 | btb.io.s1update := s1update 95 | bim.io.s1update := s1update 96 | lh.io.s1update := s1update 97 | 98 | // basic pc and valid (enable) signals for each predictor 99 | faubtb.io.s1valid := s1valid 100 | faubtb.io.s1pc := s1pc 101 | faubtb.io.s1_mixed_pc := s1_mixed_pc 102 | btb.io.s0valid := s0valid 103 | btb.io.s0pc := s0pc 104 | btb.io.s0_mixed_pc := s0_mixed_pc 105 | bim.io.s0valid := s0valid 106 | bim.io.s0pc := s0pc 107 | lh.io.s0pc := s0pc 108 | 109 | // f1 takes the faubtb output 110 | val f1_valid_instr_mask = fetchMask(s1pc) 111 | val s1jumpvalid = WireInit(VecInit(Seq.fill(fetchWidth)(false.B))) 112 | io.resp.f1.predInfos :=
0.U.asTypeOf(new PredictionInfo) 113 | // overrides are applied in reverse order, from the highest slot down to the lowest 114 | for (w <- (0 until fetchWidth).reverse) { 115 | s1jumpvalid(w) := s1valid && f1_valid_instr_mask(w) && faubtb.io.s1targs(w).valid && ((faubtb.io.s1br(w) && faubtb.io.s1taken(w)) || faubtb.io.s1jal(w)) 116 | when(s1jumpvalid(w)){ 117 | io.resp.f1.predInfos.predicted_target := faubtb.io.s1targs(w) 118 | } 119 | } 120 | io.resp.f1.predInfos.takens := s1jumpvalid 121 | io.resp.f1.predInfos.tgts := faubtb.io.s1targs 122 | 123 | // f2 starts from the registered f1 result and takes the btb and bim outputs 124 | io.resp.f2.predInfos := RegNext(io.resp.f1.predInfos) 125 | 126 | val f2_valid_instr_mask = fetchMask(s2pc) 127 | val s2jumpvalid = WireInit(VecInit(Seq.fill(fetchWidth)(false.B))) 128 | // overrides are applied in reverse order, from the highest slot down to the lowest 129 | for (w <- (0 until fetchWidth).reverse) { 130 | // the bim taken prediction (bim has no notion of hit or miss) overrides f2's initial value 131 | // val pred_taken = Mux(lh.io.s2taken(w).valid, lh.io.s2taken(w).bits, bim.io.s2taken(w)) || btb.io.s2taken(w) 132 | val pred_taken = bim.io.s2taken(w) || btb.io.s2taken(w) 133 | s2jumpvalid(w) := f2_valid_instr_mask(w) && s2valid && btb.io.s2targs(w).valid && (btb.io.s2br(w) && pred_taken || btb.io.s2jal(w)) 134 | // io.resp.f2.predInfos(w).taken := bim.io.s2taken(w) 135 | // for btb, the result overrides f2's initial value only on a hit, i.e. when the result's valid bit is set 136 | when (s2jumpvalid(w)) { 137 | io.resp.f2.predInfos.predicted_target := btb.io.s2targs(w) 138 | } 139 | } 140 | io.resp.f2.predInfos.takens := s2jumpvalid 141 | io.resp.f2.predInfos.tgts := btb.io.s2targs 142 | 143 | // f3 starts from the registered f2 result; the original note says it takes the tage output, while the code below overrides takens from lh.io.s2_high_taken 144 | io.resp.f3.predInfos := RegNext(io.resp.f2.predInfos) 145 | for (w <- 0 until fetchWidth) { 146 | io.resp.f3.predInfos.takens(w) := RegNext( 147 | Mux( 148 | lh.io.s2_high_taken(w).valid, 149 | lh.io.s2_high_taken(w).bits, 150 | io.resp.f2.predInfos.takens(w) 151 | ) 152 | ) 153 | } 154 | 155 | 156 | io.resp.f1.pc := RegNext(io.f0req.bits.pc) 157 | io.resp.f2.pc := RegNext(io.resp.f1.pc) 158 | io.resp.f3.pc := RegNext(io.resp.f2.pc) 159 | 160 | io.resp.f1.meta := DontCare 161 | io.resp.f2.meta :=
DontCare 162 | for (w <- 0 until fetchWidth) { 163 | io.resp.f3.meta(w).uBTBMeta := faubtb.io.s3meta(w) 164 | io.resp.f3.meta(w).localHistoryMeta := lh.io.s3meta(w) 165 | io.resp.f3.meta(w).bimMeta := bim.io.s3meta(w) 166 | io.resp.f3.meta(w).btbMeta := btb.io.s3meta(w) 167 | } 168 | } 169 | -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/doc.md: -------------------------------------------------------------------------------- 1 | ### NonBlockingDcache 2 | 非阻塞cache在这里负责处理lsu的请求,以及一些对于cache,总线资源的维护。 3 | 4 | ![cache](imgs/cache.jpg) 5 | 6 | 7 | #### 1. 内部模块组件 8 | - meta : 用于存储cache的meta信息,包括valid,tag,dirty等信息 9 | 10 | - data : 用于存储cache的数据信息,一维展平,可以优化时序 11 | 12 | - mshr :用于存储cache指令的miss信息,包括miss的地址,miss的状态等信息 13 | 14 | - wfu : Write&Fetch Unit,连接着总线,用于处理对脏cache行写回内存,以及从内存中取新的行到cache中 15 | 16 | - mmiou : 同样连接着总线,用于处理lsu发进来的mmio请求,包括读写 17 | 18 | 以及几个辅助模块 19 | - wordWrite:用于处理粒度写 20 | - MissArbiter:两个流水线发生了miss请求,仲裁谁放进mshr 21 | 22 | #### 2. 
内部流水线 23 | 流水线有状态对应的任务是 24 | - lsu :普通的lsu访存请求(无mmio),包括读写。 25 | s0:接到请求,将idx送入meta进行命中的判断 26 | s1:接到hit与否,如果hit,就将命中后的pos送入data 27 | s2: 28 | 若命中,此时data会返回那个字,进行处理 29 | 如果是ld指令,就将数据resp回lsu 30 | 如果是st指令 31 | 如果mshr没有正在等待的st指令,就正常做,将数据按粒度处理之后写回data,然后将那一行meta的dirty标记为true 32 | 如果mshr有st指令,那就说明前面的st指令还在等待自己完成,后面的不要去做,按照storefailed的方式返回 33 | 若miss经过MissArbiter的处理,将miss的请求存入mshr中,等待有机会发出replay 34 | 35 | - replayfind : 处理mshr指令的第一步,向wfu发请求 36 | s0:按miss的地址去meta里面寻一个可替换的行 37 | s1:meta将这个寻得的pos以及其他有关信息(要去fetch的自己的地址,以及可能写回内存的脏行的地址)送入wfu 38 | 39 | (此时metalogic会保留一个周期这个只读行信息,用于快速维持只读行的状态) 40 | s2:这个时候将那个行设置成readOnly 41 | 42 | - wb : wfu不存一整条cache行,而是"现用现取",wb代表wfu要从向内存写回的行那里拿一个字——因此一次对cache行的写回是需要16,32等周期数的 43 | s0:由于wfu里面已经得知了要写回内存的行以及具体想要的哪一个字,s0不需要进meta再寻地址,不做任何操作 44 | s1:将位置信息送入data 45 | s2:data读取到这个字,送给wfu 46 | 一个小机制: 47 | 如果这个字的偏移是那一行的第一个字,说明这个时候要正式开始写回了,将这个行对应meta的readOnly标记为true,此时可以服务ld,但是不可以服务st(标志是st指令即使命中也返回miss)。 48 | 49 | - refill: wfu执行fetch状态从内存中拿出的新行的某个字,写入cache 50 | s0:由于wfu已经得知了要写入的行的位置,s0不需要进meta再寻地址,但是这个时候如果fetchReady,就要先通告mshr取好的地址以及pos信息,使其可以激活对应的可能等待的请求 51 | 52 | s1:只需要按照指定的位置写回就好,因此也不需要读取data 53 | 54 | s2:将那个字写入data 55 | 一个特殊情况 56 | 如果是fetchReady的时候,也就是写好了最后一个字,这个时候将新的meta行写回meta,同时 57 | 58 | - replay: mshr中的miss请求被激活,重新发出来执行 59 | s0:由于已经知道对应的全部位置信息,因此不需要meta参与 60 | s1:将位置信息送入data 61 | s2:读或写操作,将返回的resp通过0号返回lsu即可. 62 | 63 | 64 | - mmioreq:mmio请求,lsu发送mmio的时候,保证两条流水线里面只有一条在处理mmio,另一条什么都不做 65 | s0:无 66 | s1:无 67 | s2:将mmio请求送入mmiou,mmiou开始处理 68 | 69 | - mmioresp:mmio回复,mmiou处理完毕,回复lsu 70 | s0:会从mmiou发出一条req,这条req里面的data和uop就是最后要送给resp的,这里以"req"的形式出现只是载体 71 | s1:无 72 | s2:将resp通过0号流水线送回给lsu 73 | 74 | #### 3. 一些细节 75 | - 和lsu的ready交流 76 | 3.1 io.lsu.req.ready为高意味着lsu一定会认为请求已经发送进来了,因此当一些情况不想让lsu发送进来的时候,要将ready置为false,这些主要包括一些内部事务,以及不满足条件的情况:1.lsu想发st指令,但刚刚发生了storeFailed,需要lsu的st队列调整之后重发.2.lsu想发送mmio.但是当前的axi总线繁忙. 77 | 78 | 79 | - 状态优先级 80 | wb和refill最高,因为wb一旦开始,必须保证从cache行读字的速度,比axi总线向内存写回要快,否则会出现数据混乱的情况.
81 | 然后是mmio的req和resp 82 | 再之后是replay,因为replay信号等了很久了,优先处理 83 | 再之后是replayfind,因为replayfind是为了给replay提供激活wfu功能的 84 | 再之后,在所有总线相关和处理miss指令之后,是普通的lsu请求 85 | 86 | - axi线的互斥 87 | 由于只有一条axi线,而可能用到axi的有wfu和mmiou,因此要记录axi的状态,如果其中一个单元在忙,或者流水线中s1s2有即将动用两个单元的请求,那么就标记为忙,不允许其他axi请求来. 88 | 89 | 90 | - kill机制 91 | iskilledbyBranch,会在s0,s1,s2来检查 92 | storeFailed,会在s0,s1检查 93 | lsu的s1_kill,会kill掉当前s1的请求 94 | 95 | - 分支预测处理 96 | 在每级流水线处理分支预测错误,随时准备判断kill的信号. 97 | 在每级流水线及时更新,GetNewBrMask 98 | 99 | --- 100 | 接下来是meta和data相关内容 101 | ### MetaLogic 102 | 处理meta逻辑,首先约定所有状态,都需要在s0发起读,s2和data一起发起写,否则会有复杂的冲突 103 | 读的case 104 | - lsu_R: lsu普通的读指令,拿着s1返回的metaSet来对tag比较,判断命中与否,特别地对于readOnly的行,如果是st指令,也会返回miss 105 | - replay_R:无具体工作 106 | - mshr_R: 替换算法找到路号,以及对应的被换掉的行,送给wfu,由wfu根据脏位自行判断需不需要写回 107 | 108 | 写的case 109 | 全部都是将写的请求传递给meta数据即可 110 | 111 | ### Meta 112 | meta的数据,以Set存储,接收读写,实现了内部转发 113 | 存在一个写的机制:每一个字段(valid,dirty,tag等)都有valid和bits,只有对应位的valid为高,才能将bits的数值写进去. 114 | 115 | ### DataLogic 116 | 逻辑比较简单,无论读写都是直接将请求传递给data数据即可 117 | 118 | ### WordData 119 | 这里的data是一维的,会将idx,pos,和字偏移算出一个一维的idx,进行读或者写,单次读或写都是一个字 120 | 同样地也实现了内部转发. 121 | 122 | ### MSHR 123 | mshr,也就是Miss Status Handling Register,用于存储cache指令的miss信息,包括miss的地址,miss的状态等信息 124 | 125 | 组织形式是包括一表和二表的 126 | 127 | 里面最基本的成分是表项MSHREntry 128 | 其内部记录了: 129 | 1. 数据部分MSHRdata: 130 | 根据一表二表,同一个字段可能有不同的含义 131 | - valid:是否有效 132 | - id:如果是一表项,这是一表项的id,如果是二表项,这是它被唤醒所依赖的一表项的id 133 | - req DCacheReq,请求的内容 134 | - pos 该请求再次发出去会是replay,这个时候要读哪一行的位置信息已经确定了,注意的是,这个pos不是一上来就随着请求存完的,而是对应的那一行彻底取好之后,随着fetchReady信号一起送进来的信息。 135 | - waiting 该项是否在等待fetch 136 | - ready 该项是否已经fetch完毕,所有都准备好,随时可以发出去replay 137 | 138 | 2.
逻辑部分: 139 | 主要包括 140 | - 一些判断的logic信号生成 141 | - 如果是一表项,会进行块地址和传入的新的miss请求的块地址作比较,将结果传出去做首次还是二次miss的判断,也会进行和取好的块地址作比较,将匹配结果传出去看是否满足唤醒逻辑 142 | - fire的时候写入的逻辑,在一表项,就更新完brmask之后正常写入,waiting为真,ready为假,二表项的时候,存在一种快速唤醒,写入的同一个周期标记该项被快速唤醒,然后下个周期直接变成ready状态 143 | - 一个小的状态机 144 | 对于二表项:可以接到外部的wakeUp信号,直接到达激活状态,同时这个时候一定是有什么东西取好了,这个时候接收fetchedpos信号,将这个pos信息存入数据部分 145 | 对于一表项:如果在等待状态,看外部fetched取好的那个周期,一旦判断出是自己代表的块地址取好了,转ready激活状态; 146 | 147 | 对于二表项:如果在激活状态,外面做一个fire也就是"接收",说明replay的东西被发出去了,自己就可以被清空了,等待下一次使用 148 | 149 | 对于一表项,清空则是通过reset信号实现的 150 | 151 | 整体架构在MSHRFile里面实现 152 | MSHRFile包括四个一表项和八个二表项 153 | 主要的操作逻辑是 154 | 1. 当一个miss请求能够fire进入的时候,流程是 155 | - 去所有一表项中先匹配valid的一表项里面存的块地址,看自己是不是FirstMiss 156 | - 如果是,就既写入一表项,也写入二表项 157 | - 如果不是,就直接找一个空的二表项,写入即可 158 | 159 | 这里涉及到一个机制,只有一表项发现自己被取好的那个周期,才会激活已经存入的二表项, 160 | 如果此时来了一个正要存入的请求并且是secondmiss,并且正好依赖的是这个下个周期就会被刷掉的一表项,显然会导致它再也不能被唤醒,那么最好的方法是判断出来这个情况,加入一个快速唤醒机制,在这个特殊的周期,二表项存入即激活,即可解决这个问题 161 | 162 | (5.20快速唤醒除了进去就激活之外,也要立即存储fetchedpos) 163 | 164 | 2. 自己内部有正在等待的一表项 165 | - 选出一个这样的表项,向外界留fetchable信号,让外界去取这个块地址(此时将引起Dcache的"取行"的流程) 166 | 167 | 168 | 169 | 3. 外面有某一行被取好的时候 170 | MSHR会收到io.fetchReady,与之一并进来的,是该行的pos信息,将pos信息给二表项,计算出这个被取好的是一表项的第几个,也就是firstFetchMatchway,将所有期待这个表项id的二表项都激活,ready,等待replay出去 171 | 172 | 与此同时一表项这个周期就会被清除掉,等待下一次使用 173 | (因此也可见,一表项其实没有状态机的ready状态,在waiting被match到fetchReady的时候,下周期就直接被清除了) 174 | 175 | 4.
内部有被激活的二表项 176 | 向外面持续发出replay的valid信号,io.replay是个DecoupledIO,一旦外面接收到,就会将这个表项清除掉,等待下一次使用 177 | 178 | 接下来是wfu和mmiou相关内容 179 | ### WFU 180 | 分为ready :: fetch :: wb 三个状态 181 | - ready:等待状态,等待新的请求 182 | - fetch:正在从内存中取新的行到cache中,对外体现为refill 183 | - wb:正在向内存写回数据,对外体现为wb 184 | 185 | 使用一个buffer,有execute_head和receive_head 186 | 向内存写,要求必须收到字比写回的速度快,因此从cache读占据了优先级,决定了wb一定是最高优先级的 187 | 从内存读向cache写,wfu占主动权,什么时候总线得到一个字,什么时候发出一个refill请求 188 | 189 | ### MMIOU 190 | 和wfu差不多三个状态ready :: fetch :: wb 191 | mmio单元,用于处理lsu发进来的mmio请求,包括读写 192 | 发进来之后执行相应的axi读写 193 | 这里需要注意的是,实际上叫MMIOU不太好,所有uncachable的请求(不仅仅是mmio)都会被这个单元处理,mmio一定是一个字的粒度,但是一些uncachable的访存模式下面,st.b,st.h这种带着粒度的访存,也会被这个单元处理,因此这里要做axi线mask的粒度处理 194 | 195 | 196 | 在接下来是两个辅助小模块 197 | ### WordWrite 198 | 用于处理粒度写,传入那个字,然后按照粒度返回粒度写处理后的字 199 | 200 | ### MissArbiter 201 | 因为mshr一次最多存一个miss请求,因此对于两条流水线,需要做仲裁 202 | 这里模块化一下会比较清晰,简化了NonBlockingDcache那边的主逻辑 203 | 204 | 这里综合两条流水线的信息 205 | - 选择一个请求存入mshr 206 | - 根据两条流水线的情况决定最后的resp和nack信号 207 | - 如果st指令存不进去,返回对应的storeFailed信号 208 | 209 | 210 | 211 | 212 | ### 全局的机制 213 | #### 分支预测信息处理 214 | 被不停更新并检查是不是被kill了brmask的包括主阶段的s0,s1,s2,以及mshr的每个表项,如果有分支预测错误,会被kill掉,在主流水线体现为那个阶段对应valid信号为false,在mshr里面体现为那个表项被清除掉 -------------------------------------------------------------------------------- /src/main/scala/backend/memSystem/Dcache/Meta.scala: -------------------------------------------------------------------------------- 1 | 2 | package iFu.backend 3 | 4 | import chisel3._ 5 | import chisel3.util._ 6 | 7 | import iFu.common._ 8 | import iFu.common.Consts._ 9 | import iFu.util._ 10 | 11 | 12 | // 区分DcacheMetaResp 13 | class MetaResp extends CoreBundle with HasDcacheParameters{ 14 | val rmetaSet = Vec(nWays, new MetaLine) 15 | val dirtyIdx = UInt(nIdxBits.W) 16 | val dirtyPos = UInt(log2Ceil(nWays).W) 17 | } 18 | 19 | class MetaIO extends CoreBundle with HasDcacheParameters{ 20 | val req = Input(Valid(new DcacheMetaReq)) 21 | val resp = Output(Valid(new MetaResp)) 22 | } 23 | 24 | class DcacheMeta 
extends Module with HasDcacheParameters{ 25 | val io = IO(new CoreBundle{ 26 | val read = Vec( memWidth ,new MetaIO) 27 | val write = new MetaIO 28 | // dedicated hasDirty output 29 | val hasDirty = Output(Bool()) 30 | }) 31 | 32 | // val valids = RegInit(VecInit(Seq.fill(nSets)(VecInit(Seq.fill(nWays)(false.B))))) 33 | // val dirtys = RegInit(VecInit(Seq.fill(nSets)(VecInit(Seq.fill(nWays)(false.B))))) 34 | // val readOnlys = RegInit(VecInit(Seq.fill(nSets)(VecInit(Seq.fill(nWays)(false.B))))) 35 | // val fixeds = RegInit(VecInit(Seq.fill(nSets)(VecInit(Seq.fill(nWays)(false.B))))) 36 | val valids = RegInit(VecInit(Seq.fill(nSets)(0.U(nWays.W)))) 37 | val dirtys = RegInit(VecInit(Seq.fill(nSets)(0.U(nWays.W)))) 38 | val readOnlys = RegInit(VecInit(Seq.fill(nSets)(0.U(nWays.W)))) 39 | val fixeds = RegInit(VecInit(Seq.fill(nSets)(0.U(nWays.W)))) 40 | val tags = SyncReadMem(nSets, Vec(nWays, UInt(nTagBits.W))) 41 | 42 | // reset tags 43 | val reseting = RegInit(true.B) 44 | val resetIdx = RegInit(0.U(nIdxBits.W)) 45 | 46 | when (reseting) { 47 | when (resetIdx === (nSets - 1).U) { 48 | reseting := false.B 49 | } 50 | // tags.write(resetIdx, VecInit(Seq.fill(nWays)(0.U))) 51 | resetIdx := resetIdx + 1.U 52 | } 53 | 54 | // to rename 55 | val sethasValids = Wire(Vec(nSets, Bool())) 56 | for (i <- 0 until nSets) { 57 | sethasValids(i) := (valids(i)).orR 58 | } 59 | 60 | // preserve dirty position 61 | val dirtyIdx = RegInit(0.U(nIdxBits.W)) 62 | val dirtyPos = RegInit(0.U(log2Ceil(nWays).W)) 63 | // report whether any dirty line exists; NOTE(review): this is derived from the valid bits (sethasValids), not from dirtys - confirm this is intended 64 | io.hasDirty := sethasValids.asUInt.orR 65 | 66 | // read 67 | val rvalid = io.read.map( _.req.valid) 68 | val rreq = io.read.map( _.req.bits) 69 | val ridx = rreq.map( req => Mux(req.isFence, dirtyIdx, req.idx)) 70 | 71 | // if has fence_read , inc dirtyIdx and dirtyPos 72 | when (rreq.map(_.isFence).reduce(_ || _)) { 73 | dirtyPos := WrapInc(dirtyPos, nWays) 74 | dirtyIdx := Mux(dirtyPos === (nWays - 1).U, WrapInc(dirtyIdx, nSets), dirtyIdx) 75 | } 76 | 77 | val rtags =
Wire(Vec(memWidth, Vec(nWays, UInt(nTagBits.W)))) 78 | val rmetaSet = Wire(Vec(memWidth, Vec(nWays, new MetaLine))) 79 | 80 | rtags := 0.U.asTypeOf(Vec(memWidth, Vec(nWays, UInt(nTagBits.W)))) 81 | rmetaSet := 0.U.asTypeOf(Vec(memWidth, Vec(nWays, new MetaLine))) 82 | 83 | for (w <- 0 until memWidth) { 84 | io.read(w).resp := 0.U.asTypeOf(Valid(new MetaResp)) 85 | // read tags 86 | rtags(w) := tags.read(ridx(w)) 87 | for (i <- 0 until nWays) { 88 | // assemble metaSet 89 | rmetaSet(w)(i).valid := RegNext(valids(ridx(w))(i)) 90 | rmetaSet(w)(i).dirty := RegNext(dirtys(ridx(w))(i)) 91 | rmetaSet(w)(i).readOnly := RegNext(readOnlys(ridx(w))(i)) 92 | rmetaSet(w)(i).fixed := RegNext(fixeds(ridx(w))(i)) 93 | rmetaSet(w)(i).tag := rtags(w)(i) 94 | } 95 | io.read(w).resp.bits.rmetaSet := rmetaSet(w) 96 | io.read(w).resp.valid := RegNext(rvalid(w)) 97 | io.read(w).resp.bits.dirtyIdx := RegNext(dirtyIdx) 98 | io.read(w).resp.bits.dirtyPos := RegNext(dirtyPos) 99 | } 100 | 101 | // write 102 | val wvalid = io.write.req.valid 103 | val wreq = io.write.req.bits 104 | val widx = wreq.idx 105 | val wpos = wreq.pos 106 | val wmask = UIntToOH(wpos) 107 | 108 | 109 | io.write.resp := 0.U.asTypeOf(Valid(new MetaResp)) 110 | 111 | when(wvalid){ 112 | // write tags 113 | when(wreq.setTag.valid){ 114 | tags.write(widx, VecInit(Seq.fill(nWays)(wreq.setTag.bits)), wmask.asBools) 115 | } 116 | 117 | // write valids 118 | when(wreq.setvalid.valid){ 119 | // valids(widx)(wpos) := wreq.setvalid.bits 120 | valids(widx) := (valids(widx) & ~wmask) | (VecInit(Seq.fill(nWays)(wreq.setvalid.bits)).asUInt & wmask) 121 | } 122 | 123 | // write dirtys 124 | when(wreq.setdirty.valid){ 125 | // dirtys(widx)(wpos) := wreq.setdirty.bits 126 | dirtys(widx) := (dirtys(widx) & ~wmask) | (VecInit(Seq.fill(nWays)(wreq.setdirty.bits)).asUInt & wmask) 127 | } 128 | 129 | // write readOnlys 130 | when(wreq.setreadOnly.valid){ 131 | // readOnlys(widx)(wpos) := wreq.setreadOnly.bits 132 | readOnlys(widx) := 
(readOnlys(widx) & ~wmask) | (VecInit(Seq.fill(nWays)(wreq.setreadOnly.bits)).asUInt & wmask) 133 | } 134 | 135 | // write fixeds 136 | when(wreq.setfixed.valid){ 137 | // fixeds(widx)(wpos) := wreq.setfixed.bits 138 | fixeds(widx) := (fixeds(widx) & ~wmask) | (VecInit(Seq.fill(nWays)(wreq.setfixed.bits)).asUInt & wmask) 139 | } 140 | } 141 | 142 | io.write.resp.valid := RegNext(wvalid) 143 | 144 | // bypass 145 | val bypass = Wire(Vec(memWidth, Bool())) 146 | if(!FPGAPlatform)dontTouch(bypass) 147 | for (w <- 0 until memWidth) { 148 | bypass(w) := (rvalid(w)) && (wvalid) && IsEqual( (ridx(w)) , (widx) ) 149 | // detect the read/write index match in this cycle, forward the written values in the next cycle 150 | val wpos_bypass = RegNext(wpos) 151 | when (RegNext(bypass(w))) { 152 | // per field: if the write updated it, use the written value, otherwise keep the value read into rmetaSet; NOTE(review): the fixed field is not forwarded here although the write path updates fixeds - confirm 153 | io.read(w).resp.bits.rmetaSet(wpos_bypass).valid := Mux(RegNext(wreq.setvalid.valid), RegNext(wreq.setvalid.bits), rmetaSet(w)(wpos_bypass).valid) 154 | io.read(w).resp.bits.rmetaSet(wpos_bypass).dirty := Mux(RegNext(wreq.setdirty.valid), RegNext(wreq.setdirty.bits), rmetaSet(w)(wpos_bypass).dirty) 155 | io.read(w).resp.bits.rmetaSet(wpos_bypass).readOnly := Mux(RegNext(wreq.setreadOnly.valid), RegNext(wreq.setreadOnly.bits), rmetaSet(w)(wpos_bypass).readOnly) 156 | io.read(w).resp.bits.rmetaSet(wpos_bypass).tag := Mux(RegNext(wreq.setTag.valid), RegNext(wreq.setTag.bits), rmetaSet(w)(wpos_bypass).tag) 157 | } 158 | } 159 | } -------------------------------------------------------------------------------- /src/main/scala/isa/Instructions.scala: -------------------------------------------------------------------------------- 1 | package iFu.isa 2 | 3 | import chisel3.util.BitPat 4 | 5 | object Instructions { 6 | def ADDW = BitPat("b00000000000100000???????????????") 7 | def SUBW = BitPat("b00000000000100010???????????????") 8 | def SLT = BitPat("b00000000000100100???????????????") 9 | def SLTU = BitPat("b00000000000100101???????????????") 10 | def NOR = BitPat("b00000000000101000???????????????") 11 | def AND
= BitPat("b00000000000101001???????????????") 12 | def OR = BitPat("b00000000000101010???????????????") 13 | def XOR = BitPat("b00000000000101011???????????????") 14 | def LU12IW = BitPat("b0001010?????????????????????????") 15 | def ADDIW = BitPat("b0000001010??????????????????????") 16 | def SLTI = BitPat("b0000001000??????????????????????") 17 | def SLTUI = BitPat("b0000001001??????????????????????") 18 | def PCADDI = BitPat("b0001100?????????????????????????") 19 | def PCADDU12I = BitPat("b0001110?????????????????????????") 20 | def ANDN = BitPat("b00000000000101101???????????????") 21 | def ORN = BitPat("b00000000000101100???????????????") 22 | def ANDI = BitPat("b0000001101??????????????????????") 23 | def ORI = BitPat("b0000001110??????????????????????") 24 | def XORI = BitPat("b0000001111??????????????????????") 25 | def MULW = BitPat("b00000000000111000???????????????") 26 | def MULHW = BitPat("b00000000000111001???????????????") 27 | def MULHWU = BitPat("b00000000000111010???????????????") 28 | def DIVW = BitPat("b00000000001000000???????????????") 29 | def MODW = BitPat("b00000000001000001???????????????") 30 | def DIVWU = BitPat("b00000000001000010???????????????") 31 | def MODWU = BitPat("b00000000001000011???????????????") 32 | def SLLIW = BitPat("b00000000010000001???????????????") 33 | def SRLIW = BitPat("b00000000010001001???????????????") 34 | def SRAIW = BitPat("b00000000010010001???????????????") 35 | def SLLW = BitPat("b00000000000101110???????????????") 36 | def SRLW = BitPat("b00000000000101111???????????????") 37 | def SRAW = BitPat("b00000000000110000???????????????") 38 | def JIRL = BitPat("b010011??????????????????????????") 39 | def B = BitPat("b010100??????????????????????????") 40 | def BL = BitPat("b010101??????????????????????????") 41 | def BEQ = BitPat("b010110??????????????????????????") 42 | def BNE = BitPat("b010111??????????????????????????") 43 | def BLT = BitPat("b011000??????????????????????????") 44 | def BGE = 
BitPat("b011001??????????????????????????") 45 | def BLTU = BitPat("b011010??????????????????????????") 46 | def BGEU = BitPat("b011011??????????????????????????") 47 | def LLW = BitPat("b00100000????????????????????????") 48 | def SCW = BitPat("b00100001????????????????????????") 49 | def LDB = BitPat("b0010100000??????????????????????") 50 | def LDBU = BitPat("b0010101000??????????????????????") 51 | def LDH = BitPat("b0010100001??????????????????????") 52 | def LDHU = BitPat("b0010101001??????????????????????") 53 | def LDW = BitPat("b0010100010??????????????????????") 54 | def STB = BitPat("b0010100100??????????????????????") 55 | def STH = BitPat("b0010100101??????????????????????") 56 | def STW = BitPat("b0010100110??????????????????????") 57 | def SYSCALL = BitPat("b00000000001010110???????????????") 58 | def BREAK = BitPat("b00000000001010100???????????????") 59 | def CSRRD = BitPat("b00000100??????????????00000?????") 60 | def CSRWR = BitPat("b00000100??????????????00001?????") 61 | 62 | // rj != 0 and rj != 1 63 | def CSRXCHG1 = BitPat("b00000100??????????????0001??????") 64 | def CSRXCHG2 = BitPat("b00000100??????????????0010??????") 65 | def CSRXCHG3 = BitPat("b00000100??????????????0011??????") 66 | def CSRXCHG4 = BitPat("b00000100??????????????0100??????") 67 | def CSRXCHG5 = BitPat("b00000100??????????????0101??????") 68 | def CSRXCHG6 = BitPat("b00000100??????????????0110??????") 69 | def CSRXCHG7 = BitPat("b00000100??????????????0111??????") 70 | def CSRXCHG8 = BitPat("b00000100??????????????1000??????") 71 | def CSRXCHG9 = BitPat("b00000100??????????????1001??????") 72 | def CSRXCHG10 = BitPat("b00000100??????????????1010??????") 73 | def CSRXCHG11 = BitPat("b00000100??????????????1011??????") 74 | def CSRXCHG12 = BitPat("b00000100??????????????1100??????") 75 | def CSRXCHG13 = BitPat("b00000100??????????????1101??????") 76 | def CSRXCHG14 = BitPat("b00000100??????????????1110??????") 77 | def CSRXCHG15 = 
BitPat("b00000100??????????????1111??????") 78 | def ERTN = BitPat("b00000110010010000011100000000000") 79 | // 合并RDCNTIDW 以及 RDCNTVLW 80 | def RDTIMELW = BitPat("b0000000000000000011000??????????") 81 | def RDCNTVHW = BitPat("b000000000000000001100100000?????") 82 | def IDLE = BitPat("b00000110010010001???????????????") 83 | def TLBSRCH = BitPat("b00000110010010000010100000000000") 84 | def TLBRD = BitPat("b00000110010010000010110000000000") 85 | def TLBWR = BitPat("b00000110010010000011000000000000") 86 | def TLBFILL = BitPat("b00000110010010000011010000000000") 87 | // def INVTLB = BitPat("b00000110010010011???????????????") 88 | def INVTLB0 = BitPat("b00000110010010011??????????00000") 89 | def INVTLB1 = BitPat("b00000110010010011??????????00001") 90 | def INVTLB2 = BitPat("b00000110010010011??????????00010") 91 | def INVTLB3 = BitPat("b00000110010010011??????????00011") 92 | def INVTLB4 = BitPat("b00000110010010011??????????00100") 93 | def INVTLB5 = BitPat("b00000110010010011??????????00101") 94 | def INVTLB6 = BitPat("b00000110010010011??????????00110") 95 | 96 | def CACOP = BitPat("b0000011000??????????????????????") 97 | def PRELD = BitPat("b0010101011??????????????????????") 98 | def DBAR = BitPat("b00111000011100100???????????????") 99 | def IBAR = BitPat("b00111000011100101???????????????") 100 | 101 | def NEMU_TRAP = BitPat("b10000000000000000000000000000000") 102 | } 103 | -------------------------------------------------------------------------------- /src/main/scala/tlb/DTLB.scala: -------------------------------------------------------------------------------- 1 | package iFu.tlb 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import iFu.common._ 7 | import iFu.common.Consts._ 8 | 9 | class DTLBException extends CoreBundle { 10 | val xcpt_cause = UInt(CauseCode.microCauseBits.W) 11 | } 12 | 13 | class DTLBReq extends CoreBundle { 14 | val vaddr = UInt(vaddrBits.W) 15 | val size = UInt(2.W) 16 | val use_ldq = Bool() 17 | val use_stq = 
Bool() 18 | } 19 | 20 | class DTLBResp extends CoreBundle { 21 | val paddr = UInt(paddrBits.W) 22 | val exception = Valid(new DTLBException()) 23 | val is_uncacheable = Bool() 24 | } 25 | 26 | class DTLBIO extends CoreBundle { 27 | val req = Vec(memWidth, Flipped(Valid(new DTLBReq))) 28 | val resp = Vec(memWidth, Output(new DTLBResp)) 29 | val r_req = Vec(memWidth, Valid(new TLBDataRReq)) 30 | val r_resp = Vec(memWidth, Flipped(Valid(new TLBDataRResp))) 31 | val dtlb_csr_context = Input(new TLBCsrContext) 32 | } 33 | 34 | class DTLB(num_l0_dtlb_entries: Int = 2) extends CoreModule with L0TLBState { 35 | require(isPow2(num_l0_dtlb_entries)) 36 | 37 | val io = IO(new DTLBIO) 38 | 39 | val state = RegInit(s_ready) 40 | val state_nxt = WireInit(state) 41 | state := state_nxt 42 | 43 | // L0 DTLB 44 | val l0_entry = (0 until memWidth) map { w => 45 | RegInit(VecInit( 46 | Seq.fill(num_l0_dtlb_entries)(0.U.asTypeOf(new L0ITLBEntry)) 47 | )) 48 | } 49 | 50 | val csr_regs = io.dtlb_csr_context 51 | 52 | // addr translation 53 | val l0_miss = WireInit(VecInit(Seq.fill(memWidth)(false.B))) 54 | val use_page_table = WireInit(VecInit(Seq.fill(memWidth)(false.B))) 55 | for (w <- 0 until memWidth) { 56 | val trans_resp = WireInit(0.U.asTypeOf(new DTLBResp)) 57 | val vaddr = io.req(w).bits.vaddr 58 | val l0_hit_oh = VecInit(l0_entry(w).map( 59 | e => e.entry.matches(vaddr(vaddrBits - 1, 13), csr_regs.asid_asid) 60 | )) 61 | val l0_hit = l0_hit_oh.asUInt.orR 62 | val l0_hit_idx = OHToUInt(l0_hit_oh) 63 | val l0_hit_entry = l0_entry(w)(l0_hit_idx) 64 | 65 | when ( 66 | (vaddr(0) && io.req(w).bits.size === 1.U) || 67 | (vaddr(1, 0) =/= 0.U && io.req(w).bits.size === 2.U) 68 | ) { 69 | trans_resp.exception.valid := true.B 70 | trans_resp.exception.bits.xcpt_cause := CauseCode.ALE 71 | } .elsewhen (csr_regs.da_mode) { 72 | trans_resp.paddr := vaddr 73 | trans_resp.is_uncacheable := csr_regs.crmd_datm === 0.U 74 | } .elsewhen (csr_regs.pg_mode) { 75 | val dmw0_en = ( 76 | 
(csr_regs.dmw0_plv0 && csr_regs.crmd_plv === 0.U) || 77 | (csr_regs.dmw0_plv3 && csr_regs.crmd_plv === 3.U) 78 | ) && vaddr(31, 29) === csr_regs.dmw0_vseg 79 | val dmw1_en = ( 80 | (csr_regs.dmw1_plv0 && csr_regs.crmd_plv === 0.U) || 81 | (csr_regs.dmw1_plv3 && csr_regs.crmd_plv === 3.U) 82 | ) && vaddr(31, 29) === csr_regs.dmw1_vseg 83 | if (!FPGAPlatform) dontTouch(dmw0_en) 84 | if (!FPGAPlatform) dontTouch(dmw1_en) 85 | 86 | when (dmw0_en || dmw1_en) { 87 | trans_resp.paddr := Cat( 88 | Mux(dmw0_en, csr_regs.dmw0_pseg, csr_regs.dmw1_pseg), vaddr(28, 0) 89 | ) 90 | trans_resp.is_uncacheable := ( 91 | (dmw0_en && (csr_regs.dmw0_mat === 0.U)) || 92 | (dmw1_en && (csr_regs.dmw1_mat === 0.U)) 93 | ) 94 | } .otherwise { 95 | use_page_table(w) := true.B 96 | val entry = l0_hit_entry.entry 97 | val odd_even_page = Mux(entry.meta.ps === 12.U, vaddr(12), vaddr(21)) 98 | val data = entry.data(odd_even_page) 99 | switch (state) { 100 | is (s_ready) { 101 | when (!l0_hit) { 102 | l0_miss(w) := io.req(w).valid 103 | trans_resp.exception.valid := true.B 104 | trans_resp.exception.bits.xcpt_cause := CauseCode.MINI_EXCEPTION_L0TLB_MISS 105 | } .elsewhen (!l0_hit_entry.exist) { 106 | trans_resp.exception.valid := true.B 107 | trans_resp.exception.bits.xcpt_cause := CauseCode.TLBR 108 | } .otherwise { 109 | when (!data.v) { 110 | trans_resp.exception.valid := true.B 111 | trans_resp.exception.bits.xcpt_cause := Mux(io.req(w).bits.use_ldq, CauseCode.PIL, CauseCode.PIS) 112 | } .elsewhen (io.dtlb_csr_context.crmd_plv > data.plv) { 113 | trans_resp.exception.valid := true.B 114 | trans_resp.exception.bits.xcpt_cause := CauseCode.PPI 115 | } .elsewhen (io.req(w).bits.use_stq && !data.d) { 116 | trans_resp.exception.valid := true.B 117 | trans_resp.exception.bits.xcpt_cause := CauseCode.PME 118 | } 119 | } 120 | } 121 | is (s_refill) { 122 | trans_resp.exception.valid := true.B 123 | trans_resp.exception.bits.xcpt_cause := CauseCode.MINI_EXCEPTION_L0TLB_MISS 124 | } 125 | } 126 | 
trans_resp.paddr := Mux( 127 | entry.meta.ps === 12.U, 128 | Cat(data.ppn, vaddr(11, 0)), 129 | Cat(data.ppn(paddrBits - 13, 9), vaddr(20, 0)) 130 | ) 131 | trans_resp.is_uncacheable := data.mat === 0.U 132 | } 133 | } 134 | io.resp(w) := RegNext(trans_resp) 135 | } 136 | state_nxt := Mux(l0_miss.reduce(_||_), s_refill, state) 137 | 138 | // access L1 TLB 139 | for (w <- 0 until memWidth) { 140 | io.r_req(w).valid := RegNext(l0_miss(w)) 141 | io.r_req(w).bits.vaddr := RegNext(io.req(w).bits.vaddr) 142 | val r_resp = RegNext(io.r_resp(w)) 143 | 144 | val refill_vppn = RegNext(RegNext(RegNext(io.req(w).bits.vaddr(vaddrBits - 1, 13)))) 145 | val refill_en = RegNext(RegNext(RegNext(l0_miss(w) && use_page_table(w)))) && (state === s_refill) 146 | val refill_idx = RegInit(0.U(log2Ceil(num_l0_dtlb_entries).W)) 147 | refill_idx := refill_idx + refill_en 148 | if (!FPGAPlatform) dontTouch(refill_idx) 149 | 150 | when (refill_en) { 151 | when (r_resp.valid) { 152 | l0_entry(w)(refill_idx) := Mux( 153 | r_resp.bits.found, 154 | L0ITLBEntry.new_entry(r_resp.bits.entry), 155 | L0ITLBEntry.fake_entry(refill_vppn, csr_regs.asid_asid) 156 | ) 157 | } 158 | state_nxt := s_ready 159 | } 160 | when (csr_regs.inv_l0_tlb) { 161 | l0_entry(w) map { e => e.entry.meta.e := false.B } 162 | } 163 | } 164 | } 165 |
--------------------------------------------------------------------------------
/src/main/scala/frontend/FetchBuffer.scala:
--------------------------------------------------------------------------------
package iFu.frontend

import chisel3._
import chisel3.util._
import iFu.common.Consts.FPGAPlatform
import iFu.common._
import iFu.util.MaskUpper
import iFu.frontend.FrontendUtils._

/** Dequeue payload: one row of `coreWidth` uops, each carrying its own valid bit. */
class FetchBufferResp() extends CoreBundle {
    val uops = Vec(coreWidth,Valid(new MicroOp))
}

/**
 * Circular queue between fetch and the consumer of `io.deq`.
 *
 * Up to `fetchWidth` uops are written per cycle at single-entry granularity;
 * reads return one whole row of `coreWidth` entries. Both pointers are one-hot:
 * `head` has one bit per row, `tail` has one bit per entry.
 *
 * io.clear : resets both pointers and the full/empty flag.
 * io.enq   : incoming fetch bundle; `ready` is low when accepting a full bundle
 *            could overrun the head row.
 * io.deq   : the row selected by `head`; per-uop valid bits mark the slots that
 *            lie before the tail.
 */
class FetchBuffer extends CoreModule {
    val io = IO(new CoreBundle{
        val clear = Input(Bool())
        val enq   = Flipped(Decoupled(new FetchBundle()))   // Input
        val deq   = new DecoupledIO(new FetchBufferResp())  // Output
    })
    //------------------------------------
    val numFetchBufferEntries = frontendParams.numFetchBufferEntries
    val fetchWidth            = frontendParams.fetchWidth
    //------------------------------------

    val numEnt = numFetchBufferEntries
    require(numEnt % coreWidth == 0, "FetchBuffer size must be divisible by coreWidth")
    // The enqueue check rotates `tail` by 1 .. fetchWidth-1 positions, which only
    // makes sense when a fetch bundle is not larger than the buffer itself.
    require(fetchWidth >= 1 && fetchWidth <= numEnt, "fetchWidth must be in [1, numFetchBufferEntries]")
    val numRow = numEnt / coreWidth  // dequeue 1 row of uops at a time

    val ram  = Reg(Vec(numEnt, new MicroOp))                    // physical storage, one register per entry
    val lram = Wire(Vec(numRow, Vec(coreWidth, new MicroOp)))   // the same storage viewed as rows

    for (i <- 0 until numEnt) {
        lram(i / coreWidth)(i % coreWidth) := ram(i)
    }

    val head = RegInit(1.U(numRow.W))  // one-hot pointer to the row that will be dequeued
    val tail = RegInit(1.U(numEnt.W))  // one-hot pointer to the next entry that will be written

    // Disambiguates "tail is on the first slot of the head row": set when at least one
    // uop is enqueued, cleared when a row is dequeued or on io.clear.
    val mayFull = RegInit(false.B)

    // enqueue stage

    /** Rotates `in` left by the constant `k` (0 < k < width). */
    def rotateLeft(in : UInt, k : Int) = {
        val n = in.getWidth
        require(k > 0 && k < n, s"rotateLeft: shift $k out of range for width $n")
        Cat(in(n-k-1,0),in(n-1,n-k))
    }

    /**
     * Keeps only the bits of an entry-granular one-hot vector that sit on the first
     * slot of a row, yielding a row-granular vector that can be compared with `head`
     * (head is always aligned to a row boundary).
     */
    def rowStartBits(ptr: UInt): UInt =
        VecInit(ptr.asBools.zipWithIndex.collect {
            case (bit, idx) if idx % coreWidth == 0 => bit
        }).asUInt

    // Would the tail land on the first slot of the head row after inserting
    // 1, 2, ..., fetchWidth-1 uops?  Folding from zero (instead of `reduce`) keeps
    // elaboration working when fetchWidth == 1, where there is nothing to test.
    val mayHitHead = (1 until fetchWidth)
        .map(k => head & rowStartBits(rotateLeft(tail, k)))
        .foldLeft(0.U(numRow.W))(_ | _)
        .orR

    // Is the tail currently on the first slot of the head row?  Together with mayFull
    // this means the buffer is full; with mayFull clear it means the buffer is empty.
    val atHead = (rowStartBits(tail) & head).orR

    // Accept a bundle only if the buffer is not full and none of the tested insertions
    // would reach the head row.
    val doEnqueue = !(atHead && mayFull || mayHitHead)

    io.enq.ready := doEnqueue

    val inMask = Wire(Vec(fetchWidth, Bool()))          // which incoming uops are valid
    val inUops = Wire(Vec(fetchWidth, new MicroOp()))

    for (i <- 0 until fetchWidth) {

        val pc = (fetchAlign(io.enq.bits.pc) + (i << 2).U) | io.enq.bits.pc(1, 0)

        inUops(i) := DontCare  // fields not listed below are left unconnected here
        inMask(i) := io.enq.valid && io.enq.bits.mask(i)

        inUops(i).xcpt_valid := io.enq.bits.exception.valid
        inUops(i).xcpt_cause := io.enq.bits.exception.bits.xcpt_cause

        inUops(i).pcLowBits := pc
        inUops(i).ftqIdx    := io.enq.bits.ftqIdx
        inUops(i).instr     := io.enq.bits.instrs(i)
        if (!FPGAPlatform) {
            inUops(i).debug_inst := io.enq.bits.instrs(i)
            inUops(i).debug_pc   := pc
        }
        inUops(i).taken := io.enq.bits.cfiIdx.bits.asUInt === i.U && io.enq.bits.cfiIdx.valid
    }

    // One-hot write position of every incoming uop.
    val enqIdxOH = Wire(Vec(fetchWidth, UInt(numEnt.W)))

    /** Advances a one-hot pointer by one position (rotate left by one, with wrap-around). */
    def inc(ptr: UInt) = {
        val n = ptr.getWidth
        Cat(ptr(n-2,0), ptr(n-1))
    }

    // Walk the bundle: each valid uop takes the current position and advances it,
    // invalid uops leave it untouched.  After the loop enqIdx is the new tail.
    var enqIdx = tail
    for (i <- 0 until fetchWidth) {
        enqIdxOH(i) := enqIdx
        enqIdx = Mux(inMask(i), inc(enqIdx), enqIdx)
    }

    // Write every accepted uop into the entry selected by its one-hot position.
    for (i <- 0 until fetchWidth) {     // for each incoming uop
        for (j <- 0 until numEnt) {     // for each entry in the buffer
            when (doEnqueue && inMask(i) && enqIdxOH(i)(j)) {
                ram(j) := inUops(i)
            }
        }
    }

    // dequeue stage

    // Expand head to entry granularity (every slot of the head row) and intersect it
    // with the tail.  Slot 0 of the head row is excluded while mayFull is set: there
    // tail == head means "full", not "empty", so it is not a collision.
    val mayHitTail = VecInit((0 until numEnt).map(
        idx => head(idx / coreWidth) && (!mayFull || (idx % coreWidth != 0).B)
    )).asUInt & tail

    // coreWidth bits: the slot of the head row on which the tail sits, all zero if the
    // tail is not inside the head row.
    val slotWillHitTail = (0 until numRow).map(
        i => mayHitTail((i + 1) * coreWidth - 1, i * coreWidth)
    ).reduce(_|_)
    val willHitTail = slotWillHitTail.orR

    // The head only advances when the whole head row is occupied.
    val doDequeue = io.deq.ready && !willHitTail

    // the positions before the tail are valid
    val deqValid = (~MaskUpper(slotWillHitTail)).asBools

    (io.deq.bits.uops zip deqValid).foreach { case (d, valid) => d.valid := valid }
    (io.deq.bits.uops zip Mux1H(head, lram)).foreach { case (d, uop) => d.bits := uop }
    io.deq.valid := deqValid.reduce(_||_)
    // note: a partially filled head row is still presented on io.deq (with per-uop valid
    // bits), but doDequeue keeps the head in place until the row is complete, so the same
    // uops are presented again in the following cycles.  Per the original author's note,
    // the repeated uops are handled by the next stage (decode, in core.scala).

    // update registers
    // note: later `when` blocks win (last connect), so for mayFull the effective
    // priority is clear > dequeue > enqueue; head and tail are each written by only one
    // of enqueue/dequeue, plus clear.
    when (doEnqueue) {
        tail := enqIdx
        when (inMask.reduce(_||_)) {
            mayFull := true.B
        }
    }

    when (doDequeue) {
        head    := inc(head)
        mayFull := false.B
    }

    when (io.clear) {
        head    := 1.U
        tail    := 1.U
        mayFull := false.B
    }

    // While reset is asserted no uop is reported valid.
    when (reset.asBool) {
        io.deq.bits.uops.foreach { u => u.valid := false.B }
    }

}
--------------------------------------------------------------------------------
/src/main/scala/frontend/bpu/Btb.scala:
--------------------------------------------------------------------------------
package iFu.frontend

import chisel3._
import chisel3.util._
import chisel3.util.random.LFSR
import iFu.common.Consts._
import iFu.frontend.FrontendUtils._
import iFu.util.IsEqual
import ram.SDPRam

/** Data stored per BTB slot: the predicted target, `targetSz` bits wide. */
class BTBEntry extends Bundle with HasBtbParameters {
    // val lowBits = UInt(lowBitSz.W)
    val target  = UInt(targetSz.W)
}

/** Tag stored per BTB slot, plus a flag telling conditional branches from other entries. */
class BTBMeta extends Bundle with HasBtbParameters {
    val tag   = UInt(tagSz.W)
    val is_br = Bool()
}

/** Per-slot bookkeeping produced at prediction time and handed back with the update. */
class BTBPredictMeta extends Bundle with HasBtbParameters {
    val writeWay = UInt(log2Ceil(nWays).W)  // way to write on update
    val hit      = Bool()                   // whether the lookup hit
}

/** Ports of the BTB predictor; the sN prefix is the pipeline stage of each signal. */
class BTBIO extends Bundle with HasBtbParameters {
    val s0valid     = Input(Bool())
    val s0pc        = Input(UInt(vaddrBits.W))
    val s0_mixed_pc = Input(UInt(vaddrBits.W))

    val s2br      = Output(Vec(fetchWidth, Bool()))
    val s2jal     = Output(Vec(fetchWidth, Bool()))
    val s2taken   = Output(Vec(fetchWidth, Bool()))
    // val s2targspc = Output(Vec(fetchWidth, Valid(UInt(vaddrBits.W))))
    val s2targs   = Output(Vec(fetchWidth, Valid(UInt(targetSz.W))))

    val s3meta = Output(Vec(fetchWidth, new BTBPredictMeta))

    val s1update = Input(Valid(new BranchPredictionUpdate))
}

class BTBPredictor extends
Module with HasBtbParameters{
    val io = IO(new BTBIO)

    // Retired "low bits" target encoding, kept for reference:
    // def getLowBits(pc: UInt): UInt = pc(lowBitSz + 1, 2)
    // def getHighBits(pc: UInt): UInt = pc(vaddrBits - 1, lowBitSz + 2)
    // def getTarget(pc: UInt, lowBits: UInt): UInt = Cat(getHighBits(pc), lowBits, 0.U(2.W))

    // One tag RAM and one target RAM per way; each has nSets rows of fetchWidth lanes.
    val meta = Seq.fill(nWays) { Module(new SDPRam(nSets, new BTBMeta,  fetchWidth)) }
    val btb  = Seq.fill(nWays) { Module(new SDPRam(nSets, new BTBEntry, fetchWidth)) }

    // ---------------------------------------------
    //     Reset Logic
    // While reset_en is high, reset_idx steps through the sets and the write ports
    // below store all-zero rows.
    // NOTE(review): reset_en is initialised to false.B and nothing visible in this
    // block ever sets it, so the sweep looks disabled as written - confirm intent.
    val reset_en  = RegInit(false.B)
    val reset_idx = RegInit(0.U(log2Ceil(nSets).W))
    when (reset_en) {
        reset_idx := reset_idx + 1.U
    }
    when (reset_idx === (nSets - 1).U) {
        reset_en := false.B
    }
    // ---------------------------------------------

    // ---------------------------------------------
    //     Predict Logic
    val s0_valid    = io.s0valid
    // val s0_tag_idx = fetchIdx(io.s0pc)
    val s0_mixed_pc = io.s0_mixed_pc
    val s0_idx      = getIdx(s0_mixed_pc)
    val s0_tag      = getTag(s0_mixed_pc)

    val s1_valid = RegNext(io.s0valid)
    val s1_pc    = RegNext(io.s0pc)
    val s1_tag   = RegNext(s0_tag)

    // stage 1: every way is read at the s0 index; the data is compared against the
    // registered tag (presumably SDPRam returns rdata one cycle after raddr - verify).
    val s1_btb = VecInit(btb.map { ram =>
        ram.io.raddr := s0_idx
        ram.io.rdata
    })
    val s1_meta = VecInit(meta.map { ram =>
        ram.io.raddr := s0_idx
        ram.io.rdata
    })

    // val s1_tag = s1_tag_idx >> log2Ceil(nSets)
    // Per fetch slot: one match bit per way.
    val s1_hit_OHs = VecInit(Seq.tabulate(fetchWidth) { slot =>
        VecInit(Seq.tabulate(nWays) { way =>
            s1_meta(way)(slot).tag === s1_tag.asUInt
        })
    })
    val s1_hits     = s1_hit_OHs.map(wayHits => wayHits.asUInt.orR)
    val s1_hit_ways = s1_hit_OHs.map(wayHits => PriorityEncoder(wayHits))

    (0 until fetchWidth).foreach { w =>
        // s1 stage: pick the entry of the lowest-numbered matching way
        val resp_valid = !reset_en && s1_valid && s1_hits(w)
        val entry_meta = s1_meta(s1_hit_ways(w))(w)
        val entry_btb  = s1_btb(s1_hit_ways(w))(w)
        // s2 stage: a hit that is not marked is_br is reported as jal and as taken
        val is_br  = RegNext(resp_valid &&  entry_meta.is_br)
        val is_jal = RegNext(resp_valid && !entry_meta.is_br)
        io.s2br(w)    := is_br
        io.s2jal(w)   := is_jal
        io.s2taken(w) := is_jal
        io.s2targs(w).valid := RegNext(resp_valid)
        // io.s2targs(w).bits := RegNext(getTarget(getPc(s1_pc, w.U), entry_btb.lowBits))
        io.s2targs(w).bits  := RegNext(entry_btb.target)

        // io.s2targspc(w).valid := RegNext(resp_valid)
        // io.s2targspc(w).bits := RegNext(getTargetPC(s1_pc, entry_btb.target))
    }
    // ---------------------------------------------

    // ---------------------------------------------
    //     Prepare Meta for Update
    // The LFSR steps only on a valid lookup in which no slot hit; its low bits name
    // the way to allocate.
    val repl_way_update_en = s1_valid && !s1_hits.reduce(_ || _)
    val repl_way = LFSR(nWays, repl_way_update_en)(log2Ceil(nWays) - 1, 0)

    val s1_update_info = Wire(Vec(fetchWidth, new BTBPredictMeta))
    s1_update_info.zipWithIndex.foreach { case (info, slot) =>
        info.hit      := s1_hits(slot)
        // reuse the matching way on a hit, otherwise allocate the random way
        info.writeWay := Mux(s1_hits(slot), s1_hit_ways(slot), repl_way)
    }
    // two more register stages move the s1 information to s3
    io.s3meta := RegNext(RegNext(s1_update_info))
    // ---------------------------------------------

    // ---------------------------------------------
    //     Update Logic
    val s1_update          = io.s1update
    val s1_update_cfi_idx  = s1_update.bits.cfiIdx.bits
    val s1_update_meta     = VecInit(s1_update.bits.meta.map(_.btbMeta))
    val s1_update_ways     = VecInit(s1_update_meta.map(_.writeWay))
    val s1_update_way      = s1_update_ways(s1_update_cfi_idx)
    val s1_update_mixed_pc = mixHILO(s1_update.bits.pc)
    val s1_update_idx      = getIdx(s1_update_mixed_pc)
    val s1_update_tag      = getTag(s1_update_mixed_pc)

    // val target_overflow = !IsEqual(getHighBits(getPc(s1_update.bits.pc, s1_update.bits.cfiIdx.bits)), getHighBits(s1_update.bits.target))
    // if (!FPGAPlatform) dontTouch(target_overflow)

    // Every lane gets the same tag; is_br comes from the update's branch mask.
    val s1_update_wmeta = Wire(Vec(fetchWidth, new BTBMeta))
    s1_update_wmeta.zipWithIndex.foreach { case (lane, slot) =>
        // lane.tag := s1_update_idx >> log2Ceil(nSets)
        lane.tag   := s1_update_tag
        lane.is_br := s1_update.bits.brMask(slot)
    }

    val s1_update_wbtb = Wire(new BTBEntry)
    // s1_update_wbtb.lowBits := getLowBits(s1_update.bits.target)
    s1_update_wbtb.target := getTarget(s1_update.bits.target)

    // Only the lane of the CFI is enabled, and only for a valid update whose CFI
    // index is valid and taken.
    val s1_update_wbtb_mask = UIntToOH(s1_update_cfi_idx) &
        Fill(fetchWidth, s1_update.valid && s1_update.bits.cfiIdx.valid && s1_update.bits.cfiTaken)
    val s1_update_wmeta_mask = s1_update_wbtb_mask

    // Write ports: the reset sweep takes precedence over updates and stores zeros in
    // every lane; otherwise the way recorded at prediction time is written, with the
    // lanes selected by the strobe (strobe semantics are SDPRam's - not visible here).
    meta.zip(btb).zipWithIndex.foreach { case ((metaRam, btbRam), w) =>
        val update_en = s1_update_way === w.U /* && !target_overflow */

        metaRam.io.wen     := reset_en || update_en
        metaRam.io.waddr   := Mux(reset_en, reset_idx, s1_update_idx)
        metaRam.io.wdata   := Mux(reset_en, VecInit(Seq.fill(fetchWidth) {0.U.asTypeOf(new BTBMeta)}), s1_update_wmeta)
        metaRam.io.wstrobe := Mux(reset_en, ~0.U(fetchWidth.W), s1_update_wmeta_mask.asUInt)

        btbRam.io.wen     := reset_en || update_en
        btbRam.io.waddr   := Mux(reset_en, reset_idx, s1_update_idx)
        btbRam.io.wdata   := VecInit(Seq.fill(fetchWidth) {Mux(reset_en, 0.U.asTypeOf(new BTBEntry), s1_update_wbtb)})
        btbRam.io.wstrobe := Mux(reset_en, ~0.U(fetchWidth.W), s1_update_wbtb_mask.asUInt)
    }
    // ---------------------------------------------

    // ---------------------------------------------
    //     Performance Counter
    //     TODO
    // ---------------------------------------------
}
--------------------------------------------------------------------------------