├── doc ├── slides.pdf ├── src │ ├── bpu.png │ ├── cache.png │ ├── linux.jpg │ ├── uboot.jpg │ ├── back-end.png │ ├── pua-mips.png │ ├── front-end.png │ ├── template.typ │ └── main.typ ├── final-design.pdf └── primary-design.pdf ├── .vscode └── settings.json ├── chisel ├── .scalafmt.conf ├── src │ ├── main │ │ ├── scala │ │ │ └── cpu │ │ │ │ ├── Elaborate.scala │ │ │ │ ├── pipeline │ │ │ │ ├── execute │ │ │ │ │ ├── HiLo.scala │ │ │ │ │ ├── LLbit.scala │ │ │ │ │ ├── BranchCtrl.scala │ │ │ │ │ ├── ExecuteStage.scala │ │ │ │ │ ├── ExeAccessMemCtrl.scala │ │ │ │ │ ├── ALU.scala │ │ │ │ │ ├── Fu.scala │ │ │ │ │ ├── Div.scala │ │ │ │ │ ├── ExecuteUnit.scala │ │ │ │ │ └── Mul.scala │ │ │ │ ├── writeback │ │ │ │ │ ├── WriteBackStage.scala │ │ │ │ │ ├── CommitBuffer.scala │ │ │ │ │ └── WriteBackUnit.scala │ │ │ │ ├── fetch │ │ │ │ │ ├── FetchUnit.scala │ │ │ │ │ ├── PreDecoder.scala │ │ │ │ │ ├── InstFifo.scala │ │ │ │ │ └── BranchPredictorUnit.scala │ │ │ │ ├── decoder │ │ │ │ │ ├── JumpCtrl.scala │ │ │ │ │ ├── ForwardCtrl.scala │ │ │ │ │ ├── ARegfile.scala │ │ │ │ │ ├── Issue.scala │ │ │ │ │ └── DecoderUnit.scala │ │ │ │ └── memory │ │ │ │ │ ├── MemoryStage.scala │ │ │ │ │ ├── DataMemoryAccess.scala │ │ │ │ │ └── MemoryUnit.scala │ │ │ │ ├── PuaMips.scala │ │ │ │ ├── defines │ │ │ │ ├── StaticBundles.scala │ │ │ │ ├── Util.scala │ │ │ │ ├── Cp0Bundles.scala │ │ │ │ ├── Bundles.scala │ │ │ │ └── Const.scala │ │ │ │ ├── cache │ │ │ │ ├── Cache.scala │ │ │ │ ├── memory │ │ │ │ │ ├── PortDefinitions.scala │ │ │ │ │ ├── LUTRam.scala │ │ │ │ │ ├── LUTRamIP.scala │ │ │ │ │ ├── SimpleDualPortRamIP.scala │ │ │ │ │ └── SimpleDualPortRam.scala │ │ │ │ ├── CacheAXIInterface.scala │ │ │ │ └── ICache.scala │ │ │ │ ├── CpuConfig.scala │ │ │ │ ├── mmu │ │ │ │ ├── TlbL1I.scala │ │ │ │ ├── TlbL2.scala │ │ │ │ └── TlbL1D.scala │ │ │ │ ├── ctrl │ │ │ │ └── Ctrl.scala │ │ │ │ ├── axi │ │ │ │ └── FIFO.scala │ │ │ │ └── Core.scala │ │ └── resources │ │ │ ├── mycpu_top.v │ │ │ └── 
mycpu_top_for_soc_simulator.v │ └── test │ │ └── scala │ │ └── test.scala ├── build.sbt └── Makefile ├── .gitignore ├── LICENSE ├── README.md └── README_EN.md /doc/slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/slides.pdf -------------------------------------------------------------------------------- /doc/src/bpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/bpu.png -------------------------------------------------------------------------------- /doc/src/cache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/cache.png -------------------------------------------------------------------------------- /doc/src/linux.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/linux.jpg -------------------------------------------------------------------------------- /doc/src/uboot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/uboot.jpg -------------------------------------------------------------------------------- /doc/final-design.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/final-design.pdf -------------------------------------------------------------------------------- /doc/src/back-end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/back-end.png -------------------------------------------------------------------------------- /doc/src/pua-mips.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/pua-mips.png -------------------------------------------------------------------------------- /doc/primary-design.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/primary-design.pdf -------------------------------------------------------------------------------- /doc/src/front-end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Clo91eaf/PUA-MIPS/HEAD/doc/src/front-end.png -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.watcherExclude": { 3 | "**/target": true 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /chisel/.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.6.1" 2 | runner.dialect = scala213 3 | 4 | # format config 5 | maxColumn = 120 6 | rewrite.trailingCommas.style = always 7 | # align.tokens = [{ 8 | # code = "<>" 9 | # owners = [{ 10 | # regex = "Term.ApplyInfix" 11 | # }] 12 | # }] 13 | align.preset = more -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .workflow/ 3 | 4 | # chisel file 5 | chisel/.bloop/ 6 | chisel/.bsp/ 7 | chisel/.metals/ 8 | chisel/.vscode/ 9 | chisel/project/ 10 | chisel/generated/ 11 | !chisel/generated/mycpu_top.v 12 | chisel/target/ 13 | chisel/vivado* 14 | chisel/.Xil/ 15 | chisel/inst/u-boot.s 16 | chisel/test_run_dir/ 17 | -------------------------------------------------------------------------------- 
/chisel/src/main/scala/cpu/Elaborate.scala: -------------------------------------------------------------------------------- 1 | import cache._ 2 | import cpu._ 3 | import chisel3._ 4 | import chisel3.stage.ChiselStage 5 | import firrtl.options.TargetDirAnnotation 6 | 7 | object Elaborate extends App { 8 | implicit val config = new CpuConfig() 9 | (new ChiselStage).emitVerilog(new PuaMips(), Array("--target-dir", "generated")) 10 | } 11 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/execute/HiLo.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.execute 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines.Const._ 6 | 7 | import cpu.defines._ 8 | class HiLo extends Module { 9 | val io = IO(new Bundle { 10 | val wen = Input(Bool()) 11 | val wdata = Input(UInt(HILO_WID.W)) 12 | val rdata = Output(UInt(HILO_WID.W)) 13 | }) 14 | // output 15 | val hilo = RegInit(0.U(HILO_WID.W)) 16 | 17 | when(io.wen) { 18 | hilo := io.wdata 19 | } 20 | 21 | io.rdata := hilo 22 | } 23 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/execute/LLbit.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.execute 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | class LLbit extends Module { 7 | val io = IO(new Bundle { 8 | val do_flush = Input(Bool()) 9 | val wen = Input(Bool()) 10 | val wdata = Input(Bool()) 11 | 12 | val rdata = Output(Bool()) 13 | }) 14 | val llbit = RegInit(false.B) 15 | 16 | when(io.do_flush) { 17 | llbit := false.B 18 | }.elsewhen(io.wen) { 19 | llbit := io.wdata 20 | } 21 | 22 | io.rdata := llbit 23 | } 24 | -------------------------------------------------------------------------------- /chisel/src/test/scala/test.scala: 
-------------------------------------------------------------------------------- 1 | package cpu 2 | 3 | import chisel3.stage.ChiselGeneratorAnnotation 4 | 5 | import cpu.CpuConfig 6 | import cpu.pipeline.execute._ 7 | import cpu.pipeline.memory.DataMemoryAccess 8 | import cpu.pipeline.memory.MemoryUnit 9 | import cpu.pipeline.writeback.WriteBackUnit 10 | import cpu.pipeline.fetch.PreDecoder 11 | 12 | object testMain extends App { 13 | implicit val config = new CpuConfig() 14 | (new chisel3.stage.ChiselStage).execute( 15 | Array("--target-dir", "generated"), 16 | Seq(ChiselGeneratorAnnotation(() => new PreDecoder())), 17 | ) 18 | } 19 | -------------------------------------------------------------------------------- /chisel/build.sbt: -------------------------------------------------------------------------------- 1 | // See README.md for license details. 2 | 3 | scalaVersion := "2.13.8" 4 | val chiselVersion = "3.5.4" 5 | 6 | lazy val root = (project in file(".")) 7 | .settings( 8 | name := "pua-mips", 9 | libraryDependencies ++= Seq( 10 | "edu.berkeley.cs" %% "chisel3" % chiselVersion, 11 | "edu.berkeley.cs" %% "chiseltest" % "0.5.4" % "test", 12 | "org.scalameta" % "semanticdb-scalac" % "4.7.7" cross CrossVersion.full, 13 | ), 14 | scalacOptions ++= Seq( 15 | "-language:reflectiveCalls", 16 | "-deprecation", 17 | "-feature", 18 | "-Xcheckinit", 19 | "-P:chiselplugin:genBundleElements", 20 | "-Yrangepos", 21 | ), 22 | addCompilerPlugin("edu.berkeley.cs" % "chisel3-plugin" % chiselVersion cross CrossVersion.full), 23 | ) 24 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/PuaMips.scala: -------------------------------------------------------------------------------- 1 | import chisel3._ 2 | import chisel3.util._ 3 | import cache._ 4 | import cpu._ 5 | import cpu.defines._ 6 | 7 | class PuaMips extends Module { 8 | implicit val config = new CpuConfig() 9 | val io = IO(new Bundle { 10 | val ext_int = 
Input(UInt(6.W)) 11 | val axi = new AXI() 12 | val debug = new DEBUG() 13 | val statistic = if (!config.build) Some(new GlobalStatistic()) else None 14 | }) 15 | val core = Module(new Core()) 16 | val cache = Module(new Cache()) 17 | 18 | core.io.inst <> cache.io.inst 19 | core.io.data <> cache.io.data 20 | 21 | io.ext_int <> core.io.ext_int 22 | io.debug <> core.io.debug 23 | io.axi <> cache.io.axi 24 | 25 | // ===----------------------------------------------------------------=== 26 | // statistic 27 | // ===----------------------------------------------------------------=== 28 | if (!config.build) { 29 | io.statistic.get.cpu <> core.io.statistic.get 30 | io.statistic.get.cache <> cache.io.statistic.get 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Clo91eaf 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/execute/BranchCtrl.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.execute 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.Const._ 7 | // Resolves the real outcome of a conditional branch from the op and its source operands, and flags a misprediction. 8 | class BranchCtrl extends Module { 9 | val io = IO(new Bundle { 10 | val in = new Bundle { 11 | val inst_info = Input(new InstInfo()) 12 | val src_info = Input(new SrcInfo()) 13 | val pred_branch = Input(Bool()) 14 | } 15 | val out = new Bundle { 16 | val branch = Output(Bool()) 17 | val pred_fail = Output(Bool()) 18 | } 19 | }) 20 | val src1 = io.in.src_info.src1_data 21 | val src2 = io.in.src_info.src2_data 22 | io.out.pred_fail := io.in.pred_branch =/= io.out.branch // prediction failed when it differs from the resolved outcome 23 | io.out.branch := MuxLookup( 24 | io.in.inst_info.op, 25 | false.B, // default: any op not listed below resolves to not-taken 26 | Seq( 27 | EXE_BEQ -> (src1 === src2), 28 | EXE_BNE -> (src1 =/= src2), 29 | EXE_BGTZ -> (!src1(31) && (src1 =/= 0.U)), // bit 31 (used as the sign bit) clear and value non-zero 30 | EXE_BLEZ -> (src1(31) || src1 === 0.U), // bit 31 set, or value is zero 31 | EXE_BGEZ -> (!src1(31)), 32 | EXE_BGEZAL -> (!src1(31)), // same condition as BGEZ; any link-register write is not handled in this module 33 | EXE_BLTZ -> (src1(31)), 34 | EXE_BLTZAL -> (src1(31)), // same condition as BLTZ 35 | ), 36 | ) 37 | } 38 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/defines/StaticBundles.scala: -------------------------------------------------------------------------------- 1 | package cpu.defines 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines.Const._ 6 | import cpu.CpuConfig 7 | 8 | class SocStatistic extends Bundle { 9 | val cp0_count = Output(UInt(32.W)) 10 | val cp0_random = Output(UInt(32.W)) 11 |
val cp0_cause = Output(UInt(32.W)) 12 | val int = Output(Bool()) 13 | val commit = Output(Bool()) 14 | } 15 | 16 | class BranchPredictorUnitStatistic extends Bundle { 17 | val branch = Output(UInt(32.W)) 18 | val success = Output(UInt(32.W)) 19 | } 20 | 21 | class CPUStatistic extends Bundle { 22 | val soc = new SocStatistic() 23 | val bpu = new BranchPredictorUnitStatistic() 24 | } 25 | 26 | class GlobalStatistic extends Bundle { 27 | val cpu = new CPUStatistic() 28 | val cache = new CacheStatistic() 29 | } 30 | 31 | class ICacheStatistic extends Bundle { 32 | val request = Output(UInt(32.W)) 33 | val hit = Output(UInt(32.W)) 34 | } 35 | 36 | class DCacheStatistic extends Bundle { 37 | val request = Output(UInt(32.W)) 38 | val hit = Output(UInt(32.W)) 39 | } 40 | 41 | class CacheStatistic extends Bundle { 42 | val icache = new ICacheStatistic() 43 | val dcache = new DCacheStatistic() 44 | } 45 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/cache/Cache.scala: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.CacheConfig 7 | import cpu.CpuConfig 8 | 9 | class Cache(implicit config: CpuConfig) extends Module { 10 | val io = IO(new Bundle { 11 | val inst = Flipped(new Cache_ICache()) 12 | val data = Flipped(new Cache_DCache()) 13 | val axi = new AXI() 14 | val statistic = if (!config.build) Some(new CacheStatistic()) else None 15 | }) 16 | implicit val iCacheConfig = CacheConfig(nset = 64, nbank = 4, bankWidth = 16) 17 | implicit val dCacheConfig = CacheConfig(nset = 128, bankWidth = 4) 18 | 19 | val icache = Module(new ICache(iCacheConfig)) 20 | val dcache = Module(new DCache(dCacheConfig)) 21 | val axi_interface = Module(new CacheAXIInterface()) 22 | 23 | icache.io.axi <> axi_interface.io.icache 24 | dcache.io.axi <> axi_interface.io.dcache 25 | 26 | io.inst <> 
icache.io.cpu 27 | io.data <> dcache.io.cpu 28 | io.axi <> axi_interface.io.axi 29 | 30 | // ===----------------------------------------------------------------=== 31 | // statistic 32 | // ===----------------------------------------------------------------=== 33 | if (!config.build) { 34 | io.statistic.get.icache <> icache.io.statistic.get 35 | io.statistic.get.dcache <> dcache.io.statistic.get 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/cache/memory/PortDefinitions.scala: -------------------------------------------------------------------------------- 1 | package cache.memory 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.CacheConfig 6 | 7 | class ReadOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle { 8 | val addr = Input(UInt(log2Ceil(cacheConfig.nset * cacheConfig.nbank).W)) 9 | val data = Output(gen) 10 | } 11 | 12 | class WriteOnlyPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle { 13 | val addr = Input(UInt(log2Ceil(cacheConfig.nset * cacheConfig.nbank).W)) 14 | val en = Input(Bool()) 15 | val data = Input(gen) 16 | } 17 | 18 | class WriteOnlyMaskPort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle { 19 | val addr = Input(UInt(log2Ceil(cacheConfig.nset * cacheConfig.nbank).W)) 20 | val en = Input(UInt(cacheConfig.bankWidth.W)) 21 | val data = Input(gen) 22 | } 23 | 24 | 25 | class ReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle { 26 | val addr = Input(UInt(log2Ceil(cacheConfig.nset * cacheConfig.nbank).W)) 27 | val en = Input(Bool()) 28 | val wdata = Input(gen) 29 | val rdata = Output(gen) 30 | } 31 | 32 | class MaskedReadWritePort[+T <: Data](gen: T)(implicit cacheConfig: CacheConfig) extends Bundle { 33 | val addr = Input(UInt(log2Ceil(cacheConfig.nset * cacheConfig.nbank).W)) 34 | val writeMask = Input(UInt(cacheConfig.bankWidth.W)) 35 | val 
wdata = Input(gen) 36 | val rdata = Output(gen) 37 | } 38 | -------------------------------------------------------------------------------- /doc/src/template.typ: -------------------------------------------------------------------------------- 1 | // The project function defines how your document looks. 2 | // It takes your content and some metadata and formats it. 3 | // Go ahead and customize it to your liking! 4 | #let project( 5 | title: "", 6 | abstract: [], 7 | authors: (), 8 | date: none, 9 | logo: none, 10 | body, 11 | ) = { 12 | // Set the document's basic properties. 13 | set document(author: authors.map(a => a.name), title: title) 14 | set page(numbering: "1", number-align: center) 15 | set text(font: "Linux Libertine", lang: "zh") 16 | 17 | // Title page. 18 | // The page can contain a logo if you pass one with `logo: "logo.png"`. 19 | v(0.6fr) 20 | if logo != none { 21 | align(right, image(logo, width: 26%)) 22 | } 23 | v(9.6fr) 24 | 25 | text(1.1em, date) 26 | v(1.2em, weak: true) 27 | text(2em, weight: 700, title) 28 | 29 | // Author information. 30 | pad( 31 | top: 0.7em, 32 | right: 20%, 33 | grid( 34 | columns: (1fr,) * calc.min(3, authors.len()), 35 | gutter: 1em, 36 | ..authors.map(author => align(start)[ 37 | *#author.name* \ 38 | #author.email 39 | ]), 40 | ), 41 | ) 42 | 43 | v(2.4fr) 44 | pagebreak() 45 | 46 | // Abstract page. 47 | v(1fr) 48 | align(center)[ 49 | #heading( 50 | outlined: false, 51 | numbering: none, 52 | text(0.85em, smallcaps[Abstract]), 53 | ) 54 | #abstract 55 | ] 56 | v(1.618fr) 57 | pagebreak() 58 | 59 | // Main body. 
60 | set par(justify: true) 61 | 62 | body 63 | } -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/writeback/WriteBackStage.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.writeback 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.Const._ 7 | import cpu.CpuConfig 8 | import cpu.pipeline.memory.Cp0Info 9 | // Per-instruction payload held in the register stage between the memory unit and the write-back unit. 10 | class MemWbInst1 extends Bundle { 11 | val pc = UInt(PC_WID.W) 12 | val inst_info = new InstInfo() 13 | val rd_info = new RdInfo() 14 | val ex = new ExceptionInfo() 15 | } 16 | class MemWbInst0 extends MemWbInst1 { 17 | val cp0 = new Cp0Info() 18 | } 19 | // Both instruction slots; only inst0 additionally carries the Cp0Info fields. 20 | class MemoryUnitWriteBackUnit extends Bundle { 21 | val inst0 = new MemWbInst0() 22 | val inst1 = new MemWbInst1() 23 | } 24 | class WriteBackStage(implicit val config: CpuConfig) extends Module { 25 | val io = IO(new Bundle { 26 | val ctrl = Input(new Bundle { 27 | val allow_to_go = Bool() 28 | val clear = Bool() 29 | }) 30 | val memoryUnit = Input(new MemoryUnitWriteBackUnit()) 31 | val writeBackUnit = Output(new MemoryUnitWriteBackUnit()) 32 | }) 33 | val inst0 = RegInit(0.U.asTypeOf(new MemWbInst0())) 34 | val inst1 = RegInit(0.U.asTypeOf(new MemWbInst1())) 35 | // clear has priority over allow_to_go: zero both slots, otherwise latch the memory-unit outputs when allowed to advance. 36 | when(io.ctrl.clear) { 37 | inst0 := 0.U.asTypeOf(new MemWbInst0()) 38 | inst1 := 0.U.asTypeOf(new MemWbInst1()) 39 | }.elsewhen(io.ctrl.allow_to_go) { 40 | inst0 := io.memoryUnit.inst0 41 | inst1 := io.memoryUnit.inst1 42 | } 43 | 44 | io.writeBackUnit.inst0 := inst0 45 | io.writeBackUnit.inst1 := inst1 46 | } 47 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/fetch/FetchUnit.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.fetch 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines.Const._ 6 | import
cpu.CpuConfig 7 | 8 | class FetchUnit(implicit 9 | val config: CpuConfig, 10 | ) extends Module { 11 | val io = IO(new Bundle { 12 | val memory = new Bundle { 13 | val flush = Input(Bool()) 14 | val flush_pc = Input(UInt(PC_WID.W)) 15 | } 16 | val decoder = new Bundle { 17 | val branch = Input(Bool()) 18 | val target = Input(UInt(PC_WID.W)) 19 | } 20 | val execute = new Bundle { 21 | val branch = Input(Bool()) 22 | val target = Input(UInt(PC_WID.W)) 23 | } 24 | val instFifo = new Bundle { 25 | val full = Input(Bool()) 26 | } 27 | val iCache = new Bundle { 28 | val inst_valid = Input(Vec(config.instFetchNum, Bool())) 29 | val pc = Output(UInt(PC_WID.W)) 30 | val pc_next = Output(UInt(PC_WID.W)) 31 | } 32 | 33 | }) 34 | val pc = RegNext(io.iCache.pc_next, "h_bfc00000".U(32.W)) 35 | io.iCache.pc := pc 36 | 37 | // when inst_valid(1) is true, inst_valid(0) must be true 38 | 39 | val pc_next_temp = Wire(UInt(PC_WID.W)) 40 | 41 | pc_next_temp := pc 42 | for (i <- 0 until config.instFetchNum) { 43 | when(io.iCache.inst_valid(i)) { 44 | pc_next_temp := pc + ((i + 1) * 4).U 45 | } 46 | } 47 | 48 | io.iCache.pc_next := MuxCase( 49 | pc_next_temp, 50 | Seq( 51 | io.memory.flush -> io.memory.flush_pc, 52 | io.execute.branch -> io.execute.target, 53 | io.decoder.branch -> io.decoder.target, 54 | io.instFifo.full -> pc, 55 | ), 56 | ) 57 | } 58 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/defines/Util.scala: -------------------------------------------------------------------------------- 1 | package cpu.defines 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | object Util { 7 | def subwordModify(source: UInt, start: Int, md: UInt): UInt = { 8 | val ms = md.getWidth 9 | subwordModify(source, (start, start - ms + 1), md) 10 | } 11 | 12 | def subwordModify(source: UInt, tuple: (Int, Int), md: UInt): UInt = { 13 | val ws = source.getWidth 14 | val ms = md.getWidth 15 | val start = tuple._1 16 | val end = 
tuple._2 17 | require( 18 | ws > start && start >= end && end >= 0, 19 | s"ws: $ws, start: $start, end: $end" 20 | ) 21 | require(start - end == ms - 1) 22 | if (end == 0) Cat(source(ws - 1, start + 1), md) 23 | else if (start == ws - 1) Cat(md, source(end - 1, 0)) 24 | else Cat(source(ws - 1, start + 1), md, source(end - 1, 0)) 25 | } 26 | 27 | def listHasElement(list: Seq[UInt], element: UInt): Bool = { 28 | list.foldLeft(false.B)((r, e) => r || (e === element)) 29 | } 30 | 31 | def MAXnBIT(m: Int): BigInt = BigInt(1) << m 32 | 33 | def unsignedToSigned(s: BigInt, width: Int = 32): BigInt = { 34 | val m = MAXnBIT(width - 1) 35 | if (s >= m) s - 2 * m 36 | else s 37 | } 38 | 39 | def signedExtend(raw: UInt, to: Int = 32): UInt = { 40 | signedExtend(raw, raw.getWidth, to) 41 | } 42 | 43 | def signedExtend(raw: UInt, from: Int, to: Int): UInt = { 44 | require(to > from && from >= 1) 45 | Cat(Fill(to - from, raw(from - 1)), raw) 46 | } 47 | 48 | def zeroExtend(raw: UInt, to: Int = 32): UInt = { 49 | zeroExtend(raw, raw.getWidth, to) 50 | } 51 | 52 | def zeroExtend(raw: UInt, from: Int, to: Int): UInt = { 53 | require(to > from && from >= 1) 54 | Cat(Fill(to - from, 0.U), raw) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/decoder/JumpCtrl.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.decoder 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import cpu.defines._ 7 | import cpu.defines.Const._ 8 | import cpu.CpuConfig 9 | 10 | class JumpCtrl(implicit val config: CpuConfig) extends Module { 11 | val io = IO(new Bundle { 12 | val in = Input(new Bundle { 13 | val allow_to_go = Bool() 14 | val pc = UInt(PC_WID.W) 15 | val decoded_inst0 = new InstInfo() 16 | val reg1_data = UInt(DATA_WID.W) 17 | val forward = Vec(config.fuNum, new DataForwardToDecoderUnit()) 18 | }) 19 | val out = Output(new Bundle { 20 | val 
jump_inst = Bool() 21 | val jump_register = Bool() 22 | val jump = Bool() 23 | val jump_target = UInt(PC_WID.W) 24 | }) 25 | }) 26 | 27 | val op = io.in.decoded_inst0.op 28 | val jump_inst = VecInit(EXE_J, EXE_JAL).contains(op) 29 | val jump_register_inst = VecInit(EXE_JR, EXE_JALR).contains(op) 30 | io.out.jump_inst := jump_inst || jump_register_inst 31 | io.out.jump := io.in.allow_to_go && (jump_inst || jump_register_inst && !io.out.jump_register) 32 | // jump_register: a JR/JALR whose source register matches a write-enabled exe/mem destination of ANY function unit; io.out.jump is suppressed in that case. 33 | val rs_addr = io.in.decoded_inst0.reg1_raddr 34 | val rs_pending = io.in.forward.map(f => (f.exe.wen && rs_addr === f.exe.waddr) || (f.mem.wen && rs_addr === f.mem.waddr)) 35 | .foldLeft(false.B)(_ || _) 36 | io.out.jump_register := jump_register_inst && rs_pending 37 | val pc_plus_4 = io.in.pc + 4.U(PC_WID.W) 38 | io.out.jump_target := Mux( 39 | jump_inst, 40 | Cat(pc_plus_4(31, 28), io.in.decoded_inst0.inst(25, 0), 0.U(2.W)), 41 | io.in.reg1_data, 42 | ) 43 | } 44 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/CpuConfig.scala: -------------------------------------------------------------------------------- 1 | package cpu 2 | 3 | import chisel3.util._ 4 | 5 | case class CpuConfig( 6 | val build: Boolean = true, // whether this is build mode 7 | val hasCommitBuffer: Boolean = false, // whether a commit buffer is present 8 | val decoderNum: Int = 2, // number of instructions accessing the register file simultaneously 9 | val commitNum: Int = 2, // number of instructions committed simultaneously 10 | val fuNum: Int = 2, // number of function units 11 | val instFetchNum: Int = 2, // number of instructions fetched from the iCache 12 | val instFifoDepth: Int = 8, // instruction FIFO depth 13 | val writeBufferDepth: Int = 16, // write buffer depth 14 | val mulClockNum: Int = 2, // clock cycles taken by the multiplier 15 | val divClockNum: Int = 8, // clock cycles taken by the divider 16 | val branchPredictor: String = "adaptive",// adaptive, pesudo, global 17 | ) 18 | 19 | case class BranchPredictorConfig( 20 | val bhtDepth: Int = 5, 21 | val
phtDepth: Int = 6, 22 | ) 23 | // Geometry of one cache. The derived widths below divide a 32-bit address among tag, set index, bank index and bank offset (checked by the final require). 24 | case class CacheConfig( 25 | nway: Int = 2, // number of ways 26 | nbank: Int = 8, // number of banks 27 | nset: Int, 28 | bankWidth: Int, // bytes per bank 29 | ) { 30 | val config = CpuConfig() 31 | val indexWidth = log2Ceil(nset) // 6 when nset = 64 32 | val bankIndexWidth = log2Ceil(nbank) // 3 with the default nbank = 8 33 | val bankOffsetWidth = log2Ceil(bankWidth) // 3 when bankWidth = 8 34 | val offsetWidth = bankIndexWidth + bankOffsetWidth // 6 for the example values above 35 | val tagWidth = 32 - indexWidth - offsetWidth // 20 for the example values above 36 | val tagvWidth = tagWidth + 1 // tag plus one extra bit (presumably the valid bit - TODO confirm); 21 for the example values above 37 | val bankWidthBits = bankWidth * 8 // bank width in bits; 64 when bankWidth = 8 38 | val burstSize = 16 39 | val ninst = config.instFetchNum // TODO: make this a freely configurable parameter 40 | require(isPow2(nset)) 41 | require(isPow2(nway)) 42 | require(isPow2(nbank)) 43 | require(isPow2(bankWidth)) 44 | require( 45 | tagWidth + indexWidth + bankIndexWidth + bankOffsetWidth == 32, 46 | "basic request calculation", 47 | ) 48 | } 49 | -------------------------------------------------------------------------------- /chisel/Makefile: -------------------------------------------------------------------------------- 1 | TEST_TOP=../../PUA-MIPS-TEST 2 | CPU_DIR=./src/main/scala/cpu 3 | PUA_MIPS=./generated/PuaMips.v 4 | MYCPU_TOP=./src/main/resources/mycpu_top.v 5 | SCORES_FILE=../SCORES.md 6 | OUTPUT=$(TEST_TOP)/verilator/output.txt 7 | 8 | verilog: 9 | @echo "make verilog" 10 | find ./generated !
-name 'mycpu_top.v' -type f -delete 11 | sbt 'runMain Elaborate' 12 | 13 | 14 | # run trace target 15 | func: 16 | $(MAKE) -C $(TEST_TOP)/verilator/ func 17 | 18 | perf: 19 | $(MAKE) -C $(TEST_TOP)/verilator/ perf 20 | # Append one row (date, IPC, scores, MESSAGE, per-test figures from OUTPUT) to SCORES_FILE; needs MESSAGE=... on the command line. 21 | score: 22 | ifeq ($(strip $(MESSAGE)),) 23 | @echo "message is empty" 24 | else 25 | @echo "\ 26 | |$(shell date +"%Y-%m-%d")\ 27 | |$(shell grep IPC $(OUTPUT) | grep -oP '\d+\.\d+')\ 28 | |$(shell grep scores $(OUTPUT) | grep -oP '\d+\.\d+')\ 29 | |$(MESSAGE)\ 30 | |$(shell grep -oE '^[0-9]+\.[0-9]{3}' $(OUTPUT) | tr '\n' '|')\ 31 | " | tee -a $(SCORES_FILE) 32 | endif 33 | 34 | perfdiff: 35 | $(MAKE) -C $(TEST_TOP)/verilator/ perfdiff 36 | 37 | count: 38 | @echo "count the lines" 39 | find . -name "*.scala" | xargs wc -l 40 | 41 | count_commit: 42 | @echo "count the commits" 43 | git log --format=oneline | wc -l 44 | # Elaborate the test top (cpu.testMain) after deleting previously generated files other than mycpu_top.v. 45 | test: 46 | @echo "make test" 47 | find ./generated ! -name 'mycpu_top.v' -type f -delete 48 | sbt "Test / runMain cpu.testMain" 49 | 50 | define TASK 51 | sbt "runMain Elaborate $(1)" 52 | mv ./generated/PuaMips.v ./tmp/PuaMips_$(1).v 53 | endef 54 | # Rewrites BranchPredictorConfig.scala with BHT_DEPTH/PHT_DEPTH, re-elaborates, and copies the BPU Verilog to tmp/. 55 | tmp/BranchPredictorUnit_$(BHT_DEPTH)_$(PHT_DEPTH).v: src/main/scala/cpu/pipeline/fetch/BranchPredictorConfig.scala 56 | @echo 'package cpu.pipeline.fetch' > $< 57 | @echo 'case class BranchPredictorConfig(' >> $< 58 | @echo 'val bhtDepth: Int = $(BHT_DEPTH),' >> $< 59 | @echo 'val phtDepth: Int = $(PHT_DEPTH),' >> $< 60 | @echo ')' >> $< 61 | sbt 'runMain Elaborate' 62 | cp ./generated/BranchPredictorUnit.v $@ 63 | 64 | loop:tmp/BranchPredictorUnit_$(BHT_DEPTH)_$(PHT_DEPTH).v 65 | 66 | # Command targets that do not produce a file of the same name 67 | .PHONY: verilog func perf score perfdiff count count_commit test loop -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/cache/memory/LUTRam.scala: -------------------------------------------------------------------------------- 1 | package cache.memory 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.CacheConfig 6 | import cpu.CpuConfig 7 | 8 |
/** LUT ram for XPM, one port for read/write, one port for read 9 | * @param depth 10 | * how many lines there are in the bank 11 | * @param width 12 | * how wide in bits each line is 13 | * @param config 14 | * implicit configuration to control generate ram for simulation or elaboration 15 | */ 16 | class LUTRam(depth: Int, width: Int)(implicit val config: CpuConfig) extends Module { 17 | require(isPow2(depth)) 18 | val waddridth = log2Ceil(depth) 19 | val io = IO(new Bundle { 20 | val raddr = Input(UInt(waddridth.W)) 21 | val rdata = Output(UInt(width.W)) 22 | 23 | val waddr = Input(UInt(waddridth.W)) 24 | val wdata = Input(UInt(width.W)) 25 | val wen = Input(Bool()) 26 | val writeOutput = Output(UInt(width.W)) 27 | }) 28 | 29 | if (config.build) { 30 | val bank = Module( 31 | new LUTRamIP( 32 | wdataidth = width, 33 | waddridth = waddridth, 34 | byteWriteWidth = width, 35 | numberOfLines = depth, 36 | ), 37 | ) 38 | bank.io.clka := clock 39 | bank.io.clkb := clock 40 | bank.io.rsta := reset 41 | bank.io.rstb := reset 42 | 43 | bank.io.regcea := false.B 44 | bank.io.regceb := false.B 45 | bank.io.ena := true.B 46 | bank.io.enb := true.B 47 | 48 | bank.io.addra := io.waddr 49 | bank.io.wea := io.wen 50 | bank.io.dina := io.wdata 51 | io.writeOutput := DontCare 52 | 53 | bank.io.addrb := io.raddr 54 | io.rdata := bank.io.doutb 55 | } else { 56 | val bank = RegInit(VecInit(Seq.fill(depth)(0.U(width.W)))) 57 | io.rdata := bank(io.raddr) 58 | io.writeOutput := DontCare 59 | when(io.wen) { 60 | bank(io.waddr) := io.wdata 61 | }.otherwise { 62 | io.writeOutput := bank(io.waddr) 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 简体中文 | [English](./README_EN.md) 2 | 3 | NSCSCC 2023 二等奖 🏆 4 | 5 | 杭州电子科技大学 PUA 队 参赛作品 6 | 7 | # 🚀 PUA (Powerful Ultimate Architecture) MIPS 8 | 9 | [比赛总结 
📖](https://clo91eaf.github.io/posts/nscscc2023/)

## 🛠️ 环境配置

本 Chisel 项目使用 sbt 进行构建。

- sbt 版本为 1.8.2

- Scala 版本为 2.13.8

- Chisel 版本为 3.5.4

在 chisel 目录下执行以下命令生成 Verilog 文件:

```shell
make verilog
```

执行完成后,你将在 chisel/generated 目录下找到 PuaMips.v Verilog 文件,而 chisel/src/main/resources/mycpu_top.v 可作为顶层文件。

## 📚 简介

- 本项目在《CPU 设计实战》的基础上简单设计了一个高性能双发射六级流水 MIPS 处理器。初赛性能分 62,频率为 88MHz,IPC 为 1.25。
- 具体的设计情况请参阅 doc/final-design.pdf。
- 该处理器可以运行 Linux 6.5-rc3 最新版内核。
- 该处理器所有的代码不超过 5000 行,应该为龙芯杯历史上代码量相当小的处理器。
- 该处理器有较大的可改进空间,欢迎大家参考,由于时间紧张,没有对频率进行太大的优化。
- 本项目已进入尾声,不再进行维护。

## 📦 资源

1. [Git 提交消息约定](https://gitee.com/help/articles/4231#article-header0) 📜 - Git 的提交规范。
2. [Chisel-template](https://github.com/freechipsproject/chisel-template) 📁 - pua-mips 中的 Chisel 项目的初始化使用了这个模板。
3. [在线汇编器](https://godbolt.org/) 💻 - 在线的编译器,用来生成对应的 MIPS 二进制文件参考。
4. [MIPS 转换器](https://www.eg.bucknell.edu/~csci320/mips_web/) 🔄 - 负责把 MIPS 指令转换为二进制,并且提供对应指令的详细说明。
5. [Chisel 环境配置](https://clo91eaf.github.io/80b5fe4ebe03/) 🛠️

## 🧩 杂项

### 正确参考本项目的方式

1. ❌ 请不要直接复制本项目的代码。
2. ✍️ 在理解本项目模块的基础上,使用你自己的书写习惯进行代码实现,并尝试通过以下角度获得更好的代码实现:
   - 💬 代码的可读性。多写注释。
   - 🧰 代码的可维护性。解耦,模块化。本项目中除了少部分文件的代码量超过 200 行,其他文件的代码量都在 200 行以内。
   - 🚀 尽情释放 Chisel 语言的高度抽象能力。
   - 📝 撇弃软件思维,先设计,后实现。
3. 📊 通过性能计数器比较性能。永远是数据指导优化。
4. 📡 模拟器,仿真器。不要陷入波形的陷阱。不要安逸于现状。始终思考如何才能少看波形,如何获得更多的信息。

### 本项目的一些特点

你将在本项目中不可避免地看到:

1. 📝 中英文注释混杂。
2. 🎨 代码风格不统一。
3. 🤔 奇怪的命名。
4. 🧱 莫名其妙的硬编码设计。
5. 
🔄 冗余的提交记录。

我们在开发中已经尽可能的规避这些问题,并且进行了多次重构。但是由于时间紧张,我们无法保证代码的完美。

如果我们的设计过程对你有启发,我们将感到很高兴。 🌟
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/memory/MemoryStage.scala:
--------------------------------------------------------------------------------
package cpu.pipeline.memory

import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig

/** CP0 register values (count, random, cause) carried alongside instruction 0. */
class Cp0Info extends Bundle {
  val cp0_count = UInt(DATA_WID.W)
  val cp0_random = UInt(DATA_WID.W)
  val cp0_cause = UInt(DATA_WID.W)
}

/** Payload handed from the execute unit to the memory unit for the second issue slot. */
class ExeMemInst1 extends Bundle {
  val pc = UInt(PC_WID.W)
  val inst_info = new InstInfo()
  val rd_info = new RdInfo()
  val ex = new ExceptionInfo()
}

/** Payload for the first issue slot: everything in [[ExeMemInst1]] plus the CP0 values and the
  * memory-access request fields.
  */
class ExeMemInst0(implicit val config: CpuConfig) extends ExeMemInst1 {
  val cp0 = new Cp0Info()
  val mem = new Bundle {
    val en = Bool()
    val ren = Bool()
    val wen = Bool()
    val inst_info = new InstInfo()
    val addr = UInt(DATA_ADDR_WID.W)
    val wdata = UInt(DATA_WID.W)
    val sel = Vec(config.fuNum, Bool())
    val llbit = Bool()
  }
}

/** Both issue slots bundled together: the execute-unit -> memory-unit interface. */
class ExecuteUnitMemoryUnit(implicit val config: CpuConfig) extends Bundle {

  val inst0 = new ExeMemInst0()
  val inst1 = new ExeMemInst1()
}

/** Pipeline register between the execute unit and the memory unit.
  *
  * `ctrl.clear` zeroes both slots, `ctrl.allow_to_go` latches the execute-unit outputs, and
  * otherwise the registers hold. A clear takes priority over a simultaneous advance.
  */
class MemoryStage(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val ctrl = Input(new Bundle {
      val allow_to_go = Bool()
      val clear = Bool()
    })
    val executeUnit = Input(new ExecuteUnitMemoryUnit())
    val memoryUnit = Output(new ExecuteUnitMemoryUnit())
  })

  // One register per issue slot, both reset to an all-zero bundle.
  val inst0 = RegInit(0.U.asTypeOf(new ExeMemInst0()))
  val inst1 = RegInit(0.U.asTypeOf(new ExeMemInst1()))

  when(io.ctrl.allow_to_go) {
    inst0 := io.executeUnit.inst0
    inst1 := io.executeUnit.inst1
  }
  // Written last so that, by last-connect semantics, a clear overrides an advance.
  when(io.ctrl.clear) {
    inst0 := 0.U.asTypeOf(new ExeMemInst0())
    inst1 := 0.U.asTypeOf(new ExeMemInst1())
  }

  io.memoryUnit.inst0 := inst0
  io.memoryUnit.inst1 := inst1
}
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/mmu/TlbL1I.scala:
--------------------------------------------------------------------------------
package cpu.mmu

import chisel3._
import chisel3.util._
import cpu.defines._

class ITLB extends Bundle {
  val vpn = UInt(20.W)
  val ppn = UInt(20.W)
  val uncached = Bool()
  val valid = Bool()
}

class TlbL1I extends Module {
  val io = IO(new Bundle {
    val addr = Input(UInt(32.W))
    val fence = Input(Bool())
    val cpu_stall = Input(Bool())
    val icache_stall = Input(Bool())
    val cache = new Tlb_ICache()
    val tlb1 = Output(new Tlb1InfoI())
    val tlb2 = Flipped(new Tlb2Info())
  })
  val itlb = RegInit(0.U.asTypeOf(new ITLB()))
  val vpn = io.addr(31, 12)
  val direct_mapped = io.addr(31, 30) === 2.U(2.W)

  io.cache.uncached := Mux(direct_mapped, io.addr(29), itlb.uncached)
  io.cache.translation_ok := direct_mapped || (itlb.vpn === vpn && itlb.valid)
  io.cache.hit := io.tlb2.found && io.tlb2.entry.v(vpn(0))
  io.cache.tag := Mux(direct_mapped, Cat(0.U(3.W), io.addr(28, 12)), itlb.ppn)
  io.cache.pa := Cat(io.cache.tag, io.addr(11, 0))

  when(io.fence && !io.icache_stall && !io.cpu_stall) { itlb.valid := false.B }

  // * tlb1 * //
  val tlb1 = RegInit(0.U.asTypeOf(new Tlb1InfoI()))
  tlb1 <> io.tlb1

  io.tlb2.vpn2 := vpn(19, 1)

  when(io.cache.fill) {
    when(io.tlb2.found) {
      when(io.tlb2.entry.v(vpn(0))) {
        itlb.vpn := vpn
        itlb.ppn := io.tlb2.entry.pfn(vpn(0))
        itlb.uncached := !io.tlb2.entry.c(vpn(0))
        itlb.valid := true.B
      }.otherwise {
        tlb1.invalid := true.B
      }
    }.otherwise {
53 | tlb1.refill := true.B 54 | } 55 | }.elsewhen(io.cache.icache_is_save && !io.cpu_stall && !io.icache_stall) { 56 | tlb1.invalid := false.B 57 | tlb1.refill := false.B 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/decoder/ForwardCtrl.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.decoder 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | import cpu.defines._ 7 | import cpu.defines.Const._ 8 | import cpu.CpuConfig 9 | 10 | class ForwardCtrl(implicit val config: CpuConfig) extends Module { 11 | val io = IO(new Bundle { 12 | val in = Input(new Bundle { 13 | val forward = Vec(config.fuNum, new DataForwardToDecoderUnit()) 14 | val regfile = Vec(config.decoderNum, new Src12Read()) 15 | }) 16 | val out = Output(new Bundle { 17 | val inst = Vec(config.decoderNum, new Src12Read()) 18 | }) 19 | }) 20 | 21 | // wb优先度最低 22 | for (i <- 0 until (config.decoderNum)) { 23 | io.out.inst(i).src1.raddr := DontCare 24 | io.out.inst(i).src2.raddr := DontCare 25 | io.out.inst(i).src1.rdata := io.in.regfile(i).src1.rdata 26 | io.out.inst(i).src2.rdata := io.in.regfile(i).src2.rdata 27 | } 28 | 29 | // mem优先度中 30 | for (i <- 0 until (config.decoderNum)) { 31 | for (j <- 0 until (config.fuNum)) { 32 | when( 33 | io.in.forward(j).mem.wen && 34 | io.in.forward(j).mem.waddr === io.in.regfile(i).src1.raddr, 35 | ) { 36 | io.out.inst(i).src1.rdata := io.in.forward(j).mem.wdata 37 | } 38 | when( 39 | io.in.forward(j).mem.wen && 40 | io.in.forward(j).mem.waddr === io.in.regfile(i).src2.raddr, 41 | ) { 42 | io.out.inst(i).src2.rdata := io.in.forward(j).mem.wdata 43 | } 44 | } 45 | } 46 | 47 | // exe优先度高 48 | for (i <- 0 until (config.decoderNum)) { 49 | for (j <- 0 until (config.fuNum)) { 50 | when( 51 | io.in.forward(j).exe.wen && !io.in.forward(j).mem_wreg && 52 | io.in.forward(j).exe.waddr === io.in.regfile(i).src1.raddr, 53 | ) { 
54 | io.out.inst(i).src1.rdata := io.in.forward(j).exe.wdata 55 | } 56 | when( 57 | io.in.forward(j).exe.wen && !io.in.forward(j).mem_wreg && 58 | io.in.forward(j).exe.waddr === io.in.regfile(i).src2.raddr, 59 | ) { 60 | io.out.inst(i).src2.rdata := io.in.forward(j).exe.wdata 61 | } 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/execute/ExecuteStage.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.execute 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.Const._ 7 | import cpu.{CpuConfig, BranchPredictorConfig} 8 | 9 | class IdExeInst0 extends Bundle { 10 | val config = new BranchPredictorConfig() 11 | val pc = UInt(PC_WID.W) 12 | val inst_info = new InstInfo() 13 | val src_info = new SrcInfo() 14 | val ex = new ExceptionInfo() 15 | val jb_info = new Bundle { 16 | // jump ctrl 17 | val jump_regiser = Bool() 18 | // bpu 19 | val branch_inst = Bool() 20 | val pred_branch = Bool() 21 | val branch_target = UInt(PC_WID.W) 22 | val update_pht_index = UInt(config.phtDepth.W) 23 | } 24 | } 25 | 26 | class IdExeInst1 extends Bundle { 27 | val allow_to_go = Bool() 28 | val pc = UInt(PC_WID.W) 29 | val inst_info = new InstInfo() 30 | val src_info = new SrcInfo() 31 | val ex = new ExceptionInfo() 32 | } 33 | 34 | class DecoderUnitExecuteUnit extends Bundle { 35 | val inst0 = new IdExeInst0() 36 | val inst1 = new IdExeInst1() 37 | } 38 | 39 | class ExecuteStage(implicit val config: CpuConfig) extends Module { 40 | val io = IO(new Bundle { 41 | val ctrl = Input(new Bundle { 42 | val inst0_allow_to_go = Bool() 43 | val clear = Vec(config.decoderNum, Bool()) 44 | }) 45 | val decoderUnit = Input(new DecoderUnitExecuteUnit()) 46 | val executeUnit = Output(new DecoderUnitExecuteUnit()) 47 | }) 48 | 49 | val inst0 = RegInit(0.U.asTypeOf(new IdExeInst0())) 50 | val inst1 = 
RegInit(0.U.asTypeOf(new IdExeInst1())) 51 | 52 | when(io.ctrl.clear(0)) { 53 | inst0 := 0.U.asTypeOf(new IdExeInst0()) 54 | }.elsewhen(io.ctrl.inst0_allow_to_go) { 55 | inst0 := io.decoderUnit.inst0 56 | } 57 | 58 | when(io.ctrl.clear(1)) { 59 | inst1 := 0.U.asTypeOf(new IdExeInst1()) 60 | }.elsewhen(io.decoderUnit.inst1.allow_to_go) { 61 | inst1 := io.decoderUnit.inst1 62 | } 63 | 64 | io.executeUnit.inst0 := inst0 65 | io.executeUnit.inst1 := inst1 66 | } 67 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/cache/memory/LUTRamIP.scala: -------------------------------------------------------------------------------- 1 | package cache.memory 2 | 3 | import chisel3._ 4 | import chisel3.util.log2Ceil 5 | 6 | /** XPM 2019.2 XPM_MEMORY_DPDISTRAM, at page 124 of UG953(2019.2) by default, this is initialized to 7 | * all 0 8 | * 9 | * @param wdataidth 10 | * : the size of the data to store in each line, in bits 11 | * @param waddridth 12 | * : the width of request 13 | * @param byteWriteWidth 14 | * : addressable size of write 15 | * @param numberOfLines 16 | * : how many **bits** there are in the memory 17 | */ 18 | class LUTRamIP(wdataidth: Int, waddridth: Int, byteWriteWidth: Int, numberOfLines: Int) 19 | extends BlackBox( 20 | Map( 21 | "ADDR_WIDTH_A" -> waddridth, 22 | "ADDR_WIDTH_B" -> waddridth, 23 | "MEMORY_SIZE" -> numberOfLines * wdataidth, 24 | "WRITE_DATA_WIDTH_A" -> wdataidth, 25 | "READ_DATA_WIDTH_A" -> wdataidth, 26 | "READ_DATA_WIDTH_B" -> wdataidth, 27 | "BYTE_WRITE_WIDTH_A" -> byteWriteWidth, 28 | "READ_LATENCY_A" -> 0, 29 | "READ_LATENCY_B" -> 0, 30 | "READ_RESET_VALUE_A" -> 0, 31 | "READ_RESET_VALUE_B" -> 0, 32 | "CLOCKING_MODE" -> "common_clock", 33 | ), 34 | ) { 35 | override def desiredName: String = "xpm_memory_dpdistram" 36 | require( 37 | waddridth == log2Ceil(numberOfLines), 38 | "request width should be log 2 of number of lines to request all", 39 | ) 40 | require( 41 | wdataidth - 
(wdataidth / byteWriteWidth) * byteWriteWidth == 0, 42 | "data width should be a multiple of byte write width", 43 | ) 44 | require(waddridth <= 20, "request width should be 1 to 20") 45 | val io = IO(new Bundle { 46 | val clka = Input(Clock()) 47 | val clkb = Input(Clock()) 48 | val rsta = Input(Reset()) 49 | val rstb = Input(Reset()) 50 | 51 | val ena = Input(Bool()) 52 | val enb = Input(Bool()) 53 | val regcea = Input(Bool()) 54 | val regceb = Input(Bool()) 55 | 56 | val dina = Input(UInt(wdataidth.W)) 57 | val addra = Input(UInt(waddridth.W)) 58 | val addrb = Input(UInt(waddridth.W)) 59 | 60 | val wea = Input(UInt((wdataidth / byteWriteWidth).W)) 61 | 62 | val douta = Output(UInt(wdataidth.W)) 63 | val doutb = Output(UInt(wdataidth.W)) 64 | }) 65 | } 66 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/pipeline/decoder/ARegfile.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.decoder 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.Const._ 7 | import cpu.CpuConfig 8 | 9 | class SrcRead extends Bundle { 10 | val raddr = Output(UInt(REG_ADDR_WID.W)) 11 | val rdata = Input(UInt(DATA_WID.W)) 12 | } 13 | 14 | class Src12Read extends Bundle { 15 | val src1 = new SrcRead() 16 | val src2 = new SrcRead() 17 | } 18 | 19 | class RegWrite extends Bundle { 20 | val wen = Output(Bool()) 21 | val waddr = Output(UInt(REG_ADDR_WID.W)) 22 | val wdata = Output(UInt(DATA_WID.W)) 23 | } 24 | 25 | class ARegFile(implicit val config: CpuConfig) extends Module { 26 | val io = IO(new Bundle { 27 | val read = Flipped(Vec(config.decoderNum, new Src12Read())) 28 | val write = Flipped(Vec(config.commitNum, new RegWrite())) 29 | val bpu = if (config.branchPredictor == "pesudo") Some(Flipped(new Src12Read())) else None 30 | }) 31 | 32 | // 定义32个32位寄存器 33 | val regs = RegInit(VecInit(Seq.fill(AREG_NUM)(0.U(DATA_WID.W)))) 
34 | 35 | // 写寄存器堆 36 | for (i <- 0 until (config.commitNum)) { 37 | when(io.write(i).wen && io.write(i).waddr =/= 0.U) { 38 | regs(io.write(i).waddr) := io.write(i).wdata 39 | } 40 | } 41 | 42 | // 读寄存器堆 43 | for (i <- 0 until (config.decoderNum)) { 44 | // src1 45 | when(io.read(i).src1.raddr === 0.U) { 46 | io.read(i).src1.rdata := 0.U 47 | }.otherwise { 48 | io.read(i).src1.rdata := regs(io.read(i).src1.raddr) 49 | for (j <- 0 until (config.commitNum)) { 50 | when(io.write(j).wen && io.read(i).src1.raddr === io.write(j).waddr) { 51 | io.read(i).src1.rdata := io.write(j).wdata 52 | } 53 | } 54 | } 55 | // src2 56 | when(io.read(i).src2.raddr === 0.U) { 57 | io.read(i).src2.rdata := 0.U 58 | }.otherwise { 59 | io.read(i).src2.rdata := regs(io.read(i).src2.raddr) 60 | for (j <- 0 until (config.commitNum)) { 61 | when(io.write(j).wen && io.read(i).src2.raddr === io.write(j).waddr) { 62 | io.read(i).src2.rdata := io.write(j).wdata 63 | } 64 | } 65 | } 66 | } 67 | 68 | if (config.branchPredictor == "pesudo") { 69 | io.bpu.get.src1.rdata := regs(io.bpu.get.src1.raddr) 70 | io.bpu.get.src2.rdata := regs(io.bpu.get.src2.raddr) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/ctrl/Ctrl.scala: -------------------------------------------------------------------------------- 1 | package cpu.ctrl 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.Const._ 7 | import cpu.CpuConfig 8 | 9 | class Ctrl(implicit val config: CpuConfig) extends Module { 10 | val io = IO(new Bundle { 11 | val cacheCtrl = Flipped(new CacheCtrl()) 12 | val fetchUnit = Flipped(new FetchUnitCtrl()) 13 | val instFifo = Flipped(new InstFifoCtrl()) 14 | val decoderUnit = Flipped(new DecoderUnitCtrl()) 15 | val executeUnit = Flipped(new ExecuteCtrl()) 16 | val memoryUnit = Flipped(new MemoryCtrl()) 17 | val writeBackUnit = Flipped(new WriteBackCtrl()) 18 | }) 19 | 20 | val 
inst0_lw_stall = (io.executeUnit.inst(0).mem_wreg) && 21 | (io.decoderUnit.inst0.src1.ren && io.decoderUnit.inst0.src1.raddr === io.executeUnit.inst(0).reg_waddr || 22 | io.decoderUnit.inst0.src2.ren && io.decoderUnit.inst0.src2.raddr === io.executeUnit.inst(0).reg_waddr) 23 | val inst1_lw_stall = (io.executeUnit.inst(1).mem_wreg) && 24 | (io.decoderUnit.inst0.src1.ren && io.decoderUnit.inst0.src1.raddr === io.executeUnit.inst(1).reg_waddr || 25 | io.decoderUnit.inst0.src2.ren && io.decoderUnit.inst0.src2.raddr === io.executeUnit.inst(1).reg_waddr) 26 | val lw_stall = inst0_lw_stall || inst1_lw_stall 27 | // TODO: 这里的stall信号可能不对 28 | val longest_stall = io.executeUnit.fu_stall || io.cacheCtrl.iCache_stall || io.cacheCtrl.dCache_stall 29 | 30 | io.fetchUnit.allow_to_go := !io.cacheCtrl.iCache_stall 31 | io.decoderUnit.allow_to_go := !(lw_stall || longest_stall) 32 | io.executeUnit.allow_to_go := !longest_stall 33 | io.memoryUnit.allow_to_go := !longest_stall 34 | io.writeBackUnit.allow_to_go := !longest_stall || io.memoryUnit.flush_req 35 | 36 | io.fetchUnit.do_flush := false.B 37 | io.decoderUnit.do_flush := io.memoryUnit.flush_req || io.executeUnit.branch || io.decoderUnit.branch 38 | io.executeUnit.do_flush := io.memoryUnit.flush_req || io.executeUnit.branch 39 | io.memoryUnit.do_flush := io.memoryUnit.flush_req 40 | io.writeBackUnit.do_flush := false.B 41 | 42 | io.instFifo.delay_slot_do_flush := io.memoryUnit.flush_req 43 | 44 | io.executeUnit.fu.do_flush := io.memoryUnit.do_flush 45 | io.executeUnit.fu.eret := io.memoryUnit.eret 46 | io.executeUnit.fu.allow_to_go := io.memoryUnit.allow_to_go 47 | 48 | } 49 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/cache/memory/SimpleDualPortRamIP.scala: -------------------------------------------------------------------------------- 1 | package cache.memory 2 | 3 | import chisel3._ 4 | import chisel3.util.log2Ceil 5 | 6 | /** simple dual port ram 7 | * 8 | * 
@param wdataidth 9 | * : width of every data line 10 | * @param byteWriteWidth 11 | * : how many bits to write per mask 12 | * @param numberOfLines 13 | * : how many lines of data are in the ram 14 | * @param waddridth 15 | * : how wide is the request (to cover all lines) 16 | * @param memoryPrimitive 17 | * : should I use auto, block ram or distributed ram 18 | */ 19 | class SimpleDualPortRamIP( 20 | wdataidth: Int = 32, 21 | byteWriteWidth: Int = 8, 22 | numberOfLines: Int, 23 | waddridth: Int, 24 | memoryPrimitive: String = "block", 25 | ) extends BlackBox( 26 | Map( 27 | "ADDR_WIDTH_A" -> waddridth, 28 | "ADDR_WIDTH_B" -> waddridth, 29 | "WRITE_DATA_WIDTH_A" -> wdataidth, 30 | "READ_DATA_WIDTH_B" -> wdataidth, 31 | "BYTE_WRITE_WIDTH_A" -> byteWriteWidth, 32 | "CLOCKING_MODE" -> "common_clock", 33 | "READ_LATENCY_B" -> 1, 34 | "MEMORY_SIZE" -> numberOfLines * wdataidth, 35 | "MEMORY_PRIMITIVE" -> memoryPrimitive, 36 | ), 37 | ) { 38 | override def desiredName: String = "xpm_memory_sdpram" 39 | require(waddridth <= 20, "request width should be 1 to 20") 40 | require( 41 | wdataidth - (wdataidth / byteWriteWidth) * byteWriteWidth == 0, 42 | "data width should be a multiple of byte write width", 43 | ) 44 | require( 45 | List("auto", "block", "distributed", "ultra").contains(memoryPrimitive), 46 | "memory primitive should be auto, block ram, dist ram or ultra ram", 47 | ) 48 | require( 49 | waddridth == log2Ceil(numberOfLines), 50 | "request width should be log 2 of number of lines to request all", 51 | ) 52 | val io = IO(new Bundle { 53 | // clock and reset 54 | val clka = Input(Clock()) 55 | val clkb = Input(Clock()) 56 | val rstb = Input(Reset()) 57 | 58 | val addra = Input(UInt(waddridth.W)) 59 | val dina = Input(UInt(wdataidth.W)) 60 | val ena = Input(Bool()) 61 | val wea = Input(UInt((wdataidth / byteWriteWidth).W)) 62 | 63 | val addrb = Input(UInt(waddridth.W)) 64 | val enb = Input(Bool()) 65 | val regceb = Input(Bool()) 66 | val doutb = 
Output(UInt(wdataidth.W)) 67 | }) 68 | } 69 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/mmu/TlbL2.scala: -------------------------------------------------------------------------------- 1 | package cpu.pipeline.execute 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.TlbEntry 7 | import cpu.defines.Const._ 8 | 9 | class TlbL2 extends Module { 10 | val io = IO(new Bundle { 11 | val in = Input(new Bundle { 12 | val write = new Bundle { 13 | val en = Bool() 14 | val index = UInt(log2Ceil(TLB_NUM).W) 15 | val entry = new TlbEntry() 16 | } 17 | val read = new Bundle { 18 | val index = UInt(log2Ceil(TLB_NUM).W) 19 | } 20 | val entry_hi = new Bundle { 21 | val vpn2 = UInt(VPN2_WID.W) 22 | val asid = UInt(ASID_WID.W) 23 | } 24 | val tlb1_vpn2 = UInt(VPN2_WID.W) 25 | val tlb2_vpn2 = UInt(VPN2_WID.W) 26 | }) 27 | val out = Output(new Bundle { 28 | val read = new Bundle { 29 | val entry = new TlbEntry() 30 | } 31 | val tlb1_found = Bool() 32 | val tlb2_found = Bool() 33 | val tlb1_entry = new TlbEntry() 34 | val tlb2_entry = new TlbEntry() 35 | val tlb_found = Bool() 36 | val tlb_match_index = UInt(log2Ceil(TLB_NUM).W) 37 | }) 38 | }) 39 | // tlb l2 40 | val tlb_l2 = RegInit(VecInit(Seq.fill(TLB_NUM)(0.U.asTypeOf(new TlbEntry())))) 41 | 42 | val tlb_match = Seq.fill(3)(Wire(Vec(TLB_NUM, Bool()))) 43 | val tlb_find_vpn2 = Wire(Vec(3, UInt(VPN2_WID.W))) 44 | val tlb_match_index = Wire(Vec(3, UInt(log2Ceil(TLB_NUM).W))) 45 | 46 | tlb_find_vpn2(0) := io.in.entry_hi.vpn2 47 | tlb_find_vpn2(1) := io.in.tlb1_vpn2 48 | tlb_find_vpn2(2) := io.in.tlb2_vpn2 49 | 50 | io.out.tlb1_found := tlb_match(1).asUInt.orR 51 | io.out.tlb2_found := tlb_match(2).asUInt.orR 52 | io.out.tlb1_entry := tlb_l2(tlb_match_index(1)) 53 | io.out.tlb2_entry := tlb_l2(tlb_match_index(2)) 54 | io.out.tlb_found := tlb_match(0).asUInt.orR 55 | io.out.tlb_match_index := tlb_match_index(0) 56 | 
io.out.read.entry := tlb_l2(io.in.read.index) 57 | 58 | for (i <- 0 until (3)) { 59 | for (j <- 0 until (TLB_NUM)) { 60 | tlb_match(i)(j) := (tlb_l2(j).g || tlb_l2(j).asid === io.in.entry_hi.asid) && 61 | (tlb_l2(j).vpn2 === tlb_find_vpn2(i)) 62 | } 63 | tlb_match_index(i) := PriorityEncoder(tlb_match(i)) 64 | } 65 | 66 | when(io.in.write.en) { 67 | tlb_l2(io.in.write.index) := io.in.write.entry 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/mmu/TlbL1D.scala: -------------------------------------------------------------------------------- 1 | package cpu.mmu 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | 7 | class DTLB extends ITLB { 8 | val dirty = Bool() 9 | } 10 | 11 | class TlbL1D extends Module { 12 | val io = IO(new Bundle { 13 | val cache = new Tlb_DCache() 14 | val fence = Input(Bool()) 15 | val cpu_stall = Input(Bool()) 16 | val dcache_stall = Input(Bool()) 17 | val addr = Input(UInt(32.W)) 18 | 19 | val mem_en = Input(Bool()) 20 | val mem_write = Input(Bool()) 21 | 22 | val tlb1 = Output(new Tlb1InfoD()) 23 | val tlb2 = Flipped(new Tlb2Info()) 24 | }) 25 | val dtlb = RegInit(0.U.asTypeOf(new DTLB())) 26 | val vpn = io.addr(31, 12) 27 | val direct_mapped = io.addr(31, 30) === 2.U(2.W) 28 | 29 | io.cache.uncached := Mux(direct_mapped, io.addr(29), dtlb.uncached) 30 | io.cache.translation_ok := direct_mapped || (dtlb.vpn === vpn && dtlb.valid && (!io.mem_write || dtlb.dirty)) 31 | 32 | io.cache.tag := Mux(direct_mapped, Cat(0.U(3.W), io.addr(28, 12)), dtlb.ppn) 33 | io.cache.pa := Cat(io.cache.tag, io.addr(11, 0)) 34 | io.cache.tlb1_ok := dtlb.vpn === vpn && dtlb.valid 35 | io.cache.hit := io.cache.fill && io.tlb2.found && io.tlb2.entry.v(vpn(0)) 36 | 37 | when(io.fence) { dtlb.valid := false.B } 38 | 39 | val tlb1 = RegInit(0.U.asTypeOf(new Tlb1InfoD())) 40 | io.tlb1 <> tlb1 41 | 42 | val tlb2 = RegInit(0.U.asTypeOf(new Bundle { val vpn2 = 
UInt(19.W) }))
  io.tlb2.vpn2 <> tlb2.vpn2

  when(io.cache.dcache_is_idle && !io.cache.fill && io.mem_en && !io.cache.translation_ok) {
    when(io.cache.tlb1_ok) {
      tlb1.modify := true.B
    }.otherwise {
      tlb2.vpn2 := vpn(19, 1)
    }
  }.elsewhen(io.cache.fill) {
    when(io.tlb2.found) {
      when(io.tlb2.entry.v(vpn(0))) {
        dtlb.vpn := vpn
        dtlb.ppn := io.tlb2.entry.pfn(vpn(0))
        dtlb.uncached := !io.tlb2.entry.c(vpn(0))
        dtlb.dirty := io.tlb2.entry.d(vpn(0))
        dtlb.valid := true.B
      }.otherwise {
        tlb1.invalid := true.B
      }
    }.otherwise {
      tlb1.refill := true.B
    }
  }.elsewhen(io.cache.dcache_is_save && !io.cpu_stall && !io.dcache_stall) {
    tlb1.invalid := false.B
    tlb1.refill := false.B
    tlb1.modify := false.B
  }
}
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/decoder/Issue.scala:
--------------------------------------------------------------------------------
package cpu.pipeline.decoder

import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig

/** Dual-issue arbiter: decides whether the second decoded instruction (inst1) may go down the
  * pipeline together with the first (inst0), and whether it then sits in inst0's delay slot.
  */
class Issue(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    // inputs
    val allow_to_go = Input(Bool())
    val instFifo = Input(new Bundle {
      val empty = Bool()
      val almost_empty = Bool()
    })
    val decodeInst = Input(Vec(config.decoderNum, new InstInfo()))
    val execute = Input(Vec(config.fuNum, new MemRead()))
    // outputs
    val inst1 = Output(new Bundle {
      val is_in_delayslot = Bool()
      val allow_to_go = Bool()
    })
  })

  val inst0 = io.decodeInst(0)
  val inst1 = io.decodeInst(1)

  // Whether the instruction buffer holds fewer than two instructions.
  val instFifo_invalid = io.instFifo.empty || io.instFifo.almost_empty

  // Structural hazards: both instructions select the same one of these functional units.
  private def bothSelect(fu: UInt): Bool = inst0.fusel === fu && inst1.fusel === fu
  val mem_conflict = bothSelect(FU_MEM)
  val mul_conflict = bothSelect(FU_MUL)
  val div_conflict = bothSelect(FU_DIV)
  val struct_conflict = mem_conflict || mul_conflict || div_conflict

  // Read-after-write hazards.
  // inst1 reads a register written by execute slot `slot` while that slot has `mem_wreg` set.
  private def hazardWithExecuteSlot(slot: Int): Bool = {
    val producer = io.execute(slot)
    producer.mem_wreg &&
    (inst1.reg1_ren && inst1.reg1_raddr === producer.reg_waddr ||
      inst1.reg2_ren && inst1.reg2_raddr === producer.reg_waddr)
  }
  val load_stall = hazardWithExecuteSlot(0) || hazardWithExecuteSlot(1)
  // inst1 reads the general-purpose register that inst0 writes.
  val raw_reg = inst0.reg_wen && (
    inst0.reg_waddr === inst1.reg1_raddr && inst1.reg1_ren ||
      inst0.reg_waddr === inst1.reg2_raddr && inst1.reg2_ren
  )
  // inst0 selects DIV / MUL / MTHILO and inst1 selects DIV / MUL / MFHILO / MTHILO.
  val raw_hilo = VecInit(FU_DIV, FU_MUL, FU_MTHILO).contains(inst0.fusel) &&
    VecInit(FU_DIV, FU_MUL, FU_MFHILO, FU_MTHILO).contains(inst1.fusel)
  // inst0 is MTC0 and inst1 is MFC0 on the same CP0 address.
  val raw_cp0 =
    inst0.op === EXE_MTC0 && inst1.op === EXE_MFC0 && inst0.cp0_addr === inst1.cp0_addr
  val data_conflict = raw_reg || raw_hilo || raw_cp0 || load_stall

  // Whether inst1 is allowed to issue: nothing below may block it and both instructions must be
  // marked as dual-issuable. inst1 itself must not select FU_BR or FU_EX.
  val blockers = Seq(
    instFifo_invalid,
    struct_conflict,
    data_conflict,
    VecInit(FU_BR, FU_EX).contains(io.decodeInst(1).fusel),
  )
  io.inst1.allow_to_go := io.allow_to_go &&
    inst0.dual_issue &&
    inst1.dual_issue &&
    !blockers.reduce(_ || _)

  // Whether inst1 is in a delay slot: inst0 selects FU_BR and inst1 issues with it.
  io.inst1.is_in_delayslot := inst0.fusel === FU_BR && io.inst1.allow_to_go
}
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/writeback/CommitBuffer.scala:
--------------------------------------------------------------------------------
package cpu.pipeline.writeback

import chisel3._
import chisel3.util._

/** FIFO of write-back records (`wb_pc`, `wb_rf_wen`, `wb_rf_wnum`, `wb_rf_wdata`) with two
  * enqueue ports and one dequeue port.
  *
  * An enqueue port stores its record in any cycle in which its `wb_rf_wen` is non-zero; when both
  * ports enqueue, port 0's record is stored first. The head record is driven on `deq`; whenever
  * its `wb_rf_wen` is non-zero the read pointer advances and the stored `wb_rf_wen` is cleared.
  * While the buffer is empty `deq.wb_rf_wen` is 0.
  *
  * NOTE(review): the pointers are `log2Ceil(depth)` bits wide and wrap by overflow, which looks
  * like it assumes a power-of-two `depth`. `full` is computed but never used to hold off an
  * enqueue, and `maybe_full` is set by any port-1 enqueue even if a dequeue happens in the same
  * cycle — confirm that the buffer cannot be overrun in practice.
  *
  * @param depth
  *   number of records the buffer can hold
  */
class CommitBuffer(
    depth: Int = 32,
) extends Module {
  val io = IO(new Bundle {
    val flush = Input(Bool())
    val enq = Flipped(
      Vec(
        2,
        new Bundle {
          val wb_pc = Output(UInt(32.W))
          val wb_rf_wen = Output(UInt(4.W))
          val wb_rf_wnum = Output(UInt(5.W))
          val wb_rf_wdata = Output(UInt(32.W))
        },
      ),
    )
    val deq = new Bundle {
      val wb_pc = Output(UInt(32.W))
      val wb_rf_wen = Output(UInt(4.W))
      val wb_rf_wnum = Output(UInt(5.W))
      val wb_rf_wdata = Output(UInt(32.W))
    }
  })

  val ram = RegInit(VecInit(Seq.fill(depth)(0.U.asTypeOf(new Bundle {
    val wb_pc = UInt(32.W)
    val wb_rf_wen = UInt(4.W)
    val wb_rf_wnum = UInt(5.W)
    val wb_rf_wdata = UInt(32.W)
  }))))
  val enq_ptr = RegInit(0.U(log2Ceil(depth).W))
  val deq_ptr = RegInit(0.U(log2Ceil(depth).W))
  val maybe_full = RegInit(false.B)
  val ptr_match = enq_ptr === deq_ptr
  val empty = ptr_match && !maybe_full
  val full = ptr_match && maybe_full
  val do_enq = Wire(Vec(2, Bool()))
  // Defaults to "head record has a non-zero write enable"; forced low below while empty.
  val do_deq = WireDefault(io.deq.wb_rf_wen.orR)

  (0 until 2).foreach { i =>
    do_enq(i) := io.enq(i).wb_rf_wen.orR
  }

  // Write pointer: cleared on flush, otherwise advanced by the number of ports that enqueue.
  val next_enq_ptr = Mux(
    io.flush,
    0.U,
    Mux(
      do_enq(0) && do_enq(1),
      enq_ptr + 2.U,
      Mux(do_enq(0) || do_enq(1), enq_ptr + 1.U, enq_ptr),
    ),
  )
  // Read pointer: advanced by one on a dequeue.
  val next_deq_ptr =
    Mux(do_deq, deq_ptr + 1.U, deq_ptr)

  // Port 1 lands right behind port 0 when both enqueue in the same cycle.
  val enq1_ptr = Mux(do_enq(0), enq_ptr + 1.U, enq_ptr)
  when(do_enq(0)) {
    ram(enq_ptr) := io.enq(0)
  }
  when(do_enq(1)) {
    ram(enq1_ptr) := io.enq(1)
  }

  // Occupancy hint. The three updates are ordered by priority (last connect wins):
  // port-0-vs-dequeue, then any port-1 enqueue, then flush.
  when(do_enq(0) =/= do_deq) {
    maybe_full := do_enq(0)
  }
  when(do_enq(1)) {
    maybe_full := true.B
  }

  enq_ptr := next_enq_ptr
  deq_ptr := next_deq_ptr
  when(io.flush) {
    enq_ptr := 0.U
    deq_ptr := 0.U
    maybe_full := false.B
  }

  // A dequeued slot has its write enable cleared. This connect comes after the enqueue writes,
  // so it wins if both address the same slot.
  when(do_deq) {
    ram(deq_ptr).wb_rf_wen := 0.U
  }

  // Head record goes to the output; while empty the output carries a zero write enable instead
  // and no dequeue takes place.
  io.deq := ram(deq_ptr)
  when(empty) {
    do_deq := false.B
    io.deq := DontCare
    io.deq.wb_rf_wen := 0.U
  }

}
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/cache/memory/SimpleDualPortRam.scala:
--------------------------------------------------------------------------------
package cache.memory

import chisel3._
import chisel3.stage.{ChiselGeneratorAnnotation, ChiselStage}
import chisel3.util._
import cpu.CpuConfig
import firrtl.options.TargetDirAnnotation

/** simple dual port ram, with a port for reading and a port for writing
  *
  * With `config.build` set, the storage is the `SimpleDualPortRamIP` black box; otherwise a
  * `SyncReadMem` model is generated.
  *
  * @param depth
  *   how many lines there are in the ram; must be a power of two
  * @param width
  *   how wide in bits each line is
  * @param byteAddressable
  *   is it byte addressable? If so `width` must be a multiple of 8 and `wstrb` carries one bit per
  *   byte; otherwise `wstrb` is a single bit
  * @param config
  *   the implicit configuration for simulation and elaboration
  */
class SimpleDualPortRam(depth: Int, width: Int, byteAddressable: Boolean)(implicit
    val config: CpuConfig,
) extends Module {
  require(isPow2(depth))
  require(
    width % 8 == 0 || !byteAddressable,
    "if memory is byte addressable, then the adderss width must be a multiple of 8",
  )
  val waddridth = log2Ceil(depth)

  val io = IO(new Bundle {
    val raddr = Input(UInt(waddridth.W))
    val ren = Input(Bool())
    val rdata = Output(UInt(width.W))

    val waddr = Input(UInt(waddridth.W))
    val wen = Input(Bool())
    val wstrb = Input(UInt((if (byteAddressable) width / 8 else 1).W))
    val wdata = Input(UInt(width.W))
  })

  if (config.build) {
    val memory = Module(
      new SimpleDualPortRamIP(
        wdataidth = width,
        byteWriteWidth = if (byteAddressable) 8 else width,
        numberOfLines = depth,
        waddridth = waddridth,
      ),
    )
    memory.io.clka := clock
    memory.io.clkb := clock
    memory.io.rstb := reset

    // port A: write
    memory.io.addra := io.waddr
    memory.io.ena := io.wen
    memory.io.dina := io.wdata
    memory.io.wea := io.wstrb

    // port B: read
    memory.io.addrb := io.raddr
    memory.io.enb := io.ren
    memory.io.regceb := false.B
    io.rdata := memory.io.doutb
  } else {
    assert(
      io.wstrb.orR || !io.wen,
      "when write port enable is high, write vector cannot be all 0",
    )
    if (byteAddressable) {
      // One 8-bit lane per strobe bit so that `wstrb` can be used as the write mask.
      val bank = SyncReadMem(depth, Vec(width / 8, UInt(8.W)))
      when(io.ren) {
        io.rdata := bank(io.raddr).asTypeOf(io.rdata)
      }.otherwise {
        io.rdata := DontCare
      }
      when(io.wen) {
        bank.write(io.waddr, io.wdata.asTypeOf(Vec(width / 8, UInt(8.W))), io.wstrb.asBools)
      }
    } else {
      val bank = SyncReadMem(depth, UInt(width.W))

      when(io.ren) {
        io.rdata := bank.read(io.raddr)
      }.otherwise {
        // Zero sized to the port: this used to be a hard-coded 32-bit literal.
        io.rdata := 0.U(width.W)
      }

      when(io.wen) {
        bank.write(io.waddr, io.wdata)
      }
    }
  }
}
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/fetch/PreDecoder.scala:
--------------------------------------------------------------------------------
package cpu.pipeline.fetch

import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu.CpuConfig
import cpu.pipeline.fetch.BufferUnit

/** One pre-decoded instruction as presented on `PreDecoder.io.read`. */
class BufferEnq extends Bundle {
  val valid = Bool()
  val jump_branch_inst = Bool()
  val op = UInt(OP_WID.W)
  val is_in_delayslot = Bool()

  val tlb = new Bundle {
    val refill = Bool()
    val invalid = Bool()
  }
  val inst = UInt(32.W)
  val pc = UInt(32.W)
}

/** Registers the fetched instructions for one cycle and pre-decodes them.
  *
  * Each slot latches `inst`, `pc` and the TLB flags when its `wen` is set and `full.fromInstFifo`
  * is low; `valid` follows `wen` under the same not-full condition and is cleared by `flush`. The
  * registered instruction word is matched against the jump/branch encodings to produce `op` and
  * `jump_branch_inst` on the read port. `full.fromInstFifo` is passed straight through to
  * `full.toIcache`.
  */
class PreDecoder(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val flush = Input(Bool())

    val full = new Bundle {
      val fromInstFifo = Input(Bool())
      val toIcache = Output(Bool())
    }
    val read = Output(Vec(config.instFetchNum, new BufferEnq()))

    val wen = Input(Vec(config.instFetchNum, Bool()))
    val write = Input(Vec(config.instFetchNum, new BufferUnit()))
  })

  // Only `tlb`, `inst`, `pc` and `valid` of these registers are ever written; the remaining
  // fields keep their reset value of 0.
  val buffer = RegInit(VecInit(Seq.fill(config.instFetchNum)(0.U.asTypeOf(new BufferEnq()))))

  for (i <- 0 until config.instFetchNum) {
    when(io.wen(i) && !io.full.fromInstFifo) {
      buffer(i).tlb.refill := io.write(i).tlb.refill
      buffer(i).tlb.invalid := io.write(i).tlb.invalid
      buffer(i).inst := io.write(i).inst
      buffer(i).pc := io.write(i).pc
    }
    when(!io.full.fromInstFifo) {
      buffer(i).valid := io.wen(i)
    }
  }
  io.full.toIcache := io.full.fromInstFifo

  for (i <- 0 until config.instFetchNum) {
    // Decode table: (operation, is a jump/branch). Anything not listed decodes as
    // (EXE_NOP, false).
    val signals: List[UInt] = ListLookup(
      buffer(i).inst,
      List(EXE_NOP, false.B),
      Array( // jump / branch instructions
        J -> List(EXE_J, true.B),
        JAL -> List(EXE_JAL, true.B),
        JR -> List(EXE_JR, true.B),
        JALR -> List(EXE_JALR, true.B),
        BEQ -> List(EXE_BEQ, true.B),
        BNE -> List(EXE_BNE, true.B),
        BGTZ -> List(EXE_BGTZ, true.B),
        BLEZ -> List(EXE_BLEZ, true.B),
        BGEZ -> List(EXE_BGEZ, true.B),
        BGEZAL -> List(EXE_BGEZAL, true.B),
        BLTZ -> List(EXE_BLTZ, true.B),
        BLTZAL -> List(EXE_BLTZAL, true.B),
      ),
    )
    val op :: jump_branch_inst :: Nil = signals

    io.read(i).tlb.refill := buffer(i).tlb.refill
    io.read(i).tlb.invalid := buffer(i).tlb.invalid
    io.read(i).inst := buffer(i).inst
    io.read(i).pc := buffer(i).pc
    io.read(i).valid := buffer(i).valid
    io.read(i).jump_branch_inst := jump_branch_inst
    io.read(i).op := op
  }

  // NOTE(review): the delay-slot flags below read `buffer(...).jump_branch_inst`, a register field
  // that is never assigned in this module (the decoded flag only drives
  // `io.read(i).jump_branch_inst`). As written, every `io.read(i).is_in_delayslot` is therefore
  // constant false once the RegNext has taken its first sample. The decoded flag of the previous
  // slot may have been intended — confirm against the consumers of `is_in_delayslot` before
  // changing, since downstream logic may depend on the current behaviour.
  val inst0_is_in_delayslot = RegNext(buffer(config.instFetchNum - 1).jump_branch_inst)

  for (i <- 1 until config.instFetchNum) {
    io.read(i).is_in_delayslot := buffer(i - 1).jump_branch_inst
  }
  io.read(0).is_in_delayslot := inst0_is_in_delayslot

  // A flush invalidates every slot; it comes last so it overrides the `valid` update above.
  when(io.flush) {
    for (i <- 0 until config.instFetchNum) {
      buffer(i).valid := false.B
    }
  }
}
--------------------------------------------------------------------------------
/** Final pipeline stage: drives the register-file write ports and the trace
  * debug interface from the two instructions handed over by the memory stage.
  *
  * A write is suppressed when the pipeline is not allowed to advance or when
  * the instruction (or, for the second one, the instruction before it)
  * requests a flush.
  */
class WriteBackUnit(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val ctrl           = new WriteBackCtrl()
    val writeBackStage = Input(new MemoryUnitWriteBackUnit())
    val regfile        = Output(Vec(config.commitNum, new RegWrite()))
    val debug          = new DEBUG()
    val statistic      = if (!config.build) Some(new SocStatistic()) else None
  })

  val first  = io.writeBackStage.inst0
  val second = io.writeBackStage.inst1
  val go     = io.ctrl.allow_to_go

  // The second instruction is dropped as soon as the first one flushes.
  val firstKept  = !first.ex.flush_req
  val secondKept = firstKept && !second.ex.flush_req

  // Register-file write port 0.
  io.regfile(0).wen   := first.inst_info.reg_wen && go && firstKept
  io.regfile(0).waddr := first.inst_info.reg_waddr
  io.regfile(0).wdata := first.rd_info.wdata

  // Register-file write port 1.
  io.regfile(1).wen   := second.inst_info.reg_wen && go && secondKept
  io.regfile(1).waddr := second.inst_info.reg_waddr
  io.regfile(1).wdata := second.rd_info.wdata

  if (config.hasCommitBuffer) {
    // Both commits are queued and replayed one per cycle on the debug port.
    val buffer = Module(new CommitBuffer()).io

    buffer.enq(0).wb_pc := first.pc
    buffer.enq(1).wb_pc := second.pc
    for (port <- 0 until 2) {
      buffer.enq(port).wb_rf_wen   := io.regfile(port).wen
      buffer.enq(port).wb_rf_wnum  := io.regfile(port).waddr
      buffer.enq(port).wb_rf_wdata := io.regfile(port).wdata
    }
    buffer.flush := io.ctrl.do_flush

    io.debug.wb_pc       := buffer.deq.wb_pc
    io.debug.wb_rf_wen   := buffer.deq.wb_rf_wen
    io.debug.wb_rf_wnum  := buffer.deq.wb_rf_wnum
    io.debug.wb_rf_wdata := buffer.deq.wb_rf_wdata
  } else {
    // Without a commit buffer the two commits are time-multiplexed on the clock
    // level: commit 0 while the clock is high, commit 1 while it is low.
    val clockHigh = clock.asBool

    io.debug.wb_pc := Mux(
      clockHigh,
      first.pc,
      Mux(first.ex.flush_req, 0.U, second.pc),
    )
    io.debug.wb_rf_wen   := Fill(4, Mux(clockHigh, io.regfile(0).wen, io.regfile(1).wen))
    io.debug.wb_rf_wnum  := Mux(clockHigh, io.regfile(0).waddr, io.regfile(1).waddr)
    io.debug.wb_rf_wdata := Mux(clockHigh, io.regfile(0).wdata, io.regfile(1).wdata)
  }

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  if (!config.build) {
    val stat = io.statistic.get
    stat.cp0_cause  := first.cp0.cp0_cause
    stat.cp0_count  := first.cp0.cp0_count
    stat.cp0_random := first.cp0.cp0_random
    stat.int        := first.ex.excode === EX_INT
    stat.commit     := go
  }
}
// CP0 register layouts. In every bundle the first-declared field occupies the
// most significant bits when the bundle is converted with asUInt / asTypeOf.

/** Context register: ptebase | badvpn2 | zero padding up to 32 bits. */
class Cp0Context extends Bundle {
  val ptebase = UInt(PTEBASE_WID.W)
  val badvpn2 = UInt(VPN2_WID.W)
  val blank   = UInt((32 - PTEBASE_WID - VPN2_WID).W)
}

/** Wired register: zero padding | wired index.
  *
  * The padding is now `32 - log2Ceil(TLB_NUM)` bits so the bundle is exactly
  * 32 bits wide, matching `Cp0Random`. It used to be `31 - ...`, which made
  * the bundle one bit short of the register width.
  */
class Cp0Wired extends Bundle {
  val blank = UInt((32 - log2Ceil(TLB_NUM)).W)
  val wired = UInt(log2Ceil(TLB_NUM).W)
}

/** BadVAddr register: the faulting virtual address. */
class Cp0BadVAddr extends Bundle {
  val badvaddr = UInt(PC_WID.W)
}

/** Count register. */
class Cp0Count extends Bundle {
  val count = UInt(DATA_WID.W)
}

/** EntryHi register: vpn2 | zero padding | asid. */
class Cp0EntryHi extends Bundle {
  val vpn2  = UInt(VPN2_WID.W)
  val blank = UInt((32 - VPN2_WID - ASID_WID).W)
  val asid  = UInt(ASID_WID.W)
}

/** Compare register. */
class Cp0Compare extends Bundle {
  val compare = UInt(DATA_WID.W)
}

/** Status register (32 bits). */
class Cp0Status extends Bundle {
  val blank3 = UInt(3.W) // [31:29]
  val cu0    = Bool()    // [28]
  val blank2 = UInt(5.W) // [27:23]
  val bev    = Bool()    // [22]
  val blank1 = UInt(6.W) // [21:16]
  val im     = UInt(8.W) // [15:8]
  val blank0 = UInt(3.W) // [7:5]
  val um     = Bool()    // [4]
  val r0     = Bool()    // [3]
  val erl    = Bool()    // [2]
  val exl    = Bool()    // [1]
  val ie     = Bool()    // [0]
}

/** Cause register (32 bits). */
class Cp0Cause extends Bundle {
  val bd     = Bool()    // [31]
  val blank3 = UInt(7.W) // [30:24]
  val iv     = Bool()    // [23]
  val blank2 = UInt(7.W) // [22:16]
  val ip     = UInt(8.W) // [15:8]
  val blank1 = Bool()    // [7]
  val excode = UInt(5.W) // [6:2]
  val blank0 = UInt(2.W) // [1:0]
}

/** EPC register. */
class Cp0Epc extends Bundle {
  val epc = UInt(PC_WID.W)
}

/** EBase register (32 bits). */
class Cp0Ebase extends Bundle {
  val fill   = Bool()     // [31]
  val blank1 = Bool()     // [30]
  val ebase  = UInt(18.W) // [29:12]
  val blank0 = UInt(2.W)  // [11:10]
  val cpuNum = UInt(10.W) // [9:0]
}

/** Config register (32 bits). */
class Cp0Config extends Bundle {
  val m     = Bool()    // [31]
  val k23   = UInt(3.W) // [30:28]
  val ku    = UInt(3.W) // [27:25]
  val impl  = UInt(9.W) // [24:16]
  val be    = Bool()    // [15]
  val at    = UInt(2.W) // [14:13]
  val ar    = UInt(3.W) // [12:10]
  val mt    = UInt(3.W) // [9:7]
  val blank = UInt(3.W) // [6:4]
  val vi    = Bool()    // [3]
  val k0    = UInt(3.W) // [2:0]
}
/** Merges the instruction-cache and data-cache AXI masters onto one AXI port.
  *
  * Read address channel: the icache has priority; the dcache is selected only
  * when it is requesting and the icache is not. Once a request is presented
  * but not yet accepted, the selection is frozen until the handshake completes.
  * The selection is sent as the AR id, and bit 0 of the returned R id routes
  * the read data back (0 = icache, 1 = dcache).
  *
  * Write channels (AW / W / B) belong to the dcache alone.
  */
class CacheAXIInterface extends Module {
  val io = IO(new Bundle {
    val icache = Flipped(new ICache_AXIInterface())
    val dcache = Flipped(new DCache_AXIInterface())
    val axi    = new AXI()
  })

  // Pass `value` through when `enable` is high, otherwise drive zero.
  def gated(enable: Bool, value: UInt): UInt = Mux(enable, value, 0.U)

  val ar_sel          = Wire(Bool())
  val ar_sel_lock     = RegInit(false.B)
  val ar_sel_lock_val = RegInit(false.B)

  // Freeze the arbitration result while a request waits for `ready`.
  val arAccepted = io.axi.ar.valid && io.axi.ar.ready
  val arWaiting  = io.axi.ar.valid && !io.axi.ar.ready
  when(arAccepted) {
    ar_sel_lock := false.B
  }
  when(arWaiting) {
    ar_sel_lock     := true.B
    ar_sel_lock_val := ar_sel
  }

  val dcacheWins = io.dcache.ar.valid && !io.icache.ar.valid
  ar_sel := Mux(ar_sel_lock, ar_sel_lock_val, dcacheWins)

  val r_sel    = io.axi.r.bits.id(0)
  val toDcache = r_sel
  val toIcache = !r_sel

  // ===----------------------------------------------------------------===
  // dcache
  // ===----------------------------------------------------------------===
  io.dcache.ar.ready    := io.axi.ar.ready && ar_sel
  io.dcache.r.bits.data := gated(toDcache, io.axi.r.bits.data)
  io.dcache.r.bits.last := gated(toDcache, io.axi.r.bits.last)
  io.dcache.r.valid     := gated(toDcache, io.axi.r.valid)

  io.dcache.aw.ready := io.axi.aw.ready
  io.dcache.w.ready  := io.axi.w.ready
  io.dcache.b.valid  := io.axi.b.valid

  // ===----------------------------------------------------------------===
  // icache
  // ===----------------------------------------------------------------===
  io.icache.ar.ready    := io.axi.ar.ready && !ar_sel
  io.icache.r.bits.data := gated(toIcache, io.axi.r.bits.data)
  io.icache.r.bits.last := gated(toIcache, io.axi.r.bits.last)
  io.icache.r.valid     := gated(toIcache, io.axi.r.valid)

  // ===----------------------------------------------------------------===
  // axi
  // ===----------------------------------------------------------------===
  val ar     = io.axi.ar
  val dReq   = io.dcache.ar
  val iReq   = io.icache.ar
  ar.bits.id    := ar_sel
  ar.bits.addr  := Mux(ar_sel, dReq.bits.addr, iReq.bits.addr)
  ar.bits.len   := Mux(ar_sel, dReq.bits.len, iReq.bits.len)
  ar.bits.size  := Mux(ar_sel, dReq.bits.size, iReq.bits.size)
  ar.bits.burst := 1.U
  ar.bits.lock  := 0.U
  ar.bits.cache := 0.U
  ar.bits.prot  := 0.U
  ar.valid      := Mux(ar_sel, dReq.valid, iReq.valid)

  io.axi.r.ready := Mux(r_sel, io.dcache.r.ready, io.icache.r.ready)

  val aw = io.axi.aw
  aw.bits.id    := 0.U
  aw.bits.addr  := io.dcache.aw.bits.addr
  aw.bits.len   := io.dcache.aw.bits.len
  aw.bits.size  := io.dcache.aw.bits.size
  aw.bits.burst := 1.U
  aw.bits.lock  := 0.U
  aw.bits.cache := 0.U
  aw.bits.prot  := 0.U
  aw.valid      := io.dcache.aw.valid

  val w = io.axi.w
  w.bits.id   := 0.U
  w.bits.data := io.dcache.w.bits.data
  w.bits.strb := io.dcache.w.bits.strb
  w.bits.last := io.dcache.w.bits.last
  w.valid     := io.dcache.w.valid

  io.axi.b.ready := io.dcache.b.ready
}
[简体中文](./README.md) | English 2 | 3 | # 🚀 PUA (Powerful Ultimate Architecture) MIPS 🏗️ 4 | 5 | ## 🛠️ Environment Setup 6 | 7 | This Chisel project is built using sbt. 8 | 9 | - sbt version: 1.8.2 10 | - scala version: 2.13.8 11 | - Chisel version: 3.5.4 12 | 13 | Execute the following command in the chisel directory to generate the Verilog file: 14 | 15 | ```shell 16 | make verilog 17 | ``` 18 | 19 | Upon completion, you will find the `PuaMips.v` Verilog file in the `chisel/generated` directory, and `chisel/src/main/resources/mycpu_top.v` can be used as the top-level file. 20 | 21 | ## 📚 Introduction 22 | 23 | This project is based on "CPU Design in Practice" and features a simple high-performance dual-issue six-stage pipelined MIPS processor. In the preliminary round, it achieved a performance score of 62, a frequency of 88MHz, and an IPC of 1.25. 24 | For specific design details, please refer to `doc/final-design.pdf`. 25 | This processor is capable of running the Linux 6.5-rc3 kernel, the latest version at the time of writing. 26 | The codebase for this processor does not exceed 5000 lines, making it one of the smallest processors in the history of the Loongson Cup. 27 | There is significant room for improvement in this processor, and we welcome you to explore it. Due to time constraints, we did not focus extensively on optimizing the frequency. 28 | This project is in its final stages and will no longer receive maintenance. 29 | 30 | ## 📦 Resources 31 | 32 | 1. [Git Commit Message Conventions](https://gitee.com/help/articles/4231#article-header0) 📜 - Git's commit conventions. 33 | 2. [Chisel-template](https://github.com/freechipsproject/chisel-template) 📁 - The Chisel project in pua-mips was initialized using this template. 34 | 3. [Online Assembler](https://godbolt.org/) 💻 - An online compiler used to generate corresponding MIPS binary files for reference. 35 | 4. 
[MIPS Converter](https://www.eg.bucknell.edu/~csci320/mips_web/) 🔄 - Responsible for converting MIPS instructions into binary and providing detailed explanations of the instructions. 36 | 5. [Chisel Environment Setup](https://clo91eaf.github.io/80b5fe4ebe03/) 🛠️ 37 | 38 | ## 🧩 Miscellaneous 39 | 40 | ### Proper Ways to Reference This Project 41 | 42 | 1. ❌ Please do not directly copy the code from this project. 43 | 2. ✍️ After understanding the modules of this project, implement the code in your own coding style. Try to achieve better code quality through the following aspects: 44 | - 💬 Code readability with ample comments. 45 | - 🧰 Code maintainability through decoupling and modularization. In this project, except for a few files, the code size of other files is within 200 lines. 46 | - 🚀 Utilize the high-level abstraction capabilities of the Chisel language. 47 | - 📝 Discard software thinking; design first, then implement. 48 | 3. 📊 Compare performance through performance counters. Optimization should always be data-driven. 49 | 4. 📡 Use simulators and emulators. Avoid falling into the waveform trap. Don't settle for the status quo. Always think about how to minimize waveform viewing and gain more insights. 50 | 51 | ### Some Characteristics of This Project 52 | 53 | In this project, you will inevitably encounter: 54 | 55 | 1. 📝 Mixed comments in both Chinese and English. 56 | 2. 🎨 Inconsistent code styles. 57 | 3. 🤔 Odd naming conventions. 58 | 4. 🧱 Unexplained hardcoded designs. 59 | 5. 🔄 Redundant commit records. 60 | 61 | While we have made efforts to address these issues during development and have conducted multiple refactorings, we cannot guarantee code perfection due to time constraints. 62 | 63 | If our design process inspires you, we would be delighted. 
/** Execute-stage front end of the data memory port.
  *
  * Picks which of the two issued instructions drives the single memory
  * request, detects misaligned load/store addresses, and merges the resulting
  * address-error exceptions into each instruction's exception record.
  */
class ExeAccessMemCtrl(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val mem = new Bundle {
      val out = Output(new Bundle {
        val en        = Bool()
        val ren       = Bool()
        val wen       = Bool()
        val inst_info = new InstInfo()
        val addr      = UInt(DATA_ADDR_WID.W)
        val wdata     = UInt(DATA_WID.W)
      })
    }

    val inst = Vec(
      config.fuNum,
      new Bundle {
        val inst_info = Input(new InstInfo())
        val src_info  = Input(new SrcInfo())
        val ex = new Bundle {
          val in  = Input(new ExceptionInfo())
          val out = Output(new ExceptionInfo())
        }
        val mem_sel = Output(Bool())
      },
    )
  })

  val info0 = io.inst(0).inst_info
  val info1 = io.inst(1).inst_info

  // Lane routing is decided by the functional-unit selector, lane 0 first.
  val lane0IsMem = info0.fusel === FU_MEM
  val lane1IsMem = info1.fusel === FU_MEM

  val mem_addr = Wire(Vec(config.fuNum, UInt(DATA_ADDR_WID.W)))
  mem_addr(0) := info0.mem_addr
  mem_addr(1) := info1.mem_addr

  // Request strobes: a lane contributes only when it is actually selected.
  io.mem.out.en  := VecInit(io.inst.map(_.mem_sel)).asUInt.orR
  io.mem.out.ren := (io.inst(0).mem_sel && info0.rmem) || (io.inst(1).mem_sel && info1.rmem)
  io.mem.out.wen := (io.inst(0).mem_sel && info0.wmem) || (io.inst(1).mem_sel && info1.wmem)

  // Request payload.
  io.mem.out.inst_info := MuxCase(
    DontCare,
    Seq(
      lane0IsMem -> info0,
      lane1IsMem -> info1,
    ),
  )
  io.mem.out.addr  := Mux(lane0IsMem, mem_addr(0), Mux(lane1IsMem, mem_addr(1), 0.U))
  io.mem.out.wdata := Mux(
    lane0IsMem,
    io.inst(0).src_info.src2_data,
    Mux(lane1IsMem, io.inst(1).src_info.src2_data, 0.U),
  )

  // Alignment checks: word accesses need addr[1:0] == 0, halfword accesses addr[0] == 0.
  val mem_adel = Wire(Vec(config.fuNum, Bool()))
  val mem_ades = Wire(Vec(config.fuNum, Bool()))
  for (i <- 0 until config.fuNum) {
    val op          = io.inst(i).inst_info.op
    val wordMisfit  = mem_addr(i)(1, 0) =/= 0.U
    val halfMisfit  = mem_addr(i)(0) =/= 0.U
    val wordLoad    = op === EXE_LW || op === EXE_LL
    val halfLoad    = op === EXE_LH || op === EXE_LHU
    val wordStore   = op === EXE_SW || op === EXE_SC
    val halfStore   = op === EXE_SH

    mem_adel(i) := (wordLoad && wordMisfit) || (halfLoad && halfMisfit)
    mem_ades(i) := (wordStore && wordMisfit) || (halfStore && halfMisfit)
  }

  for (i <- 0 until config.fuNum) {
    val in  = io.inst(i).ex.in
    val out = io.inst(i).ex.out

    // Start from the incoming record, then override the fields touched here.
    out := in

    // An exception already attached to the instruction takes precedence over
    // the address errors detected in this stage; load errors beat store errors.
    val hasPrior = in.excode =/= EX_NO
    out.excode := Mux(
      hasPrior,
      in.excode,
      Mux(mem_adel(i), EX_ADEL, Mux(mem_ades(i), EX_ADES, in.excode)),
    )

    // Keep the incoming bad address only when the incoming code is itself an
    // address error; otherwise report this stage's memory address.
    val priorIsAddrErr = in.excode === EX_ADEL || in.excode === EX_ADES
    out.badvaddr := Mux(priorIsAddrErr, in.badvaddr, mem_addr(i))

    out.flush_req := in.flush_req || out.excode =/= EX_NO
  }

  // A lane accesses memory only if it survives; lane 1 also dies with lane 0.
  val lane0Alive = !io.inst(0).ex.out.flush_req
  val lane1Alive = lane0Alive && !io.inst(1).ex.out.flush_req
  io.inst(0).mem_sel := (info0.wmem || info0.rmem) && lane0Alive
  io.inst(1).mem_sel := (info1.wmem || info1.rmem) && lane1Alive

}
// Top-level wrapper: exposes the Chisel-generated `PuaMips` core under the
// flat port names declared below. It contains no logic of its own apart from
// inverting the active-low reset.
module mycpu_top (
    input  [ 5:0] ext_int,   // forwarded to io_ext_int
    input         aclk,      // drives the core's `clock`
    input         aresetn,   // active low; inverted before reaching the core
    // AXI interface

    // read request (AR channel)
    output [ 3:0] arid,
    output [31:0] araddr,
    output [ 7:0] arlen,
    output [ 2:0] arsize,
    output [ 1:0] arburst,
    output [ 1:0] arlock,
    output [ 3:0] arcache,
    output [ 2:0] arprot,
    output        arvalid,
    input         arready,

    // read response (R channel)
    input  [ 3:0] rid,
    input  [31:0] rdata,
    input  [ 1:0] rresp,
    input         rlast,
    input         rvalid,
    output        rready,

    // write request (AW channel)
    output [ 3:0] awid,
    output [31:0] awaddr,
    output [ 7:0] awlen,
    output [ 2:0] awsize,
    output [ 1:0] awburst,
    output [ 1:0] awlock,
    output [ 3:0] awcache,
    output [ 2:0] awprot,
    output        awvalid,
    input         awready,

    // write data (W channel)
    output [ 3:0] wid,
    output [31:0] wdata,
    output [ 3:0] wstrb,
    output        wlast,
    output        wvalid,
    input         wready,

    // write response (B channel)
    input  [ 3:0] bid,
    input  [ 1:0] bresp,
    input         bvalid,
    output        bready,

    // trace debug interface
    output [31:0] debug_wb_pc,
    output [ 3:0] debug_wb_rf_wen,
    output [ 4:0] debug_wb_rf_wnum,
    output [31:0] debug_wb_rf_wdata
);

    // One-to-one port hookup; only the reset polarity changes.
    PuaMips puamips(
        .clock                    (aclk             ),
        .reset                    (~aresetn         ),
        .io_ext_int               (ext_int          ),
        .io_axi_ar_bits_id        (arid             ),
        .io_axi_ar_bits_addr      (araddr           ),
        .io_axi_ar_bits_len       (arlen            ),
        .io_axi_ar_bits_size      (arsize           ),
        .io_axi_ar_bits_burst     (arburst          ),
        .io_axi_ar_bits_lock      (arlock           ),
        .io_axi_ar_bits_cache     (arcache          ),
        .io_axi_ar_bits_prot      (arprot           ),
        .io_axi_ar_valid          (arvalid          ),
        .io_axi_ar_ready          (arready          ),
        .io_axi_r_bits_id         (rid              ),
        .io_axi_r_bits_data       (rdata            ),
        .io_axi_r_bits_resp       (rresp            ),
        .io_axi_r_bits_last       (rlast            ),
        .io_axi_r_valid           (rvalid           ),
        .io_axi_r_ready           (rready           ),
        .io_axi_aw_bits_id        (awid             ),
        .io_axi_aw_bits_addr      (awaddr           ),
        .io_axi_aw_bits_len       (awlen            ),
        .io_axi_aw_bits_size      (awsize           ),
        .io_axi_aw_bits_burst     (awburst          ),
        .io_axi_aw_bits_lock      (awlock           ),
        .io_axi_aw_bits_cache     (awcache          ),
        .io_axi_aw_bits_prot      (awprot           ),
        .io_axi_aw_valid          (awvalid          ),
        .io_axi_aw_ready          (awready          ),
        .io_axi_w_bits_id         (wid              ),
        .io_axi_w_bits_data       (wdata            ),
        .io_axi_w_bits_strb       (wstrb            ),
        .io_axi_w_bits_last       (wlast            ),
        .io_axi_w_valid           (wvalid           ),
        .io_axi_w_ready           (wready           ),
        .io_axi_b_bits_id         (bid              ),
        .io_axi_b_bits_resp       (bresp            ),
        .io_axi_b_valid           (bvalid           ),
        .io_axi_b_ready           (bready           ),
        .io_debug_wb_pc           (debug_wb_pc      ),
        .io_debug_wb_rf_wen       (debug_wb_rf_wen  ),
        .io_debug_wb_rf_wnum      (debug_wb_rf_wnum ),
        .io_debug_wb_rf_wdata     (debug_wb_rf_wdata)
    );

endmodule
/** Handshake between the ALU and the divider: the ALU raises `en` (with
  * `signed`) and reads `result` once `ready` is high.
  */
class DivSignal extends Bundle {
  val ready  = Input(Bool())
  val result = Input(UInt(HILO_WID.W))

  val en     = Output(Bool())
  val signed = Output(Bool())
}

/** Handshake between the ALU and the multiplier; same shape as `DivSignal`. */
class MultSignal extends Bundle {
  val ready  = Input(Bool())
  val result = Input(UInt(HILO_WID.W))

  val en     = Output(Bool())
  val signed = Output(Bool())
}

/** Combinational ALU.
  *
  * Selects, by `inst_info.op`, the 32-bit result, the value to write to HI/LO,
  * the overflow and trap flags, and the enable/signed controls of the external
  * multiplier and divider.
  */
class Alu extends Module {
  val io = IO(new Bundle {
    val inst_info = Input(new InstInfo())
    val src_info  = Input(new SrcInfo())
    val cp0_rdata = Input(UInt(DATA_WID.W))
    val llbit     = Input(Bool())
    val hilo = new Bundle {
      val rdata = Input(UInt(HILO_WID.W))
      val wdata = Output(UInt(HILO_WID.W))
    }
    val mul      = new MultSignal()
    val div      = new DivSignal()
    val result   = Output(UInt(DATA_WID.W))
    val overflow = Output(Bool())
    val trap     = Output(Bool())
  })

  val op   = io.inst_info.op
  val src1 = io.src_info.src1_data
  val src2 = io.src_info.src2_data

  // True when `op` equals any of the listed operations.
  def opIsOneOf(candidates: UInt*): Bool = candidates.map(op === _).reduce(_ || _)
  // Forward `value` only once the producing unit reports `ready`.
  def onceReady(ready: Bool, value: UInt): UInt = Mux(ready, value, 0.U)

  val sum  = src1 + src2
  val diff = src1 - src2
  val slt  = src1.asSInt < src2.asSInt
  val sltu = src1 < src2

  // Leading-ones / leading-zeros counts of src1. Scanning from bit 0 upwards,
  // the highest matching bit ends up outermost and therefore wins; 32 is the
  // answer when no bit matches.
  val clo = (0 until 32).foldLeft(32.U)((best, bit) => Mux(!src1(bit), (31 - bit).U, best))
  val clz = (0 until 32).foldLeft(32.U)((best, bit) => Mux(src1(bit), (31 - bit).U, best))

  val hilo    = io.hilo.rdata
  val mulDone = io.mul.ready
  val divDone = io.div.ready
  val product = io.mul.result

  io.hilo.wdata := MuxLookup(
    op,
    0.U,
    Seq(
      EXE_MTHI  -> Cat(src1, hilo(31, 0)),
      EXE_MTLO  -> Cat(hilo(63, 32), src1),
      EXE_MULT  -> onceReady(mulDone, product),
      EXE_MULTU -> onceReady(mulDone, product),
      EXE_MADD  -> onceReady(mulDone, hilo + product),
      EXE_MADDU -> onceReady(mulDone, hilo + product),
      EXE_MSUB  -> onceReady(mulDone, hilo - product),
      EXE_MSUBU -> onceReady(mulDone, hilo - product),
      EXE_DIV   -> onceReady(divDone, io.div.result),
      EXE_DIVU  -> onceReady(divDone, io.div.result),
    ),
  )

  // Multiplier / divider are kept enabled until they report completion.
  io.mul.signed := opIsOneOf(EXE_MULT, EXE_MUL, EXE_MADD, EXE_MSUB)
  io.mul.en := opIsOneOf(EXE_MUL, EXE_MULT, EXE_MULTU, EXE_MADD, EXE_MSUB, EXE_MADDU, EXE_MSUBU) &&
    !mulDone
  io.div.signed := op === EXE_DIV
  io.div.en     := opIsOneOf(EXE_DIV, EXE_DIVU) && !divDone

  val shamt      = src1(4, 0)
  val productLow = onceReady(mulDone, product(31, 0))

  io.result := MuxLookup(
    op,
    0.U,
    Seq(
      // arithmetic
      EXE_ADD  -> sum,
      EXE_ADDU -> sum,
      EXE_SUB  -> diff,
      EXE_SUBU -> diff,
      EXE_SLT  -> slt,
      EXE_SLTU -> sltu,
      // logic
      EXE_AND -> (src1 & src2),
      EXE_OR  -> (src1 | src2),
      EXE_NOR -> (~(src1 | src2)),
      EXE_XOR -> (src1 ^ src2),
      // shifts
      EXE_SLL -> (src2 << shamt),
      EXE_SRL -> (src2 >> shamt),
      EXE_SRA -> ((src2.asSInt >> shamt).asUInt),
      // data movement
      EXE_MFHI -> io.hilo.rdata(63, 32),
      EXE_MFLO -> io.hilo.rdata(31, 0),
      EXE_MFC0 -> io.cp0_rdata,
      EXE_MOVN -> src1,
      EXE_MOVZ -> src1,
      // leading-bit counts
      EXE_CLZ -> clz,
      EXE_CLO -> clo,
      // special
      EXE_SC -> io.llbit,
      // multiplication
      EXE_MUL   -> productLow,
      EXE_MULT  -> productLow,
      EXE_MULTU -> productLow,
    ),
  )

  // Signed overflow: operands of equal sign (add) / different sign (sub) whose
  // result sign differs from the first operand.
  val sign1       = src1(31)
  val sign2       = src2(31)
  val addOverflow = (sign1 === sign2) & (sign1 =/= sum(31))
  val subOverflow = (sign1 =/= sign2) & (sign1 =/= diff(31))
  io.overflow := MuxLookup(
    op,
    false.B,
    Seq(
      EXE_ADD -> addOverflow,
      EXE_SUB -> subOverflow,
    ),
  )

  io.trap := MuxLookup(
    op,
    false.B,
    Seq(
      EXE_TEQ  -> (src1 === src2),
      EXE_TNE  -> (src1 =/= src2),
      EXE_TGE  -> !slt,
      EXE_TGEU -> !sltu,
      EXE_TLT  -> slt,
      EXE_TLTU -> sltu,
    ),
  )
}
/** A simple FIFO buffer implemented using Chisel's built-in Queue module.
  *
  * @param dataWidth
  *   The width of the data to be stored in the buffer.
  * @param buffDepth
  *   The depth of the buffer (i.e. the number of elements it can hold).
  * @param addrWidth
  *   Kept for interface compatibility; not used, the Queue sizes its own pointers.
  */
class FifoBuffer(
    val dataWidth: Int = 32,
    val buffDepth: Int = 4,
    val addrWidth: Int = 2,
) extends Module {
  val io = IO(new Bundle {
    val wen    = Input(Bool())             // Write enable signal.
    val ren    = Input(Bool())             // Read enable signal.
    val input  = Input(UInt(dataWidth.W))  // Data to be written to the buffer.
    val output = Output(UInt(dataWidth.W)) // Data read from the buffer.
    val empty  = Output(Bool())            // High while the buffer holds no element.
    val full   = Output(Bool())            // High while the buffer cannot accept a write.
  })

  // Instantiate a Queue module with the given data width and buffer depth.
  val queue = Module(new Queue(UInt(dataWidth.W), buffDepth))

  // Connect the input and output signals to the Queue module.
  queue.io.enq.valid := io.wen
  queue.io.enq.bits  := io.input
  io.full            := queue.io.enq.ready === false.B
  queue.io.deq.ready := io.ren
  io.output          := queue.io.deq.bits
  io.empty           := queue.io.count === 0.U
}

/** A simple counter that keeps track of the number of elements in a FIFO buffer.
  *
  * Fixes over the previous version:
  *   - The counter was only `addrWidth` bits wide, so with the defaults
  *     (depth 4, width 2) it could never reach `buffDepth`: `full` never
  *     asserted and the count wrapped to 0. It is now wide enough to hold
  *     `buffDepth`.
  *   - A read and a write accepted in the same cycle used to decrement the
  *     count; they now cancel out.
  *
  * @param buffDepth
  *   The depth of the buffer (i.e. the number of elements it can hold).
  * @param addrWidth
  *   Minimum width of the counter; widened automatically when it cannot
  *   represent `buffDepth`.
  */
class FifoCount(
    val buffDepth: Int = 4,
    val addrWidth: Int = 2,
) extends Module {
  val io = IO(new Bundle {
    val wen   = Input(Bool())
    val ren   = Input(Bool())
    val empty = Output(Bool())
    val full  = Output(Bool())
  })

  // The counter must represent every value in 0..buffDepth inclusive.
  val countWidth = math.max(addrWidth, log2Ceil(buffDepth + 1))
  val count      = RegInit(0.U(countWidth.W))

  io.empty := count === 0.U
  io.full  := count === buffDepth.U

  // A read is accepted only when non-empty, a write only when non-full.
  val doRead  = io.ren && !io.empty
  val doWrite = io.wen && !io.full

  // Exactly one of the two changes the occupancy; both together leave it as is.
  when(doRead =/= doWrite) {
    count := Mux(doWrite, count + 1.U, count - 1.U)
  }
}

/** A FIFO buffer that also reports whether its head entry matches a given value.
  *
  * @param dataWidth
  *   The width of the data to be stored in the buffer.
  * @param buffDepth
  *   The depth of the buffer (i.e. the number of elements it can hold).
  * @param addrWidth
  *   Kept for interface compatibility; not used, the Queue sizes its own pointers.
  * @param relatedDataWidth
  *   Number of low-order bits of the head entry compared against `related_data_1`.
  */
class FifoBufferValid(
    val dataWidth:        Int = 33,
    val buffDepth:        Int = 6,
    val addrWidth:        Int = 3,
    val relatedDataWidth: Int = 32,
) extends Module {
  val io = IO(new Bundle {
    val wen   = Input(Bool())  // Write enable signal.
    val ren   = Input(Bool())  // Read enable signal.
    val empty = Output(Bool()) // High while the buffer holds no element.
    val full  = Output(Bool()) // High while the buffer holds `buffDepth` elements.
    val related_1 = Output(
      Bool(),
    ) // High when the head entry is valid and its low bits equal `related_data_1`.
    val input  = Input(UInt(dataWidth.W))  // Data to be written to the buffer.
    val output = Output(UInt(dataWidth.W)) // Data read from the buffer.
    val related_data_1 = Input(
      UInt(relatedDataWidth.W),
    ) // Value compared against the low bits of the head entry.
  })

  // Instantiate a Queue module with the given data width and buffer depth.
  val queue = Module(new Queue(UInt(dataWidth.W), buffDepth))

  // Connect the input and output signals to the Queue module.
  queue.io.enq.valid := io.wen
  queue.io.enq.bits  := io.input
  io.full            := queue.io.count === buffDepth.U
  io.empty           := queue.io.count === 0.U
  io.output          := queue.io.deq.bits

  // Connect the ready signal to the read enable input.
  queue.io.deq.ready := io.ren

  // Compare the low `relatedDataWidth` bits of the head entry with the probe value.
  io.related_1 := queue.io.deq.valid && io.related_data_1 === queue.io.deq
    .bits(relatedDataWidth - 1, 0)
}
/** One fetched instruction together with the metadata that travels with it. */
class BufferUnit extends Bundle {
  val bpuConfig = new BranchPredictorConfig()
  val tlb = new Bundle {
    val refill  = Bool()
    val invalid = Bool()
  }
  val inst      = UInt(32.W)
  val pht_index = UInt(bpuConfig.phtDepth.W)
  val pc        = UInt(32.W)
}

/** Instruction FIFO between fetch and decode.
  *
  * Up to `config.instFetchNum` entries are written and up to two are read per
  * cycle. On a flush the pointers and the occupancy are reset; a branch delay
  * slot that must survive the flush is parked in `delayslot_line` and served
  * from there on read port 0.
  */
class InstFifo(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val do_flush              = Input(Bool())
    val flush_delay_slot      = Input(Bool())
    val delay_sel_flush       = Input(Bool())
    val decoder_delay_flush   = Input(Bool())
    val execute_delay_flush   = Input(Bool())
    val icache_stall          = Input(Bool())
    val jump_branch_inst      = Input(Bool()) // whether inst0 in the decode stage is a jump/branch
    val inst0_is_in_delayslot = Output(Bool())

    val ren  = Input(Vec(config.decoderNum, Bool()))
    val read = Output(Vec(config.decoderNum, new BufferUnit()))

    val wen   = Input(Vec(config.instFetchNum, Bool()))
    val write = Input(Vec(config.instFetchNum, new BufferUnit()))

    val empty        = Output(Bool())
    val almost_empty = Output(Bool())
    val full         = Output(Bool())
  })

  val depth    = config.instFifoDepth
  val ptrWidth = log2Ceil(depth)

  // fifo buffer
  val buffer = RegInit(VecInit(Seq.fill(depth)(0.U.asTypeOf(new BufferUnit()))))

  // fifo pointers and occupancy
  val enq_ptr = RegInit(0.U(ptrWidth.W))
  val deq_ptr = RegInit(0.U(ptrWidth.W))
  val count   = RegInit(0.U(ptrWidth.W))

  // "full" is reported early enough that one more fetch group always fits.
  val isFull        = count >= (depth - config.instFetchNum).U // TODO: the ">=" could be tightened
  val isEmpty       = count === 0.U
  val isAlmostEmpty = count === 1.U
  io.full         := isFull
  io.empty        := isEmpty
  io.almost_empty := isAlmostEmpty

  // Tracks whether the next inst0 handed to decode sits in a delay slot.
  val inst0_is_in_delayslot = RegInit(false.B)
  io.inst0_is_in_delayslot := inst0_is_in_delayslot
  when(io.flush_delay_slot) {
    inst0_is_in_delayslot := false.B
  }.elsewhen(io.ren(0)) {
    // inst0 was consumed: the flag is set only if it was a branch issued alone.
    inst0_is_in_delayslot := io.jump_branch_inst && !io.ren(1)
  }

  val delayslot_stall  = RegInit(false.B)
  val delayslot_enable = RegInit(false.B)
  val delayslot_line   = RegInit(0.U.asTypeOf(new BufferUnit()))

  // A flush that must keep the delay slot of the redirecting branch.
  val keepDelaySlot = io.do_flush && io.delay_sel_flush && !io.flush_delay_slot

  // The delay slot has not arrived yet: wait for the next write.
  when(keepDelaySlot && io.icache_stall && (isEmpty || isAlmostEmpty)) {
    delayslot_stall := true.B
  }.elsewhen(delayslot_stall && io.wen(0)) {
    delayslot_stall := false.B
  }

  when(keepDelaySlot) {
    delayslot_enable := io.execute_delay_flush || io.decoder_delay_flush
    when(io.execute_delay_flush) {
      delayslot_line := Mux(isEmpty, io.write(0), buffer(deq_ptr))
    }.elsewhen(io.decoder_delay_flush) {
      delayslot_line := Mux(isAlmostEmpty, io.write(0), buffer(deq_ptr + 1.U))
    }
  }.elsewhen(!delayslot_stall && io.ren(0)) {
    delayslot_enable := false.B
  }

  // * deq * //
  val blank = 0.U.asTypeOf(new BufferUnit())

  io.read(0) := Mux(
    delayslot_enable,
    delayslot_line,
    Mux(isEmpty, blank, buffer(deq_ptr)),
  )
  io.read(1) := Mux(
    delayslot_enable || isEmpty || isAlmostEmpty,
    blank,
    buffer(deq_ptr + 1.U),
  )

  val deq_num = Mux(
    isEmpty || delayslot_enable,
    0.U,
    Mux(io.ren(1), 2.U, Mux(io.ren(0), 1.U, 0.U)),
  )

  deq_ptr := Mux(io.do_flush, 0.U, deq_ptr + deq_num)

  // * enq * //
  // Number of entries written this cycle: index of the highest asserted wen, plus one.
  val enq_num = Wire(UInt(log2Ceil(config.instFetchNum + 1).W))
  enq_num := 0.U
  for (i <- 0 until config.instFetchNum) {
    when(io.wen(i)) {
      buffer(enq_ptr + i.U) := io.write(i)
      enq_num               := (i + 1).U
    }
  }

  enq_ptr := Mux(io.do_flush, 0.U, enq_ptr + enq_num)

  // Occupancy update; the added depth keeps the intermediate sum from going
  // below zero before it is truncated back to the register width.
  count := Mux(io.do_flush, 0.U, count + enq_num + depth.U - deq_num)
}
pred_fail = Output(Bool()) 35 | } 36 | val llbit = Output(Bool()) 37 | 38 | val statistic = if (!config.build) Some(new BranchPredictorUnitStatistic()) else None 39 | }) 40 | 41 | val alu = Seq.fill(config.decoderNum)(Module(new Alu())) 42 | val mul = Module(new Mul()).io 43 | val div = Module(new Div()).io 44 | val hilo = Module(new HiLo()).io 45 | val branchCtrl = Module(new BranchCtrl()).io 46 | val llbit = Module(new LLbit()).io 47 | 48 | branchCtrl.in.inst_info := io.inst(0).inst_info 49 | branchCtrl.in.src_info := io.inst(0).src_info 50 | branchCtrl.in.pred_branch := io.branch.pred_branch 51 | io.branch.branch := branchCtrl.out.branch 52 | io.branch.pred_fail := branchCtrl.out.pred_fail 53 | 54 | for (i <- 0 until (config.fuNum)) { 55 | alu(i).io.inst_info := io.inst(i).inst_info 56 | alu(i).io.src_info := io.inst(i).src_info 57 | alu(i).io.hilo.rdata := hilo.rdata 58 | alu(i).io.mul.result := mul.result 59 | alu(i).io.mul.ready := mul.ready 60 | alu(i).io.div.ready := div.ready 61 | alu(i).io.div.result := div.result 62 | alu(i).io.cp0_rdata := io.cp0_rdata(i) 63 | alu(i).io.llbit := io.llbit 64 | io.inst(i).ex.out := io.inst(i).ex.in 65 | io.inst(i).ex.out.flush_req := io.inst(i).ex.in.flush_req || alu(i).io.overflow 66 | io.inst(i).ex.out.excode := MuxCase( 67 | io.inst(i).ex.in.excode, 68 | Seq( 69 | (io.inst(i).ex.in.excode =/= EX_NO) -> io.inst(i).ex.in.excode, 70 | alu(i).io.overflow -> EX_OV, 71 | ), 72 | ) 73 | } 74 | 75 | mul.src1 := Mux(io.inst(0).mul_en, io.inst(0).src_info.src1_data, io.inst(1).src_info.src1_data) 76 | mul.src2 := Mux(io.inst(0).mul_en, io.inst(0).src_info.src2_data, io.inst(1).src_info.src2_data) 77 | mul.signed := Mux(io.inst(0).mul_en, alu(0).io.mul.signed, alu(1).io.mul.signed) 78 | mul.start := Mux(io.inst(0).mul_en, alu(0).io.mul.en, alu(1).io.mul.en) 79 | mul.allow_to_go := io.ctrl.allow_to_go 80 | 81 | div.src1 := Mux(io.inst(0).div_en, io.inst(0).src_info.src1_data, io.inst(1).src_info.src1_data) 82 | div.src2 := 
Mux(io.inst(0).div_en, io.inst(0).src_info.src2_data, io.inst(1).src_info.src2_data) 83 | div.signed := Mux(io.inst(0).div_en, alu(0).io.div.signed, alu(1).io.div.signed) 84 | div.start := Mux(io.inst(0).div_en, alu(0).io.div.en, alu(1).io.div.en) 85 | div.allow_to_go := io.ctrl.allow_to_go 86 | 87 | io.stall_req := (io.inst.map(_.div_en).reduce(_ || _) && !div.ready) || 88 | (io.inst.map(_.mul_en).reduce(_ || _) && !mul.ready) 89 | 90 | io.inst(0).result := Mux( 91 | io.inst(0).inst_info.branch_link, 92 | io.inst(0).pc + 8.U, 93 | alu(0).io.result, 94 | ) 95 | io.inst(1).result := alu(1).io.result 96 | 97 | hilo.wen := ((io.inst(1).hilo_wen && !io.inst.map(_.ex.out.flush_req).reduce(_ || _)) || 98 | (io.inst(0).hilo_wen && !io.inst(0).ex.out.flush_req)) && io.ctrl.allow_to_go && !io.ctrl.do_flush 99 | hilo.wdata := Mux(io.inst(1).hilo_wen, alu(1).io.hilo.wdata, alu(0).io.hilo.wdata) 100 | 101 | llbit.do_flush := io.ctrl.eret 102 | llbit.wen := (io.inst(0).inst_info.op === EXE_LL || io.inst(0).inst_info.op === EXE_SC || 103 | io.inst(1).inst_info.op === EXE_LL || io.inst(1).inst_info.op === EXE_SC) && io.ctrl.allow_to_go 104 | llbit.wdata := io.inst(0).inst_info.op === EXE_LL || io.inst(1).inst_info.op === EXE_LL 105 | val llbit_rdata = if (config.build) llbit.rdata else true.B 106 | io.llbit := llbit_rdata 107 | 108 | // ===----------------------------------------------------------------=== 109 | // statistic 110 | // ===----------------------------------------------------------------=== 111 | if (!config.build) { 112 | val branch_cnt = RegInit(0.U(32.W)) 113 | val success_cnt = RegInit(0.U(32.W)) 114 | when(io.branch.branch) { branch_cnt := branch_cnt + 1.U } 115 | when(!io.branch.pred_fail) { success_cnt := success_cnt + 1.U } 116 | io.statistic.get.branch := branch_cnt 117 | io.statistic.get.success := success_cnt 118 | } 119 | } 120 | -------------------------------------------------------------------------------- 
/chisel/src/main/scala/cpu/pipeline/memory/DataMemoryAccess.scala:
--------------------------------------------------------------------------------
1 | package cpu.pipeline.memory
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import cpu.defines._
6 | import cpu.defines.Const._
7 | import cpu.CpuConfig
8 | // Combinational load/store formatter: turns the op and addr(1,0) into write data, byte strobes and a size code, and extracts load data from rdata.
9 | class DataMemoryAccess(implicit val config: CpuConfig) extends Module {
10 |   val io = IO(new Bundle {
11 |     val memoryUnit = new Bundle {
12 |       val in = Input(new Bundle {
13 |         val mem_en = Bool()
14 |         val inst_info = new InstInfo()
15 |         val mem_wdata = UInt(DATA_WID.W)
16 |         val mem_addr = UInt(DATA_ADDR_WID.W)
17 |         val mem_sel = Vec(config.fuNum, Bool())
18 |         val ex = Vec(config.fuNum, new ExceptionInfo())
19 |         val llbit = Bool()
20 |       })
21 |       val out = Output(new Bundle {
22 |         val rdata = Output(UInt(DATA_WID.W))
23 |       })
24 |     }
25 |
26 |     val dataMemory = new Bundle {
27 |       val in = Input(new Bundle {
28 |         val rdata = UInt(DATA_WID.W)
29 |       })
30 |       val out = Output(new Bundle {
31 |         val en = Bool()
32 |         val rlen = UInt(2.W)
33 |         val wen = UInt(4.W)
34 |         val addr = UInt(DATA_ADDR_WID.W)
35 |         val wdata = UInt(DATA_WID.W)
36 |       })
37 |     }
38 |   })
39 |   val mem_addr = io.memoryUnit.in.mem_addr
40 |   val mem_addr2 = mem_addr(1, 0) // byte offset within the word
41 |   val mem_rdata = io.dataMemory.in.rdata
42 |   val mem_wdata = io.memoryUnit.in.mem_wdata
43 |   val op = io.memoryUnit.in.inst_info.op
44 |   io.dataMemory.out.en := io.memoryUnit.in.mem_en && // a slot-1 access is also suppressed when slot 0 requests a flush
45 |     (io.memoryUnit.in.mem_sel(0) && !io.memoryUnit.in.ex(0).flush_req ||
46 |       io.memoryUnit.in.mem_sel(1) && !io.memoryUnit.in.ex(0).flush_req && !io.memoryUnit.in.ex(1).flush_req)
47 |   io.dataMemory.out.addr := mem_addr
48 |   // Load data: select and extend the addressed byte/halfword; LWL/LWR merge rdata with bits of mem_wdata (presumably the register's previous value; confirm against the caller).
49 |   io.memoryUnit.out.rdata := MuxLookup(
50 |     op,
51 |     0.U,
52 |     Seq(
53 |       EXE_LB -> MuxLookup(
54 |         mem_addr2,
55 |         0.U,
56 |         Seq(
57 |           "b11".U -> Util.signedExtend(mem_rdata(31, 24)),
58 |           "b10".U -> Util.signedExtend(mem_rdata(23, 16)),
59 |           "b01".U -> Util.signedExtend(mem_rdata(15, 8)),
60 |           "b00".U -> Util.signedExtend(mem_rdata(7, 0)),
61 |         ),
62 |       ),
63 |       EXE_LBU -> MuxLookup(
64 |         mem_addr2,
65 |         0.U,
66 |         Seq(
67 |           "b11".U -> Util.zeroExtend(mem_rdata(31, 24)),
68 |           "b10".U -> Util.zeroExtend(mem_rdata(23, 16)),
69 |           "b01".U -> Util.zeroExtend(mem_rdata(15, 8)),
70 |           "b00".U -> Util.zeroExtend(mem_rdata(7, 0)),
71 |         ),
72 |       ),
73 |       EXE_LH -> Mux(
74 |         mem_addr2(1),
75 |         Util.signedExtend(mem_rdata(31, 16)),
76 |         Util.signedExtend(mem_rdata(15, 0)),
77 |       ),
78 |       EXE_LHU -> Mux(
79 |         mem_addr2(1),
80 |         Util.zeroExtend(mem_rdata(31, 16)),
81 |         Util.zeroExtend(mem_rdata(15, 0)),
82 |       ),
83 |       EXE_LW -> mem_rdata,
84 |       EXE_LL -> mem_rdata,
85 |       EXE_LWL -> MuxLookup(
86 |         mem_addr2,
87 |         0.U,
88 |         Seq(
89 |           "b11".U -> mem_rdata,
90 |           "b10".U -> Cat(mem_rdata(23, 0), mem_wdata(7, 0)),
91 |           "b01".U -> Cat(mem_rdata(15, 0), mem_wdata(15, 0)),
92 |           "b00".U -> Cat(mem_rdata(7, 0), mem_wdata(23, 0)),
93 |         ),
94 |       ),
95 |       EXE_LWR -> MuxLookup(
96 |         mem_addr2,
97 |         0.U,
98 |         Seq(
99 |           "b11".U -> Cat(mem_wdata(31, 8), mem_rdata(31, 24)),
100 |           "b10".U -> Cat(mem_wdata(31, 16), mem_rdata(31, 16)),
101 |           "b01".U -> Cat(mem_wdata(31, 24), mem_rdata(31, 8)),
102 |           "b00".U -> mem_rdata,
103 |         ),
104 |       ),
105 |     ),
106 |   )
107 |   io.dataMemory.out.wdata := MuxLookup( // store data: SB/SH replicate the low byte/halfword across the word, SWL/SWR shift it
108 |     op,
109 |     mem_wdata, // default SW, SC
110 |     Seq(
111 |       EXE_SB -> Fill(4, mem_wdata(7, 0)),
112 |       EXE_SH -> Fill(2, mem_wdata(15, 0)),
113 |       EXE_SWL -> MuxLookup(
114 |         mem_addr2,
115 |         0.U,
116 |         Seq(
117 |           "b11".U -> mem_wdata,
118 |           "b10".U -> Cat(0.U(8.W), mem_wdata(31, 8)),
119 |           "b01".U -> Cat(0.U(16.W), mem_wdata(31, 16)),
120 |           "b00".U -> Cat(0.U(24.W), mem_wdata(31, 24)),
121 |         ),
122 |       ),
123 |       EXE_SWR -> MuxLookup(
124 |         mem_addr2,
125 |         0.U,
126 |         Seq(
127 |           "b11".U -> Cat(mem_wdata(7, 0), 0.U(24.W)),
128 |           "b10".U -> Cat(mem_wdata(15, 0), 0.U(16.W)),
129 |           "b01".U -> Cat(mem_wdata(23, 0), 0.U(8.W)),
130 |           "b00".U -> mem_wdata,
131 |         ),
132 |       ),
133 |     ),
134 |   )
135 |   io.dataMemory.out.wen := MuxLookup( // 4-bit per-byte write strobe; 0 for every op not listed (i.e. loads)
136 |     op,
137 |     0.U,
138 |     Seq(
139 |       EXE_SB -> MuxLookup(
140 |         mem_addr2,
141 |         0.U,
142 |         Seq(
143 |           "b11".U -> "b1000".U,
144 |           "b10".U -> "b0100".U,
145 |           "b01".U -> "b0010".U,
146 |           "b00".U -> "b0001".U,
147 |         ),
148 |       ),
149 |       EXE_SH -> Mux(mem_addr2(1), "b1100".U, "b0011".U),
150 |       EXE_SW -> "b1111".U,
151 |       EXE_SC -> Fill(4, io.memoryUnit.in.llbit), // SC writes all four bytes only when llbit is set
152 |       EXE_SWL -> MuxLookup(
153 |         mem_addr2,
154 |         0.U,
155 |         Seq(
156 |           "b11".U -> "b1111".U,
157 |           "b10".U -> "b0111".U,
158 |           "b01".U -> "b0011".U,
159 |           "b00".U -> "b0001".U,
160 |         ),
161 |       ),
162 |       EXE_SWR -> MuxLookup(
163 |         mem_addr2,
164 |         0.U,
165 |         Seq(
166 |           "b11".U -> "b1000".U,
167 |           "b10".U -> "b1100".U,
168 |           "b01".U -> "b1110".U,
169 |           "b00".U -> "b1111".U,
170 |         ),
171 |       ),
172 |     ),
173 |   )
174 |   io.dataMemory.out.rlen := MuxLookup( // size code: 2 for word-class ops, 1 for halfword, 0 for byte
175 |     op,
176 |     0.U,
177 |     Seq(
178 |       EXE_LW -> 2.U,
179 |       EXE_LL -> 2.U,
180 |       EXE_LH -> 1.U,
181 |       EXE_LHU -> 1.U,
182 |       EXE_LB -> 0.U,
183 |       EXE_LBU -> 0.U,
184 |       EXE_LWL -> 2.U,
185 |       EXE_LWR -> 2.U,
186 |       EXE_SW -> 2.U,
187 |       EXE_SWL -> 2.U,
188 |       EXE_SWR -> 2.U,
189 |       EXE_SC -> 2.U,
190 |       EXE_SH -> 1.U,
191 |       EXE_SB -> 0.U,
192 |     ),
193 |   )
194 | }
195 |
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/execute/Div.scala:
--------------------------------------------------------------------------------
1 | package cpu.pipeline.execute
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import cpu.defines._
6 | import cpu.defines.Const._
7 | import cpu.CpuConfig
8 | // Black-box signed divider with separate valid/ready input channels for divisor and dividend and a valid-qualified output.
9 | class SignedDiv extends BlackBox with HasBlackBoxResource {
10 |   val io = IO(new Bundle {
11 |     val aclk = Input(Clock())
12 |     // divisor
13 |     val s_axis_divisor_tvalid = Input(Bool())
14 |     val s_axis_divisor_tready = Output(Bool())
15 |     val s_axis_divisor_tdata = Input(UInt(DATA_WID.W))
16 |     // dividend
17 |     val s_axis_dividend_tvalid = Input(Bool())
18 |     val s_axis_dividend_tready = Output(Bool())
19 |     val s_axis_dividend_tdata = Input(UInt(DATA_WID.W))
20 |     // result
21 |     val m_axis_dout_tvalid = Output(Bool())
22 |     val m_axis_dout_tdata = Output(UInt(HILO_WID.W))
23 |   })
24 | }
25 | // Black-box unsigned divider; same port list as SignedDiv.
26 | class UnsignedDiv extends BlackBox with HasBlackBoxResource {
27 |   val io = IO(new Bundle {
28 |     val aclk = Input(Clock())
29 |     // divisor
30 |     val s_axis_divisor_tvalid = Input(Bool())
31 |     val s_axis_divisor_tready = Output(Bool())
32 |     val s_axis_divisor_tdata = Input(UInt(DATA_WID.W))
33 |     // dividend
34 |     val s_axis_dividend_tvalid = Input(Bool())
35 |     val s_axis_dividend_tready = Output(Bool())
36 |     val s_axis_dividend_tdata = Input(UInt(DATA_WID.W))
37 |     // result
38 |     val m_axis_dout_tvalid = Output(Bool())
39 |     val m_axis_dout_tdata = Output(UInt(HILO_WID.W))
40 |   })
41 | }
42 | // Divider front end: uses the black-box dividers when config.build is set, otherwise a behavioural model that reports ready after config.divClockNum counted cycles.
43 | class Div(implicit config: CpuConfig) extends Module {
44 |   val io = IO(new Bundle {
45 |     val src1 = Input(UInt(DATA_WID.W))
46 |     val src2 = Input(UInt(DATA_WID.W))
47 |     val signed = Input(Bool())
48 |     val start = Input(Bool())
49 |     val allow_to_go = Input(Bool())
50 |
51 |     val ready = Output(Bool())
52 |     val result = Output(UInt(HILO_WID.W))
53 |   })
54 |
55 |   if (config.build) {
56 |     val signedDiv = Module(new SignedDiv()).io
57 |     val unsignedDiv = Module(new UnsignedDiv()).io
58 |
59 |     signedDiv.aclk := clock
60 |     unsignedDiv.aclk := clock
61 |
62 |     // index 0 is the dividend, index 1 is the divisor
63 |     val unsignedDiv_sent = Seq.fill(2)(RegInit(false.B))
64 |     val unsignedDiv_done = RegInit(false.B)
65 |     val signedDiv_sent = Seq.fill(2)(RegInit(false.B))
66 |     val signedDiv_done = RegInit(false.B)
67 |     // *_sent(k) records that operand k was accepted (valid && ready); it clears once the result is ready and the stage advances.
68 |     when(unsignedDiv.s_axis_dividend_tready && unsignedDiv.s_axis_dividend_tvalid) {
69 |       unsignedDiv_sent(0) := true.B
70 |     }.elsewhen(io.ready && io.allow_to_go) {
71 |       unsignedDiv_sent(0) := false.B
72 |     }
73 |     when(unsignedDiv.s_axis_divisor_tready && unsignedDiv.s_axis_divisor_tvalid) {
74 |       unsignedDiv_sent(1) := true.B
75 |     }.elsewhen(io.ready && io.allow_to_go) {
76 |       unsignedDiv_sent(1) := false.B
77 |     }
78 |
79 |     when(signedDiv.s_axis_dividend_tready && signedDiv.s_axis_dividend_tvalid) {
80 |       signedDiv_sent(0) := true.B
81 |     }.elsewhen(io.ready && io.allow_to_go) {
82 |       signedDiv_sent(0) := false.B
83 |     }
84 |     when(signedDiv.s_axis_divisor_tready && signedDiv.s_axis_divisor_tvalid) {
85 |       signedDiv_sent(1) := true.B
86 |     }.elsewhen(io.ready && io.allow_to_go) {
87 |       signedDiv_sent(1) := false.B
88 |     }
89 |     // *_done remembers that an output-valid pulse arrived while the stage could not advance; it clears when allow_to_go is high.
90 |     when(signedDiv.m_axis_dout_tvalid && !io.allow_to_go) {
91 |       signedDiv_done := true.B
92 |     }.elsewhen(io.allow_to_go) {
93 |       signedDiv_done := false.B
94 |     }
95 |
96 |     when(unsignedDiv.m_axis_dout_tvalid && !io.allow_to_go) {
97 |       unsignedDiv_done := true.B
98 |     }.elsewhen(io.allow_to_go) {
99 |       unsignedDiv_done := false.B
100 |     }
101 |     // valid signals for the dividend and the divisor
102 |     signedDiv.s_axis_dividend_tvalid := io.start && !signedDiv_sent(0) && io.signed
103 |     signedDiv.s_axis_divisor_tvalid := io.start && !signedDiv_sent(1) && io.signed
104 |
105 |     unsignedDiv.s_axis_dividend_tvalid := io.start && !unsignedDiv_sent(0) && !io.signed
106 |     unsignedDiv.s_axis_divisor_tvalid := io.start && !unsignedDiv_sent(1) && !io.signed
107 |
108 |     // data values for the dividend and the divisor
109 |     signedDiv.s_axis_dividend_tdata := io.src1
110 |     signedDiv.s_axis_divisor_tdata := io.src2
111 |
112 |     unsignedDiv.s_axis_dividend_tdata := io.src1
113 |     unsignedDiv.s_axis_divisor_tdata := io.src2
114 |
115 |     io.ready := Mux(
116 |       io.signed,
117 |       signedDiv.m_axis_dout_tvalid || signedDiv_done,
118 |       unsignedDiv.m_axis_dout_tvalid || unsignedDiv_done,
119 |     )
120 |     val signedRes = // the two DATA_WID halves of the black-box output are swapped
121 |       Cat(signedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), signedDiv.m_axis_dout_tdata(HILO_WID - 1, DATA_WID))
122 |     val unsignedRes =
123 |       Cat(unsignedDiv.m_axis_dout_tdata(DATA_WID - 1, 0), unsignedDiv.m_axis_dout_tdata(HILO_WID - 1, DATA_WID))
124 |     io.result := Mux(io.signed, signedRes, unsignedRes)
125 |   } else {
126 |     val cnt = RegInit(0.U(log2Ceil(config.divClockNum + 1).W))
127 |     cnt := MuxCase(
128 |       cnt,
129 |       Seq(
130 |         (io.start && !io.ready) -> (cnt + 1.U),
131 |         io.allow_to_go -> 0.U,
132 |       ),
133 |     )
134 |
135 |     val div_signed = io.signed
136 |
137 |     val dividend_signed = io.src1(31) & div_signed
138 |     val divisor_signed = io.src2(31) & div_signed
139 |     // Divide magnitudes, then restore signs: the quotient is negative when operand signs differ, the remainder follows the dividend's sign.
140 |     val dividend_abs = Mux(dividend_signed, (-io.src1).asUInt, io.src1.asUInt)
141 |     val divisor_abs = Mux(divisor_signed, (-io.src2).asUInt, io.src2.asUInt)
142 |
143 |     val quotient_signed = (io.src1(31) ^ io.src2(31)) & div_signed
144 |     val remainder_signed = io.src1(31) & div_signed
145 |
146 |     val quotient_abs = dividend_abs / divisor_abs
147 |     val remainder_abs = dividend_abs - quotient_abs * divisor_abs
148 |
149 |     val quotient = RegInit(0.S(32.W))
150 |     val remainder = RegInit(0.S(32.W))
151 |
152 |     when(io.start) {
153 |       quotient := Mux(quotient_signed, (-quotient_abs).asSInt, quotient_abs.asSInt)
154 |       remainder := Mux(remainder_signed, (-remainder_abs).asSInt, remainder_abs.asSInt)
155 |     }
156 |
157 |     io.ready := cnt >= config.divClockNum.U
158 |     io.result := Cat(remainder, quotient) // remainder in the upper half, quotient in the lower half
159 |   }
160 | }
161 |
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/memory/MemoryUnit.scala:
--------------------------------------------------------------------------------
1 | package cpu.pipeline.memory
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import cpu.defines._
6 | import cpu.defines.Const._
7 | import cpu.CpuConfig
8 | import cpu.pipeline.decoder.RegWrite
9 | import cpu.pipeline.execute.Cp0MemoryUnit
10 | import cpu.pipeline.writeback.MemoryUnitWriteBackUnit
11 | // Memory stage: drives the data-memory request, selects load data for write-back, folds TLB exceptions into each slot, and raises fetch flushes.
12 | class MemoryUnit(implicit val config: CpuConfig) extends Module {
13 |   val io = IO(new Bundle {
14 |     val ctrl = new MemoryCtrl()
15 |     val memoryStage = Input(new ExecuteUnitMemoryUnit())
16 |     val fetchUnit = Output(new Bundle {
17 |       val flush = Bool()
18 |       val flush_pc = UInt(PC_WID.W)
19 |     })
20 |     val decoderUnit = Output(Vec(config.fuNum, new RegWrite()))
21 |     val cp0 = Flipped(new Cp0MemoryUnit())
22 |     val writeBackStage = Output(new MemoryUnitWriteBackUnit())
23 |     val dataMemory = new Bundle {
24 |       val in = Input(new Bundle {
25 |         val tlb = new Bundle {
26 |           val invalid = Bool()
27 |           val refill = Bool()
28 |           val modify = Bool()
29 |         }
30 |         val rdata = UInt(DATA_WID.W)
31 |       })
32 |       val out = Output(new Bundle {
33 |         val en = Bool()
34 |         val rlen = UInt(2.W)
35 |         val wen = UInt(4.W)
36 |         val addr = UInt(DATA_ADDR_WID.W)
37 |         val wdata = UInt(DATA_WID.W)
38 |       })
39 |     }
40 |   })
41 |   // Every memory-request field comes from the inst0.mem bundle; inst1 contributes only its exception info.
42 |   val dataMemoryAccess = Module(new DataMemoryAccess()).io
43 |   dataMemoryAccess.memoryUnit.in.mem_en := io.memoryStage.inst0.mem.en
44 |   dataMemoryAccess.memoryUnit.in.inst_info := io.memoryStage.inst0.mem.inst_info
45 |   dataMemoryAccess.memoryUnit.in.mem_wdata := io.memoryStage.inst0.mem.wdata
46 |   dataMemoryAccess.memoryUnit.in.mem_addr := io.memoryStage.inst0.mem.addr
47 |   dataMemoryAccess.memoryUnit.in.mem_sel := io.memoryStage.inst0.mem.sel
48 |   dataMemoryAccess.memoryUnit.in.ex(0) := io.memoryStage.inst0.ex
49 |   dataMemoryAccess.memoryUnit.in.ex(1) := io.memoryStage.inst1.ex
50 |   dataMemoryAccess.dataMemory.in.rdata := io.dataMemory.in.rdata
51 |   dataMemoryAccess.memoryUnit.in.llbit := io.memoryStage.inst0.mem.llbit
52 |   io.dataMemory.out := dataMemoryAccess.dataMemory.out
53 |   // The decoder receives the same register-write values that go to the write-back stage.
54 |   io.decoderUnit(0).wen := io.writeBackStage.inst0.inst_info.reg_wen
55 |   io.decoderUnit(0).waddr := io.writeBackStage.inst0.inst_info.reg_waddr
56 |   io.decoderUnit(0).wdata := io.writeBackStage.inst0.rd_info.wdata
57 |   io.decoderUnit(1).wen := io.writeBackStage.inst1.inst_info.reg_wen
58 |   io.decoderUnit(1).waddr := io.writeBackStage.inst1.inst_info.reg_waddr
59 |   io.decoderUnit(1).wdata := io.writeBackStage.inst1.rd_info.wdata
60 |   // Slot 0: load data replaces the incoming wdata when mem_wreg is set.
61 |   io.writeBackStage.inst0.pc := io.memoryStage.inst0.pc
62 |   io.writeBackStage.inst0.inst_info := io.memoryStage.inst0.inst_info
63 |   io.writeBackStage.inst0.rd_info.wdata := Mux(
64 |     io.writeBackStage.inst0.inst_info.mem_wreg,
65 |     dataMemoryAccess.memoryUnit.out.rdata,
66 |     io.memoryStage.inst0.rd_info.wdata,
67 |   )
68 |   io.writeBackStage.inst0.ex := io.memoryStage.inst0.ex // bulk copy; excode, tlb_refill and flush_req are overridden below
69 |   val inst0_access_mem =
70 |     (io.dataMemory.out.en && (io.dataMemory.in.tlb.invalid || io.dataMemory.in.tlb.refill) && io.memoryStage.inst0.inst_info.fusel === FU_MEM)
71 |   val inst0_tlbmod =
72 |     (io.dataMemory.in.tlb.modify && io.dataMemory.out.wen.orR && io.memoryStage.inst0.inst_info.fusel === FU_MEM)
73 |   io.writeBackStage.inst0.ex.excode := MuxCase( // priority: existing exception, then TLB invalid/refill (store vs load by wen), then TLB modify
74 |     io.memoryStage.inst0.ex.excode,
75 |     Seq(
76 |       (io.memoryStage.inst0.ex.excode =/= EX_NO) -> io.memoryStage.inst0.ex.excode,
77 |       inst0_access_mem -> Mux(io.dataMemory.out.wen.orR, EX_TLBS, EX_TLBL),
78 |       inst0_tlbmod -> EX_MOD,
79 |     ),
80 |   )
81 |   io.writeBackStage.inst0.ex.tlb_refill := io.memoryStage.inst0.ex.tlb_refill && io.memoryStage.inst0.ex.excode === EX_TLBL || io.dataMemory.in.tlb.refill && io.memoryStage.inst0.inst_info.fusel === FU_MEM
82 |   io.writeBackStage.inst0.ex.flush_req := io.memoryStage.inst0.ex.flush_req || io.writeBackStage.inst0.ex.excode =/= EX_NO || io.writeBackStage.inst0.ex.tlb_refill
83 |   io.writeBackStage.inst0.cp0 := io.memoryStage.inst0.cp0
84 |   // Slot 1: same handling as slot 0, except that no cp0 field is forwarded.
85 |   io.writeBackStage.inst1.pc := io.memoryStage.inst1.pc
86 |   io.writeBackStage.inst1.inst_info := io.memoryStage.inst1.inst_info
87 |   io.writeBackStage.inst1.rd_info.wdata := Mux(
88 |     io.writeBackStage.inst1.inst_info.mem_wreg,
89 |     dataMemoryAccess.memoryUnit.out.rdata,
90 |     io.memoryStage.inst1.rd_info.wdata,
91 |   )
92 |   io.writeBackStage.inst1.ex := io.memoryStage.inst1.ex
93 |   val inst1_access_mem =
94 |     (io.dataMemory.out.en && (io.dataMemory.in.tlb.invalid || io.dataMemory.in.tlb.refill) && io.memoryStage.inst1.inst_info.fusel === FU_MEM)
95 |   val inst1_tlbmod =
96 |     (io.dataMemory.in.tlb.modify && io.dataMemory.out.wen.orR && io.memoryStage.inst1.inst_info.fusel === FU_MEM)
97 |   io.writeBackStage.inst1.ex.excode := MuxCase(
98 |     io.memoryStage.inst1.ex.excode,
99 |     Seq(
100 |       (io.memoryStage.inst1.ex.excode =/= EX_NO) -> io.memoryStage.inst1.ex.excode,
101 |       inst1_access_mem -> Mux(io.dataMemory.out.wen.orR, EX_TLBS, EX_TLBL),
102 |       inst1_tlbmod -> EX_MOD,
103 |     ),
104 |   )
105 |   io.writeBackStage.inst1.ex.tlb_refill := io.memoryStage.inst1.ex.tlb_refill && io.memoryStage.inst1.ex.excode === EX_TLBL || io.dataMemory.in.tlb.refill && io.memoryStage.inst1.inst_info.fusel === FU_MEM
106 |   io.writeBackStage.inst1.ex.flush_req := io.memoryStage.inst1.ex.flush_req || io.writeBackStage.inst1.ex.excode =/= EX_NO || io.writeBackStage.inst1.ex.tlb_refill
107 |   // cp0 is given the post-memory pc and exception info of both slots.
108 |   io.cp0.in.inst(0).pc := io.writeBackStage.inst0.pc
109 |   io.cp0.in.inst(0).ex := io.writeBackStage.inst0.ex
110 |   io.cp0.in.inst(1).pc := io.writeBackStage.inst1.pc
111 |   io.cp0.in.inst(1).ex := io.writeBackStage.inst1.ex
112 |   // Flush fetch on a cp0 flush, or when slot 0 is an MTC0 and the stage advances (restarting at pc + 4).
113 |   io.fetchUnit.flush := Mux(
114 |     io.cp0.out.flush,
115 |     io.cp0.out.flush,
116 |     io.writeBackStage.inst0.inst_info.op === EXE_MTC0 && io.ctrl.allow_to_go,
117 |   )
118 |   io.fetchUnit.flush_pc := Mux(io.cp0.out.flush, io.cp0.out.flush_pc, io.writeBackStage.inst0.pc + 4.U)
119 |
120 |   io.ctrl.flush_req := io.fetchUnit.flush
121 |   io.ctrl.eret := io.writeBackStage.inst0.ex.eret
122 | }
123 |
--------------------------------------------------------------------------------
/chisel/src/main/resources/mycpu_top_for_soc_simulator.v:
--------------------------------------------------------------------------------
1 | module mycpu_top (
2 |     input [ 5:0] ext_int,
3 |     input aclk,
4 |     input aresetn,
5 |     //axi interface
6 |
7 |     //read request
8 |     output [ 3:0] arid,
9 |     output [31:0] araddr,
10 |     output [ 7:0] arlen,
11 |     output [ 2:0] arsize,
12 |     output [ 1:0] arburst,
13 |     output [ 1:0] arlock,
14 |     output [ 3:0] arcache,
15 |     output [ 2:0] arprot,
16 |     output arvalid,
17 |     input arready,
18 |
19 |     //read response
20 |     input [ 3:0] rid,
21 |     input [31:0] rdata,
22 |     input [ 1:0] rresp,
23 |     input rlast,
24 |     input rvalid,
25 |     output rready,
26 |
27 |     //write request
28 |     output [ 3:0] awid,
29 |     output [31:0] awaddr,
30 |     output [ 7:0] awlen,
31 |     output [ 2:0] awsize,
32 |     output [ 1:0] awburst,
33 |     output [ 1:0] awlock,
34 |     output [ 3:0] awcache,
35 |     output [ 2:0] awprot,
36 |     output awvalid,
37 |     input awready,
38 |
39 |     //write data
40 |     output [ 3:0] wid,
41 |     output [31:0] wdata,
42 |     output [ 3:0] wstrb,
43 |     output wlast,
44 |     output wvalid,
45 |     input wready,
46 |
47 |     //write response
48 |     input [ 3:0] bid,
49 |     input [ 1:0] bresp,
50 |     input bvalid,
51 |     output bready,
52 |
53 |     // trace debug interface
54 |     output [31:0] debug_wb_pc,
55 |     output [ 3:0] debug_wb_rf_wen,
56 |     output [ 4:0] debug_wb_rf_wnum,
57 |     output [31:0] debug_wb_rf_wdata,
58 |     // for soc-simulator
59 |     output [31:0] statistic_cpu_soc_cp0_count,
60 |     output [31:0] statistic_cpu_soc_cp0_random,
61 |     output [31:0] statistic_cpu_soc_cp0_cause,
62 |     output statistic_cpu_soc_int,
63 |     output statistic_cpu_soc_commit,
64 |
65 |     // bpu statistic
66 |     output [31:0] statistic_cpu_bpu_branch,
67 |     output [31:0] statistic_cpu_bpu_success,
68 |
69 |     // cache statistic
70 |     output [31:0] statistic_cache_icache_request,
71 |     output [31:0] statistic_cache_icache_hit,
72 |     output [31:0] statistic_cache_dcache_request,
73 |     output [31:0] statistic_cache_dcache_hit
74 | );
75 |   // Instantiate the PuaMips core and map its io_* ports one-to-one onto this wrapper's ports; reset is the inverted active-low aresetn.
76 |   PuaMips puamips(
77 |     .clock (aclk ),
78 |     .reset (~aresetn ),
79 |     .io_ext_int (ext_int ),
80 |     .io_axi_ar_bits_id (arid ),
81 |     .io_axi_ar_bits_addr (araddr ),
82 |     .io_axi_ar_bits_len (arlen ),
83 |     .io_axi_ar_bits_size (arsize ),
84 |     .io_axi_ar_bits_burst (arburst ),
85 |     .io_axi_ar_bits_lock (arlock ),
86 |     .io_axi_ar_bits_cache (arcache ),
87 |     .io_axi_ar_bits_prot (arprot ),
88 |     .io_axi_ar_valid (arvalid ),
89 |     .io_axi_ar_ready (arready ),
90 |     .io_axi_r_bits_id (rid ),
91 |     .io_axi_r_bits_data (rdata ),
92 |     .io_axi_r_bits_resp (rresp ),
93 |     .io_axi_r_bits_last (rlast ),
94 |     .io_axi_r_valid (rvalid ),
95 |     .io_axi_r_ready (rready ),
96 |     .io_axi_aw_bits_id (awid ),
97 |     .io_axi_aw_bits_addr (awaddr ),
98 |     .io_axi_aw_bits_len (awlen ),
99 |     .io_axi_aw_bits_size (awsize ),
100 |     .io_axi_aw_bits_burst (awburst ),
101 |     .io_axi_aw_bits_lock (awlock ),
102 |     .io_axi_aw_bits_cache (awcache ),
103 |     .io_axi_aw_bits_prot (awprot ),
104 |     .io_axi_aw_valid (awvalid ),
105 |     .io_axi_aw_ready (awready ),
106 |     .io_axi_w_bits_id (wid ),
107 |     .io_axi_w_bits_data (wdata ),
108 |     .io_axi_w_bits_strb (wstrb ),
109 |     .io_axi_w_bits_last (wlast ),
110 |     .io_axi_w_valid (wvalid ),
111 |     .io_axi_w_ready (wready ),
112 |     .io_axi_b_bits_id (bid ),
113 |     .io_axi_b_bits_resp (bresp ),
114 |     .io_axi_b_valid (bvalid ),
115 |     .io_axi_b_ready (bready ),
116 |     .io_debug_wb_pc (debug_wb_pc ),
117 |     .io_debug_wb_rf_wen (debug_wb_rf_wen ),
118 |     .io_debug_wb_rf_wnum (debug_wb_rf_wnum ),
119 |     .io_debug_wb_rf_wdata (debug_wb_rf_wdata ),
120 |     .io_statistic_cpu_soc_cp0_count (statistic_cpu_soc_cp0_count ),
121 |     .io_statistic_cpu_soc_cp0_random (statistic_cpu_soc_cp0_random ),
122 |     .io_statistic_cpu_soc_cp0_cause (statistic_cpu_soc_cp0_cause ),
123 |     .io_statistic_cpu_soc_int (statistic_cpu_soc_int ),
124 |     .io_statistic_cpu_soc_commit (statistic_cpu_soc_commit ),
125 |     .io_statistic_cpu_bpu_success (statistic_cpu_bpu_success ),
126 |     .io_statistic_cpu_bpu_branch (statistic_cpu_bpu_branch ),
127 |     .io_statistic_cache_icache_request (statistic_cache_icache_request ),
128 |     .io_statistic_cache_icache_hit (statistic_cache_icache_hit ),
129 |     .io_statistic_cache_dcache_request (statistic_cache_dcache_request ),
130 |     .io_statistic_cache_dcache_hit (statistic_cache_dcache_hit )
131 |   );
132 | endmodule
133 |
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/execute/ExecuteUnit.scala:
--------------------------------------------------------------------------------
1 | package cpu.pipeline.execute
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import cpu.CpuConfig
6 | import cpu.defines._
7 | import cpu.defines.Const._
8 | import cpu.pipeline.decoder.RegWrite
9 | import cpu.pipeline.memory.{ExecuteUnitMemoryUnit, Cp0Info}
10 | import cpu.pipeline.fetch.ExecuteUnitBranchPredictor
11 | // Execute-stage wrapper: wires Fu and ExeAccessMemCtrl, redirects fetch on register jumps and mispredictions, and builds the memory-stage and forwarding outputs.
12 | class ExecuteUnit(implicit val config: CpuConfig) extends Module {
13 |   val io = IO(new Bundle {
14 |     val ctrl = new ExecuteCtrl()
15 |     val executeStage = Input(new DecoderUnitExecuteUnit())
16 |     val cp0 = Flipped(new Cp0ExecuteUnit())
17 |     val bpu = new ExecuteUnitBranchPredictor()
18 |     val fetchUnit = Output(new Bundle {
19 |       val branch = Bool()
20 |       val target = UInt(PC_WID.W)
21 |     })
22 |     val decoderUnit = new Bundle {
23 |       val forward = Output(
24 |         Vec(
25 |           config.fuNum,
26 |           new Bundle {
27 |             val exe = new RegWrite()
28 |             val exe_mem_wreg = Bool()
29 |           },
30 |         ),
31 |       )
32 |       val inst0_bd = Input(Bool())
33 |     }
34 |     val memoryStage = Output(new ExecuteUnitMemoryUnit())
35 |
36 |     val statistic = if (!config.build) Some(new BranchPredictorUnitStatistic()) else None
37 |   })
38 |
39 |   val fu = Module(new Fu()).io
40 |   val accessMemCtrl = Module(new ExeAccessMemCtrl()).io
41 |
42 |   io.ctrl.inst(0).mem_wreg := io.executeStage.inst0.inst_info.mem_wreg
43 |   io.ctrl.inst(0).reg_waddr := io.executeStage.inst0.inst_info.reg_waddr
44 |   io.ctrl.inst(1).mem_wreg := io.executeStage.inst1.inst_info.mem_wreg
45 |   io.ctrl.inst(1).reg_waddr := io.executeStage.inst1.inst_info.reg_waddr
46 |   io.ctrl.branch := io.ctrl.allow_to_go &&
47 |     (io.executeStage.inst0.jb_info.jump_regiser || fu.branch.pred_fail)
48 |   // cp0 sees slot 0's inst_info only when slot 0 has no pending flush request; otherwise it sees an all-zero InstInfo.
49 |   io.cp0.in.mtc0_wdata := io.executeStage.inst0.src_info.src2_data
50 |   io.cp0.in.inst_info(0) := Mux(
51 |     !io.executeStage.inst0.ex.flush_req,
52 |     io.executeStage.inst0.inst_info,
53 |     0.U.asTypeOf(new InstInfo()),
54 |   )
55 |   io.cp0.in.inst_info(1) := io.executeStage.inst1.inst_info
56 |
57 |   // input accessMemCtrl
58 |   accessMemCtrl.inst(0).inst_info := io.executeStage.inst0.inst_info
59 |   accessMemCtrl.inst(0).src_info := io.executeStage.inst0.src_info
60 |   accessMemCtrl.inst(0).ex.in := io.executeStage.inst0.ex
61 |   accessMemCtrl.inst(1).inst_info := io.executeStage.inst1.inst_info
62 |   accessMemCtrl.inst(1).src_info := io.executeStage.inst1.src_info
63 |   accessMemCtrl.inst(1).ex.in := io.executeStage.inst1.ex
64 |
65 |   // input fu
66 |   fu.ctrl <> io.ctrl.fu
67 |   fu.inst(0).pc := io.executeStage.inst0.pc
68 |   fu.inst(0).hilo_wen := io.executeStage.inst0.inst_info.whilo
69 |   fu.inst(0).mul_en := io.executeStage.inst0.inst_info.mul
70 |   fu.inst(0).div_en := io.executeStage.inst0.inst_info.div
71 |   fu.inst(0).inst_info := io.executeStage.inst0.inst_info
72 |   fu.inst(0).src_info := io.executeStage.inst0.src_info
73 |   fu.inst(0).ex.in :=
74 |     Mux(io.executeStage.inst0.inst_info.fusel === FU_MEM, accessMemCtrl.inst(0).ex.out, io.executeStage.inst0.ex)
75 |   fu.inst(1).pc := io.executeStage.inst1.pc
76 |   fu.inst(1).hilo_wen := io.executeStage.inst1.inst_info.whilo
77 |   fu.inst(1).mul_en := io.executeStage.inst1.inst_info.mul
78 |   fu.inst(1).div_en := io.executeStage.inst1.inst_info.div
79 |   fu.inst(1).inst_info := io.executeStage.inst1.inst_info
80 |   fu.inst(1).src_info := io.executeStage.inst1.src_info
81 |   fu.inst(1).ex.in := io.executeStage.inst1.ex
82 |   fu.cp0_rdata := io.cp0.out.cp0_rdata
83 |   fu.branch.pred_branch := io.executeStage.inst0.jb_info.pred_branch
84 |   // Branch predictor update information is taken from slot 0.
85 |   io.bpu.pc := io.executeStage.inst0.pc
86 |   io.bpu.update_pht_index := io.executeStage.inst0.jb_info.update_pht_index
87 |   io.bpu.branch := fu.branch.branch
88 |   io.bpu.branch_inst := io.executeStage.inst0.jb_info.branch_inst
89 |   // Redirect fetch on a register jump or a mispredicted branch, once the stage may advance (same condition as io.ctrl.branch).
90 |   io.fetchUnit.branch := io.ctrl.allow_to_go &&
91 |     (io.executeStage.inst0.jb_info.jump_regiser || fu.branch.pred_fail)
92 |   io.fetchUnit.target := MuxCase(
93 |     io.executeStage.inst0.pc + 4.U, // default: continue sequentially
94 |     Seq(
95 |       (fu.branch.pred_fail && fu.branch.branch) -> io.executeStage.inst0.jb_info.branch_target,
96 |       (fu.branch.pred_fail && !fu.branch.branch) -> Mux(
97 |         io.decoderUnit.inst0_bd || io.executeStage.inst1.ex.bd,
98 |         io.executeStage.inst0.pc + 8.U,
99 |         io.executeStage.inst0.pc + 4.U,
100 |       ),
101 |       (io.executeStage.inst0.jb_info.jump_regiser) -> io.executeStage.inst0.src_info.src1_data,
102 |     ),
103 |   )
104 |
105 |   io.ctrl.fu_stall := fu.stall_req
106 |   // The single memory request always travels in inst0.mem; mem.sel carries each slot's mem_sel from accessMemCtrl.
107 |   io.memoryStage.inst0.mem.en := accessMemCtrl.mem.out.en
108 |   io.memoryStage.inst0.mem.ren := accessMemCtrl.mem.out.ren
109 |   io.memoryStage.inst0.mem.wen := accessMemCtrl.mem.out.wen
110 |   io.memoryStage.inst0.mem.addr := accessMemCtrl.mem.out.addr
111 |   io.memoryStage.inst0.mem.wdata := accessMemCtrl.mem.out.wdata
112 |   io.memoryStage.inst0.mem.sel := accessMemCtrl.inst.map(_.mem_sel)
113 |   io.memoryStage.inst0.mem.inst_info := accessMemCtrl.mem.out.inst_info
114 |   io.memoryStage.inst0.mem.llbit := fu.llbit
115 |   // Per slot: exception info comes from accessMemCtrl for FU_MEM instructions, otherwise from Fu.
116 |   io.memoryStage.inst0.pc := io.executeStage.inst0.pc
117 |   io.memoryStage.inst0.inst_info := io.executeStage.inst0.inst_info
118 |   io.memoryStage.inst0.rd_info.wdata := fu.inst(0).result
119 |   io.memoryStage.inst0.ex := Mux(
120 |     io.executeStage.inst0.inst_info.fusel === FU_MEM,
121 |     accessMemCtrl.inst(0).ex.out,
122 |     fu.inst(0).ex.out,
123 |   )
124 |   io.memoryStage.inst0.cp0 := io.cp0.out.debug
125 |
126 |   io.memoryStage.inst1.pc := io.executeStage.inst1.pc
127 |   io.memoryStage.inst1.inst_info := io.executeStage.inst1.inst_info
128 |   io.memoryStage.inst1.rd_info.wdata := fu.inst(1).result
129 |   io.memoryStage.inst1.ex := Mux(
130 |     io.executeStage.inst1.inst_info.fusel === FU_MEM,
131 |     accessMemCtrl.inst(1).ex.out,
132 |     fu.inst(1).ex.out,
133 |   )
134 |   // Forward this stage's register writes (and the mem_wreg flag) to the decoder.
135 |   io.decoderUnit.forward(0).exe.wen := io.memoryStage.inst0.inst_info.reg_wen
136 |   io.decoderUnit.forward(0).exe.waddr := io.memoryStage.inst0.inst_info.reg_waddr
137 |   io.decoderUnit.forward(0).exe.wdata := io.memoryStage.inst0.rd_info.wdata
138 |   io.decoderUnit.forward(0).exe_mem_wreg := io.memoryStage.inst0.inst_info.mem_wreg
139 |
140 |   io.decoderUnit.forward(1).exe.wen := io.memoryStage.inst1.inst_info.reg_wen
141 |   io.decoderUnit.forward(1).exe.waddr := io.memoryStage.inst1.inst_info.reg_waddr
142 |   io.decoderUnit.forward(1).exe.wdata := io.memoryStage.inst1.rd_info.wdata
143 |   io.decoderUnit.forward(1).exe_mem_wreg := io.memoryStage.inst1.inst_info.mem_wreg
144 |
145 |   // ===----------------------------------------------------------------===
146 |   // statistic
147 |   // ===----------------------------------------------------------------===
148 |   if (!config.build) {
149 |     io.statistic.get <> fu.statistic.get
150 |   }
151 | }
152 |
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/fetch/BranchPredictorUnit.scala:
--------------------------------------------------------------------------------
1 | package cpu.pipeline.fetch
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import cpu.defines.Const._
6 | import cpu._
7 | import cpu.pipeline.decoder.Src12Read
8 | // Execute-stage outputs toward the branch predictor: pc, the PHT index to update, whether the instruction is a branch, and the branch signal from the execute stage.
9 | class ExecuteUnitBranchPredictor extends Bundle {
10 |   val bpuConfig = new BranchPredictorConfig()
11 |   val pc = Output(UInt(DATA_ADDR_WID.W))
12 |   val update_pht_index = Output(UInt(bpuConfig.phtDepth.W))
13 |   val branch_inst = Output(Bool())
14 |   val branch = Output(Bool())
15 | }
16 |
17 | class BranchPredictorIO(implicit config: CpuConfig) extends Bundle {
18 |   val bpuConfig = new BranchPredictorConfig()
19 |   val decoder = new Bundle {
20 |     val inst = Input(UInt(INST_WID.W))
21 |     val op = Input(UInt(OP_WID.W))
22 |     val ena = Input(Bool())
23 |     val pc = Input(UInt(DATA_ADDR_WID.W))
24 |     val pc_plus4 = Input(UInt(DATA_ADDR_WID.W))
25 |     val pht_index = Input(UInt(bpuConfig.phtDepth.W))
26 |
27 |     val rs1 = Input(UInt(REG_ADDR_WID.W))
28 |     val rs2 = Input(UInt(REG_ADDR_WID.W))
29 |
30 |     val branch_inst = Output(Bool())
31 |     val pred_branch = Output(Bool())
32 |     val branch_target = Output(UInt(DATA_ADDR_WID.W))
33 |     val update_pht_index = Output(UInt(bpuConfig.phtDepth.W))
34 |   }
35 |
36 |   val instBuffer = new Bundle {
37 |     val pc = Input(Vec(config.instFetchNum, UInt(PC_WID.W)))
38 |
val pht_index = Output(Vec(config.instFetchNum, UInt(bpuConfig.phtDepth.W))) 39 | } 40 | 41 | val execute = Flipped(new ExecuteUnitBranchPredictor()) 42 | 43 | val regfile = if (config.branchPredictor == "pesudo") Some(new Src12Read()) else None 44 | } 45 | 46 | class BranchPredictorUnit(implicit config: CpuConfig) extends Module { 47 | val io = IO(new BranchPredictorIO()) 48 | 49 | if (config.branchPredictor == "adaptive") { 50 | val adaptive_predictor = Module(new AdaptiveTwoLevelPredictor()) 51 | io <> adaptive_predictor.io 52 | } 53 | 54 | if (config.branchPredictor == "pesudo") { 55 | val pesudo_predictor = Module(new PesudoBranchPredictor()) 56 | io <> pesudo_predictor.io 57 | } 58 | 59 | if (config.branchPredictor == "global") { 60 | val global_predictor = Module(new GlobalBranchPredictor()) 61 | io <> global_predictor.io 62 | } 63 | } 64 | 65 | class PesudoBranchPredictor(implicit config: CpuConfig) extends Module { 66 | val io = IO(new BranchPredictorIO()) 67 | io.decoder.branch_inst := VecInit(EXE_BEQ, EXE_BNE, EXE_BGTZ, EXE_BLEZ, EXE_BGEZ, EXE_BGEZAL, EXE_BLTZ, EXE_BLTZAL) 68 | .contains(io.decoder.op) 69 | io.decoder.branch_target := io.decoder.pc_plus4 + Cat( 70 | Fill(14, io.decoder.inst(15)), 71 | io.decoder.inst(15, 0), 72 | 0.U(2.W), 73 | ) 74 | 75 | io.regfile.get.src1.raddr := io.decoder.rs1 76 | io.regfile.get.src2.raddr := io.decoder.rs2 77 | val (src1, src2) = (io.regfile.get.src1.rdata, io.regfile.get.src2.rdata) 78 | val pred_branch = MuxLookup( 79 | io.decoder.op, 80 | false.B, 81 | Seq( 82 | EXE_BEQ -> (src1 === src2), 83 | EXE_BNE -> (src1 =/= src2), 84 | EXE_BGTZ -> (!src1(31) && (src1 =/= 0.U)), 85 | EXE_BLEZ -> (src1(31) || src1 === 0.U), 86 | EXE_BGEZ -> (!src1(31)), 87 | EXE_BGEZAL -> (!src1(31)), 88 | EXE_BLTZ -> (src1(31)), 89 | EXE_BLTZAL -> (src1(31)), 90 | ), 91 | ) 92 | 93 | io.decoder.pred_branch := io.decoder.ena && io.decoder.branch_inst && pred_branch 94 | } 95 | 96 | class GlobalBranchPredictor( 97 | GHR_DEPTH: Int = 4, // 
可以记录的历史记录个数 98 | PC_HASH_WID: Int = 4, // 取得PC的宽度 99 | PHT_DEPTH: Int = 6, // 可以记录的历史个数 100 | BHT_DEPTH: Int = 4, // 取得PC的宽度 101 | )(implicit 102 | config: CpuConfig, 103 | ) extends Module { 104 | val io = IO(new BranchPredictorIO()) 105 | 106 | val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4) 107 | 108 | io.decoder.branch_inst := VecInit(EXE_BEQ, EXE_BNE, EXE_BGTZ, EXE_BLEZ, EXE_BGEZ, EXE_BGEZAL, EXE_BLTZ, EXE_BLTZAL) 109 | .contains(io.decoder.op) 110 | io.decoder.branch_target := io.decoder.pc_plus4 + Cat( 111 | Fill(14, io.decoder.inst(15)), 112 | io.decoder.inst(15, 0), 113 | 0.U(2.W), 114 | ) 115 | // 局部预测模式 116 | 117 | val bht = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W)))) 118 | val pht = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken))) 119 | val bht_index = io.decoder.pc(1 + BHT_DEPTH, 2) 120 | val pht_index = bht(bht_index) 121 | 122 | io.decoder.pred_branch := 123 | io.decoder.ena && io.decoder.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken) 124 | val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2) 125 | val update_pht_index = bht(update_bht_index) 126 | 127 | when(io.execute.branch_inst) { 128 | bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch) 129 | switch(pht(update_pht_index)) { 130 | is(strongly_not_taken) { 131 | pht(update_pht_index) := Mux(io.execute.branch, weakly_not_taken, strongly_not_taken) 132 | } 133 | is(weakly_not_taken) { 134 | pht(update_pht_index) := Mux(io.execute.branch, weakly_taken, strongly_not_taken) 135 | } 136 | is(weakly_taken) { 137 | pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_not_taken) 138 | } 139 | is(strongly_taken) { 140 | pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_taken) 141 | } 142 | } 143 | } 144 | 145 | } 146 | 147 | class AdaptiveTwoLevelPredictor( 148 | )(implicit 149 | config: CpuConfig, 150 | ) extends Module 
{ 151 | val bpuConfig = new BranchPredictorConfig() 152 | val PHT_DEPTH = bpuConfig.phtDepth 153 | val BHT_DEPTH = bpuConfig.bhtDepth 154 | val io = IO(new BranchPredictorIO()) 155 | 156 | val strongly_not_taken :: weakly_not_taken :: weakly_taken :: strongly_taken :: Nil = Enum(4) 157 | 158 | io.decoder.branch_inst := 159 | VecInit(EXE_BEQ, EXE_BNE, EXE_BGTZ, EXE_BLEZ, EXE_BGEZ, EXE_BGEZAL, EXE_BLTZ, EXE_BLTZAL).contains(io.decoder.op) 160 | io.decoder.branch_target := io.decoder.pc_plus4 + Cat( 161 | Fill(14, io.decoder.inst(15)), 162 | io.decoder.inst(15, 0), 163 | 0.U(2.W), 164 | ) 165 | 166 | val bht = RegInit(VecInit(Seq.fill(1 << BHT_DEPTH)(0.U(PHT_DEPTH.W)))) 167 | val pht = RegInit(VecInit(Seq.fill(1 << PHT_DEPTH)(strongly_taken))) 168 | val pht_index = io.decoder.pht_index 169 | 170 | for (i <- 0 until config.instFetchNum) { 171 | io.instBuffer.pht_index(i) := bht(io.instBuffer.pc(i)(1 + BHT_DEPTH, 2)) 172 | } 173 | 174 | io.decoder.pred_branch := 175 | io.decoder.ena && io.decoder.branch_inst && (pht(pht_index) === weakly_taken || pht(pht_index) === strongly_taken) 176 | io.decoder.update_pht_index := bht(io.decoder.pc(1 + BHT_DEPTH, 2)) 177 | 178 | val update_bht_index = io.execute.pc(1 + BHT_DEPTH, 2) 179 | val update_pht_index = io.execute.update_pht_index 180 | 181 | when(io.execute.branch_inst) { 182 | bht(update_bht_index) := Cat(bht(update_bht_index)(PHT_DEPTH - 2, 0), io.execute.branch) 183 | switch(pht(update_pht_index)) { 184 | is(strongly_not_taken) { 185 | pht(update_pht_index) := Mux(io.execute.branch, weakly_not_taken, strongly_not_taken) 186 | } 187 | is(weakly_not_taken) { 188 | pht(update_pht_index) := Mux(io.execute.branch, weakly_taken, strongly_not_taken) 189 | } 190 | is(weakly_taken) { 191 | pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_not_taken) 192 | } 193 | is(strongly_taken) { 194 | pht(update_pht_index) := Mux(io.execute.branch, strongly_taken, weakly_taken) 195 | } 196 | } 197 | } 198 | 199 | } 200 | 
--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/pipeline/execute/Mul.scala:
--------------------------------------------------------------------------------
package cpu.pipeline.execute

import chisel3._
import chisel3.util._
import cpu.defines._
import cpu.defines.Const._
import cpu.CpuConfig

/** Black-box signed multiplier with clock-enable. Operands are one bit wider than the data width
  * so that the wrapper can feed either sign- or zero-extended values.
  */
class SignedMul extends BlackBox with HasBlackBoxResource {
  val io = IO(new Bundle {
    val CLK = Input(Clock())
    val CE  = Input(Bool())
    val A   = Input(UInt((DATA_WID + 1).W))
    val B   = Input(UInt((DATA_WID + 1).W))

    val P = Output(UInt((HILO_WID + 2).W))
  })
}

/** Multi-cycle multiplier.
  *
  * While `start` is high and the result is not ready, an internal counter advances; `ready` is
  * asserted once the counter has reached `config.mulClockNum`. The counter is cleared when
  * `allow_to_go` is high (and the counter is not advancing).
  *
  * With `config.build` set the product comes from the `SignedMul` black box; otherwise it is
  * computed with Chisel's `*` operator and registered while `start` is high.
  */
class Mul(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val src1        = Input(UInt(DATA_WID.W))
    val src2        = Input(UInt(DATA_WID.W))
    val signed      = Input(Bool())
    val start       = Input(Bool())
    val allow_to_go = Input(Bool())

    val ready  = Output(Bool())
    val result = Output(UInt(HILO_WID.W))
  })

  // Latency counter and ready flag are identical for both implementations.
  val cnt = RegInit(0.U(log2Ceil(config.mulClockNum + 1).W))
  cnt := MuxCase(
    cnt,
    Seq(
      (io.start && !io.ready) -> (cnt + 1.U),
      io.allow_to_go          -> 0.U,
    ),
  )
  io.ready := cnt >= config.mulClockNum.U

  if (config.build) {
    val signedMul = Module(new SignedMul()).io

    signedMul.CLK := clock
    signedMul.CE  := io.start
    when(io.signed) {
      // sign-extend by one bit
      signedMul.A := Cat(io.src1(DATA_WID - 1), io.src1)
      signedMul.B := Cat(io.src2(DATA_WID - 1), io.src2)
    }.otherwise {
      // zero-extend by one bit
      signedMul.A := Cat(0.U(1.W), io.src1)
      signedMul.B := Cat(0.U(1.W), io.src2)
    }
    io.result := signedMul.P(HILO_WID - 1, 0)
  } else {
    val signed   = RegInit(0.U(HILO_WID.W))
    val unsigned = RegInit(0.U(HILO_WID.W))
    when(io.start) {
      signed   := (io.src1.asSInt * io.src2.asSInt).asUInt
      unsigned := io.src1 * io.src2
    }
    io.result := Mux(io.signed, signed, unsigned)
  }
}

// NOTE(review): the commented-out array multiplier below is dead code and does not compile as
// written (for example `val 32 = len - 1`, and it refers to names such as SignExt, C22/C32/C53,
// regEnable, PipelineReg and XSDebug that are not defined in this file) - confirm whether it
// should be repaired or removed.

// class ArrayMulDataModule(len: Int) extends Module {
//   val io = IO(new Bundle() {
//     val a, b       = Input(UInt(len.W))
//     val regEnables = Input(Vec(2, Bool()))
//     val result     = Output(UInt((2 * len).W))
//   })
//   val (a, b) = (io.a, io.b)

//   val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len + 1).W))
//   b_sext  := SignExt(b, len + 1)
//   bx2     := b_sext << 1
//   neg_b   := (~b_sext).asUInt()
//   neg_bx2 := neg_b << 1

//   val columns: Array[Seq[Bool]] = Array.fill(2 * len)(Seq())

//   var last_x = WireInit(0.U(3.W))
//   for (i <- Range(0, len, 2)) {
//     val x = if (i == 0) Cat(a(1, 0), 0.U(1.W)) else if (i + 1 == len) SignExt(a(i, i - 1), 3) else a(i + 1, i - 1)
//     val pp_temp = MuxLookup(
//       x,
//       0.U,
//       Seq(
//         1.U -> b_sext,
//         2.U -> b_sext,
//         3.U -> bx2,
//         4.U -> neg_bx2,
//         5.U -> neg_b,
//         6.U -> neg_b,
//       ),
//     )
//     val s = pp_temp(len)
//     val t = MuxLookup(
//       last_x,
//       0.U(2.W),
//       Seq(
//         4.U -> 2.U(2.W),
//         5.U -> 1.U(2.W),
//         6.U -> 1.U(2.W),
//       ),
//     )
//     last_x = x
//     val (pp, weight) = i match {
//       case 0 =>
//         (Cat(~s, s, s, pp_temp), 0)
//       case n if (n == len - 1) || (n == len - 2) =>
//         (Cat(~s, pp_temp, t), i - 2)
//       case _ =>
//         (Cat(1.U(1.W), ~s, pp_temp, t), i - 2)
//     }
//     for (j <- columns.indices) {
//       if (j >= weight && j < (weight + pp.getWidth)) {
//         columns(j) = columns(j) :+ pp(j - weight)
//       }
//     }
//   }

//   def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = {
//     var sum   = Seq[Bool]()
//     var cout1 = Seq[Bool]()
//     var cout2 = Seq[Bool]()
//     col.size match {
//       case 1 => // do nothing
//         sum = col ++ cin
//       case 2 =>
//         val c22 = Module(new C22)
//         c22.io.in := col
//         sum   = c22.io.out(0).asBool() +: cin
//         cout2 = Seq(c22.io.out(1).asBool())
//       case 3 =>
//         val c32 = Module(new C32)
//         c32.io.in := col
//         sum   = c32.io.out(0).asBool() +: cin
//         cout2 = Seq(c32.io.out(1).asBool())
//       case 4 =>
//         val c53 = Module(new C53)
//         for ((x, y) <- c53.io.in.take(4) zip col) {
//           x := y
//         }
//         c53.io.in.last := (if (cin.nonEmpty) cin.head else 0.U)
//         sum   = Seq(c53.io.out(0).asBool()) ++ (if (cin.nonEmpty) cin.drop(1) else Nil)
//         cout1 = Seq(c53.io.out(1).asBool())
//         cout2 = Seq(c53.io.out(2).asBool())
//       case n =>
//         val cin_1               = if (cin.nonEmpty) Seq(cin.head) else Nil
//         val cin_2               = if (cin.nonEmpty) cin.drop(1) else Nil
//         val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
//         val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
//         sum   = s_1 ++ s_2
//         cout1 = c_1_1 ++ c_2_1
//         cout2 = c_1_2 ++ c_2_2
//     }
//     (sum, cout1, cout2)
//   }

//   def max(in: Iterable[Int]): Int = in.reduce((a, b) => if (a > b) a else b)
//   def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = {
//     if (max(cols.map(_.size)) <= 2) {
//       val sum = Cat(cols.map(_(0)).reverse)
//       var k   = 0
//       while (cols(k).size == 1) k = k + 1
//       val carry = Cat(cols.drop(k).map(_(1)).reverse)
//       (sum, Cat(carry, 0.U(k.W)))
//     } else {
//       val columns_next = Array.fill(2 * len)(Seq[Bool]())
//       var cout1, cout2 = Seq[Bool]()
//       for (i <- cols.indices) {
//         val (s, c1, c2) = addOneColumn(cols(i), cout1)
//         columns_next(i) = s ++ cout2
//         cout1           = c1
//         cout2           = c2
//       }

//       val needReg = depth == 4
//       val toNextLayer =
//         if (needReg)
//           columns_next.map(_.map(x => RegEnable(x, io.regEnables(1))))
//         else
//           columns_next

//       addAll(toNextLayer, depth + 1)
//     }
//   }

//   val columns_reg  = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0))))
//   val (sum, carry) = addAll(cols = columns_reg, depth = 0)

//   io.result := sum + carry
// }

// class ArrayMultiplier(len: Int) extends Module {
//   override def latency = 2

//   val mulDataModule = Module(new ArrayMulDataModule(len))
//   mulDataModule.io.a          := io.in.bits.src(0)
//   mulDataModule.io.b          := io.in.bits.src(1)
//   mulDataModule.io.regEnables := VecInit((1 to latency) map (i => regEnable(i)))
//   val result = mulDataModule.io.result

//   var ctrlVec = Seq(ctrl)
//   for (i <- 1 to latency) {
//     ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i - 1))
//   }
//   val 32 = len - 1
//   val res  = Mux(ctrlVec.last.isHi, result(2 * 32 - 1, 32), result(32 - 1, 0))

//   io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), 32), res)

//   XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
// }

--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/Core.scala:
--------------------------------------------------------------------------------
package cpu

import chisel3._
import chisel3.util._
import chisel3.internal.DontCareBinding

import defines._
import defines.Const._
import pipeline.fetch._
import pipeline.decoder._
import pipeline.execute._
import pipeline.memory._
import pipeline.writeback._
import ctrl._
import mmu._
import chisel3.util.experimental.decode.decoder
import cpu.pipeline.fetch.InstFifo

/** CPU core top level: instantiates every pipeline unit, the control unit, CP0, the register
  * file, the branch predictor and the two L1 TLBs, and wires them to each other and to the
  * instruction/data cache interfaces.
  */
class Core(implicit val config: CpuConfig) extends Module {
  val io = IO(new Bundle {
    val ext_int   = Input(UInt(6.W))
    val inst      = new Cache_ICache()
    val data      = new Cache_DCache()
    val debug     = new DEBUG()
    val statistic = if (!config.build) Some(new CPUStatistic()) else None
  })

  val ctrl           = Module(new Ctrl()).io
  val fetchUnit      = Module(new FetchUnit()).io
  val bpu            = Module(new BranchPredictorUnit()).io
  val instFifo       = Module(new InstFifo()).io
  val decoderUnit    = Module(new DecoderUnit()).io
  val regfile        = Module(new ARegFile()).io
  val executeStage   = Module(new ExecuteStage()).io
  val executeUnit    = Module(new ExecuteUnit()).io
  val cp0            = Module(new Cp0()).io
  val memoryStage    = Module(new MemoryStage()).io
  val memoryUnit     = Module(new MemoryUnit()).io
  val writeBackStage = Module(new WriteBackStage()).io
  val writeBackUnit  = Module(new WriteBackUnit()).io
  val tlbL1I         = Module(new TlbL1I()).io
  val tlbL1D         = Module(new TlbL1D()).io

  // ---- instruction-side L1 TLB ----
  tlbL1I.addr         := fetchUnit.iCache.pc
  tlbL1I.fence        := executeUnit.executeStage.inst0.inst_info.tlbfence
  tlbL1I.cpu_stall    := !ctrl.fetchUnit.allow_to_go
  tlbL1I.icache_stall := io.inst.icache_stall
  tlbL1I.cache <> io.inst.tlb

  // ---- data-side L1 TLB ----
  tlbL1D.addr         := memoryUnit.dataMemory.out.addr
  tlbL1D.fence        := memoryUnit.memoryStage.inst0.inst_info.tlbfence
  tlbL1D.cpu_stall    := !ctrl.memoryUnit.allow_to_go
  tlbL1D.dcache_stall := io.data.dcache_stall
  tlbL1D.mem_write    := memoryUnit.dataMemory.out.wen.orR
  tlbL1D.mem_en       := memoryUnit.dataMemory.out.en
  tlbL1D.cache <> io.data.tlb

  // ---- pipeline control ----
  ctrl.instFifo.has2insts := !(instFifo.empty || instFifo.almost_empty)
  ctrl.decoderUnit <> decoderUnit.ctrl
  ctrl.executeUnit <> executeUnit.ctrl
  ctrl.memoryUnit <> memoryUnit.ctrl
  ctrl.writeBackUnit <> writeBackUnit.ctrl
  ctrl.cacheCtrl.iCache_stall := io.inst.icache_stall
  ctrl.cacheCtrl.dCache_stall := io.data.dcache_stall

  // ---- fetch ----
  fetchUnit.memory <> memoryUnit.fetchUnit
  fetchUnit.execute <> executeUnit.fetchUnit
  fetchUnit.decoder <> decoderUnit.fetchUnit
  fetchUnit.instFifo.full     := instFifo.full
  fetchUnit.iCache.inst_valid := io.inst.inst_valid
  io.inst.addr(0)             := fetchUnit.iCache.pc
  io.inst.addr(1)             := fetchUnit.iCache.pc_next
  for (i <- 2 until config.instFetchNum) {
    io.inst.addr(i) := fetchUnit.iCache.pc_next + ((i - 1) * 4).U
  }

  // ---- branch predictor ----
  bpu.decoder.ena                  := ctrl.decoderUnit.allow_to_go
  bpu.decoder.op                   := decoderUnit.bpu.decoded_inst0.op
  bpu.decoder.inst                 := decoderUnit.bpu.decoded_inst0.inst
  bpu.decoder.rs1                  := decoderUnit.bpu.decoded_inst0.reg1_raddr
  bpu.decoder.rs2                  := decoderUnit.bpu.decoded_inst0.reg2_raddr
  bpu.decoder.pc                   := decoderUnit.bpu.pc
  bpu.decoder.pc_plus4             := decoderUnit.bpu.pc + 4.U
  bpu.decoder.pht_index            := decoderUnit.bpu.pht_index
  decoderUnit.bpu.update_pht_index := bpu.decoder.update_pht_index
  bpu.execute <> executeUnit.bpu
  if (config.branchPredictor == "pesudo") {
    bpu.regfile.get <> regfile.bpu.get
  }
  decoderUnit.bpu.branch_inst   := bpu.decoder.branch_inst
  decoderUnit.bpu.pred_branch   := bpu.decoder.pred_branch
  decoderUnit.bpu.branch_target := bpu.decoder.branch_target

  // ---- instruction FIFO ----
  instFifo.do_flush         := ctrl.decoderUnit.do_flush
  instFifo.flush_delay_slot := ctrl.instFifo.delay_slot_do_flush
  instFifo.icache_stall     := io.inst.icache_stall
  instFifo.jump_branch_inst := decoderUnit.instFifo.jump_branch_inst
  instFifo.delay_sel_flush := Mux(
    ctrl.executeUnit.branch,
    !(executeUnit.memoryStage.inst1.ex.bd || decoderUnit.executeStage.inst0.ex.bd),
    Mux(ctrl.decoderUnit.branch, !decoderUnit.instFifo.allow_to_go(1), false.B),
  )
  instFifo.decoder_delay_flush := ctrl.decoderUnit.branch
  instFifo.execute_delay_flush := ctrl.executeUnit.branch
  instFifo.ren <> decoderUnit.instFifo.allow_to_go
  decoderUnit.instFifo.inst <> instFifo.read

  for (i <- 0 until config.instFetchNum) {
    instFifo.write(i).pht_index   := bpu.instBuffer.pht_index(i)
    bpu.instBuffer.pc(i)          := instFifo.write(i).pc
    instFifo.wen(i)               := io.inst.inst_valid(i)
    instFifo.write(i).tlb.refill  := tlbL1I.tlb1.refill
    instFifo.write(i).tlb.invalid := tlbL1I.tlb1.invalid
    instFifo.write(i).pc          := io.inst.addr(0) + (i * 4).U
    instFifo.write(i).inst        := io.inst.inst(i)
  }

  // ---- decode ----
  decoderUnit.instFifo.info.empty                 := instFifo.empty
  decoderUnit.instFifo.info.almost_empty          := instFifo.almost_empty
  decoderUnit.instFifo.info.inst0_is_in_delayslot := instFifo.inst0_is_in_delayslot
  decoderUnit.regfile <> regfile.read
  for (i <- 0 until (config.fuNum)) {
    decoderUnit.forward(i).exe      := executeUnit.decoderUnit.forward(i).exe
    decoderUnit.forward(i).mem_wreg := executeUnit.decoderUnit.forward(i).exe_mem_wreg
    decoderUnit.forward(i).mem      := memoryUnit.decoderUnit(i)
  }
  decoderUnit.cp0 <> cp0.decoderUnit
  decoderUnit.executeStage <> executeStage.decoderUnit

  // ---- execute stage register ----
  // (parentheses only make the && / || precedence explicit)
  executeStage.ctrl.clear(0) := ctrl.memoryUnit.flush_req ||
    (!decoderUnit.executeStage.inst0.ex.bd && ctrl.executeUnit.do_flush && ctrl.executeUnit.allow_to_go) ||
    (!ctrl.decoderUnit.allow_to_go && ctrl.executeUnit.allow_to_go)
  executeStage.ctrl.clear(1) := ctrl.memoryUnit.flush_req ||
    (ctrl.executeUnit.do_flush && decoderUnit.executeStage.inst1.allow_to_go) ||
    (ctrl.executeUnit.allow_to_go && !decoderUnit.executeStage.inst1.allow_to_go)
  executeStage.ctrl.inst0_allow_to_go := ctrl.executeUnit.allow_to_go

  // ---- execute ----
  executeUnit.decoderUnit.inst0_bd := decoderUnit.executeStage.inst0.ex.bd
  executeUnit.executeStage <> executeStage.executeUnit
  executeUnit.cp0 <> cp0.executeUnit
  executeUnit.memoryStage <> memoryStage.executeUnit

  // ---- CP0 / second-level TLB lookup ----
  cp0.ctrl.exe_stall := !ctrl.executeUnit.allow_to_go
  cp0.ctrl.mem_stall := !ctrl.memoryUnit.allow_to_go
  cp0.tlb(0).vpn2    := tlbL1I.tlb2.vpn2
  cp0.tlb(1).vpn2    := tlbL1D.tlb2.vpn2
  cp0.ext_int        := io.ext_int
  tlbL1I.tlb2.found  := cp0.tlb(0).found
  tlbL1D.tlb2.found  := cp0.tlb(1).found
  tlbL1I.tlb2.entry  := cp0.tlb(0).info
  tlbL1D.tlb2.entry  := cp0.tlb(1).info

  // ---- memory ----
  memoryStage.ctrl.allow_to_go := ctrl.memoryUnit.allow_to_go
  memoryStage.ctrl.clear       := ctrl.memoryUnit.do_flush

  memoryUnit.memoryStage <> memoryStage.memoryUnit
  memoryUnit.cp0 <> cp0.memoryUnit
  memoryUnit.writeBackStage <> writeBackStage.memoryUnit

  memoryUnit.dataMemory.in.tlb <> tlbL1D.tlb1
  memoryUnit.dataMemory.in.rdata := io.data.rdata
  io.data.en                     := memoryUnit.dataMemory.out.en
  io.data.rlen                   := memoryUnit.dataMemory.out.rlen
  io.data.wen                    := memoryUnit.dataMemory.out.wen
  io.data.wdata                  := memoryUnit.dataMemory.out.wdata
  io.data.addr                   := memoryUnit.dataMemory.out.addr

  // ---- write back ----
  writeBackStage.ctrl.allow_to_go := ctrl.writeBackUnit.allow_to_go
  writeBackStage.ctrl.clear       := ctrl.writeBackUnit.do_flush

  writeBackUnit.writeBackStage <> writeBackStage.writeBackUnit
  regfile.write <> writeBackUnit.regfile

  io.debug <> writeBackUnit.debug

  // ---- cache maintenance and stall signals ----
  io.inst.fence        := executeUnit.executeStage.inst0.inst_info.ifence
  io.inst.fence_addr   := executeUnit.executeStage.inst0.inst_info.mem_addr
  io.data.fence        := memoryUnit.memoryStage.inst0.inst_info.dfence
  io.data.fence_addr   := memoryUnit.memoryStage.inst0.inst_info.mem_addr
  io.data.execute_addr := executeUnit.memoryStage.inst0.mem.addr
  io.inst.req          := !instFifo.full
  io.inst.cpu_stall    := !ctrl.fetchUnit.allow_to_go
  io.data.cpu_stall    := !ctrl.memoryUnit.allow_to_go

  // ===----------------------------------------------------------------===
  // statistic
  // ===----------------------------------------------------------------===
  if (!config.build) {
    io.statistic.get.soc <> writeBackUnit.statistic.get
    io.statistic.get.bpu <> executeUnit.statistic.get
  }
}

--------------------------------------------------------------------------------
/chisel/src/main/scala/cpu/defines/Bundles.scala:
--------------------------------------------------------------------------------
package cpu.defines

import chisel3._
import chisel3.util._
import cpu.defines.Const._
import cpu.CpuConfig

/** One TLB entry; `pfn`, `c`, `d` and `v` each hold two elements. */
class TlbEntry extends Bundle {
  val vpn2 = UInt(VPN2_WID.W)
  val asid = UInt(ASID_WID.W)
  val g    = Bool()
  val pfn  = Vec(2, UInt(PFN_WID.W))
  val c    = Vec(2, Bool())
  val d    = Vec(2, Bool())
  val v    = Vec(2, Bool())
}

/** Exception state carried along the pipeline with an instruction. */
class ExceptionInfo extends Bundle {
  val flush_req  = Bool()
  val tlb_refill = Bool()
  val eret       = Bool()
  val badvaddr   = UInt(PC_WID.W)
  val bd         = Bool()
  val excode     = UInt(EXCODE_WID.W)
}

/** The two source operand values of an instruction. */
class SrcInfo extends Bundle {
  val src1_data = UInt(DATA_WID.W)
  val src2_data = UInt(DATA_WID.W)
}

/** Destination register write data. */
class RdInfo extends Bundle {
  val wdata = UInt(DATA_WID.W)
}

/** Decoded instruction information. */
class InstInfo extends Bundle {
  val inst_valid  = Bool()
  val reg1_ren    = Bool()
  val reg1_raddr  = UInt(REG_ADDR_WID.W)
  val reg2_ren    = Bool()
  val reg2_raddr  = UInt(REG_ADDR_WID.W)
  val fusel       = UInt(FU_SEL_WID.W)
  val op          = UInt(OP_WID.W)
  val reg_wen     = Bool()
  val reg_waddr   = UInt(REG_ADDR_WID.W)
  val imm32       = UInt(DATA_WID.W)
  val cp0_addr    = UInt(CP0_ADDR_WID.W)
  val dual_issue  = Bool()
  val whilo       = Bool()
  val rmem        = Bool()
  val wmem        = Bool()
  val mul         = Bool()
  val div         = Bool()
  val branch_link = Bool()
  val ifence      = Bool()
  val dfence      = Bool()
  val tlbfence    = Bool()
  val mem_addr    = UInt(DATA_ADDR_WID.W)
  val mem_wreg    = Bool()
  val inst        = UInt(INST_WID.W)
}

/** Per-slot `mem_wreg` flag and destination register address reported to the control unit. */
class MemRead extends Bundle {
  val mem_wreg  = Bool()
  val reg_waddr = UInt(REG_ADDR_WID.W)
}

/** Read enable and address of one source register. */
class SrcReadSignal extends Bundle {
  val ren   = Bool()
  val raddr = UInt(REG_ADDR_WID.W)
}

/** Cache stall flags. */
class CacheCtrl extends Bundle {
  val iCache_stall = Output(Bool())
  val dCache_stall = Output(Bool())
}

class FetchUnitCtrl extends Bundle {
  val allow_to_go = Input(Bool())
  val do_flush    = Input(Bool())
}

class InstFifoCtrl extends Bundle {
  val delay_slot_do_flush = Input(Bool())

  val has2insts = Output(Bool())
}

class DecoderUnitCtrl extends Bundle {
  val inst0 = Output(new Bundle {
    val src1 = new SrcReadSignal()
    val src2 = new SrcReadSignal()
  })
  val branch = Output(Bool())

  val allow_to_go = Input(Bool())
  val do_flush    = Input(Bool())
}

class ExecuteFuCtrl extends Bundle {
  val allow_to_go = Input(Bool())
  val do_flush    = Input(Bool())
  val eret        = Input(Bool())
}

class ExecuteCtrl(implicit val config: CpuConfig) extends Bundle {
  val inst     = Output(Vec(config.fuNum, new MemRead()))
  val fu_stall = Output(Bool())
  val branch   = Output(Bool())

  val allow_to_go = Input(Bool())
  val do_flush    = Input(Bool())

  val fu = new ExecuteFuCtrl()
}

class MemoryCtrl extends Bundle {
  val flush_req = Output(Bool())
  val eret      = Output(Bool())

  val allow_to_go = Input(Bool())
  val do_flush    = Input(Bool())
}

class WriteBackCtrl extends Bundle {
  val allow_to_go = Input(Bool())
  val do_flush    = Input(Bool())
}

/** Instruction-side first-level TLB status flags. */
class Tlb1InfoI extends Bundle {
  val invalid = Bool()
  val refill  = Bool()
}

/** Data-side first-level TLB status flags; adds `modify` to the instruction-side flags. */
class Tlb1InfoD extends Tlb1InfoI {
  val modify = Bool()
}

/** Second-level TLB lookup port: VPN2 in, hit flag and matching entry out. */
class Tlb2Info extends Bundle {
  // NOTE(review): width is hard-coded to 19 while TlbEntry.vpn2 uses VPN2_WID - confirm they agree.
  val vpn2  = Input(UInt(19.W))
  val found = Output(Bool())
  val entry = Output(new TlbEntry())
}

/** Interface between the instruction TLB and the instruction cache. */
class Tlb_ICache extends Bundle {
  val fill           = Input(Bool())
  val icache_is_save = Input(Bool())
  val uncached       = Output(Bool())

  val translation_ok = Output(Bool())
  val hit            = Output(Bool())
  val tag            = Output(UInt(20.W))
  val pa             = Output(UInt(32.W))
}

/** Interface between the data TLB and the data cache. */
class Tlb_DCache extends Bundle {
  val fill           = Input(Bool())
  val dcache_is_idle = Input(Bool())
  val dcache_is_save = Input(Bool())
  val uncached       = Output(Bool())
  val tlb1_ok        = Output(Bool())

  val translation_ok = Output(Bool())
  val hit            = Output(Bool())
  val tag            = Output(UInt(20.W))
  val pa             = Output(UInt(32.W))
}

// cpu to icache
class Cache_ICache(implicit
    val config: CpuConfig,
) extends Bundle {
  // read inst request from cpu
  val req  = Output(Bool())
  val addr = Output(Vec(config.instFetchNum, UInt(32.W))) // virtual address and next virtual address

  // read inst result
  val inst       = Input(Vec(config.instFetchNum, UInt(32.W)))
  val inst_valid = Input(Vec(config.instFetchNum, Bool()))

  // control
  val cpu_stall    = Output(Bool())
  val icache_stall = Input(Bool())

  val tlb = new Tlb_ICache()

  val fence      = Output(Bool())
  val fence_addr = Output(UInt(32.W))
}

// cpu to dcache
class Cache_DCache extends Bundle {
  val cpu_stall    = Output(Bool())
  val dcache_stall = Input(Bool())

  val execute_addr = Output(UInt(32.W))
  // connected to the mem unit
  val rdata = Input(UInt(32.W))
  val en    = Output(Bool())
  val wen   = Output(UInt(4.W))
  val rlen  = Output(UInt(2.W))
  val wdata = Output(UInt(32.W))
  val addr  = Output(UInt(32.W))

  val tlb = new Tlb_DCache()

  val fence      = Output(Bool())
  val fence_addr = Output(UInt(32.W))
}

// axi
// master

/** Reduced read-address payload used between the caches and the AXI interface. */
class AR extends Bundle {
  val addr = UInt(32.W)
  val len  = UInt(8.W)
  val size = UInt(3.W)
}

/** Reduced read-data payload. */
class R extends Bundle {
  val data = UInt(32.W)
  val last = Bool()
}

/** Reduced write-address payload. */
class AW extends Bundle {
  val addr = UInt(32.W)
  val len  = UInt(8.W)
  val size = UInt(3.W)
}

/** Reduced write-data payload. */
class W extends Bundle {
  val data = UInt(32.W)
  val strb = UInt(4.W)
  val last = Bool()
}

/** Read-only channel pair used by the instruction cache. */
class ICache_AXIInterface extends Bundle {
  val ar = Decoupled(new AR())
  val r  = Flipped(Decoupled(new R()))
}

/** Adds the write channels (address, data, payload-less response) for the data cache. */
class DCache_AXIInterface extends ICache_AXIInterface {
  val aw = Decoupled(new AW())

  val w = Decoupled(new W())

  val b = Flipped(Decoupled())
}

class Cache_AXIInterface extends Bundle {
  // axi read channel
  val icache = new ICache_AXIInterface()
  val dcache = new DCache_AXIInterface()
}

// AXI read address channel
class AXI_AR extends Bundle {
  val id    = UInt(4.W)  // transaction ID
  val addr  = UInt(32.W) // address
  val len   = UInt(8.W)  // burst length
  val size  = UInt(3.W)  // transfer size
  val burst = UInt(2.W)  // burst type
  val lock  = UInt(2.W)  // lock type
  val cache = UInt(4.W)  // cache type
  val prot  = UInt(3.W)  // protection type
}

// AXI read data channel
class AXI_R extends Bundle {
  val id   = UInt(4.W)  // transaction ID
  val data = UInt(32.W) // read data
  val resp = UInt(2.W)  // response type
  val last = Bool()     // last beat of burst
}

// AXI write address channel
class AXI_AW extends Bundle {
  val id    = UInt(4.W)  // transaction ID
  val addr  = UInt(32.W) // address
  val len   = UInt(8.W)  // burst length
  val size  = UInt(3.W)  // transfer size
  val burst = UInt(2.W)  // burst type
  val lock  = UInt(2.W)  // lock type
  val cache = UInt(4.W)  // cache type
  val prot  = UInt(3.W)  // protection type
}

// AXI write data channel
class AXI_W extends Bundle {
  val id   = UInt(4.W)  // transaction ID
  val data = UInt(32.W) // write data
  val strb = UInt(4.W)  // byte enable
  val last = Bool()     // last beat of burst
}

// AXI write response channel
class AXI_B extends Bundle {
  val id   = UInt(4.W) // transaction ID
  val resp = UInt(2.W) // response type
}

// AXI interface
class AXI extends Bundle {
  val ar = Decoupled(new AXI_AR())         // read address channel
  val r  = Flipped(Decoupled(new AXI_R())) // read data channel
  val aw = Decoupled(new AXI_AW())         // write address channel
  val w  = Decoupled(new AXI_W())          // write data channel
  val b  = Flipped(Decoupled(new AXI_B())) // write response channel
}

/** Write-back debug outputs (PC, register-file write enable, register number, write data). */
class DEBUG(implicit config: CpuConfig) extends Bundle {
  val wb_pc       = Output(UInt(32.W))
  val wb_rf_wen   = Output(UInt(4.W))
  val wb_rf_wnum  = Output(UInt(5.W))
  val wb_rf_wdata = Output(UInt(32.W))
}

class Ctrl_Sram extends Bundle {
  val do_flush = Output(Bool())
}

class Ctrl_Stage extends Bundle {
  val do_flush = Output(Bool())
  val after_ex = Output(Bool())
}

class Sram_Ctrl extends Bundle {
  val sram_discard = Output(UInt(2.W))
}

class Pipeline_Ctrl extends Bundle {
  val ex = Output(Bool())
}
-------------------------------------------------------------------------------- 1 | package cpu.pipeline.decoder 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines._ 6 | import cpu.defines.Const._ 7 | import cpu.{CpuConfig, BranchPredictorConfig} 8 | import cpu.pipeline.execute.DecoderUnitExecuteUnit 9 | import cpu.pipeline.fetch.BufferUnit 10 | 11 | class InstFifoDecoderUnit(implicit val config: CpuConfig) extends Bundle { 12 | val allow_to_go = Output(Vec(config.decoderNum, Bool())) 13 | val inst = Input(Vec(config.decoderNum, new BufferUnit())) 14 | val info = Input(new Bundle { 15 | val inst0_is_in_delayslot = Bool() 16 | val empty = Bool() 17 | val almost_empty = Bool() 18 | }) 19 | 20 | val jump_branch_inst = Output(Bool()) 21 | } 22 | 23 | class DataForwardToDecoderUnit extends Bundle { 24 | val exe = new RegWrite() 25 | val mem_wreg = Bool() 26 | val mem = new RegWrite() 27 | } 28 | 29 | class Cp0DecoderUnit extends Bundle { 30 | val access_allowed = Bool() 31 | val kernel_mode = Bool() 32 | val intterupt_allowed = Bool() 33 | val cause_ip = UInt(8.W) 34 | val status_im = UInt(8.W) 35 | } 36 | 37 | class DecoderUnit(implicit val config: CpuConfig) extends Module { 38 | val io = IO(new Bundle { 39 | // inputs 40 | val instFifo = new InstFifoDecoderUnit() 41 | val regfile = Vec(config.decoderNum, new Src12Read()) 42 | val forward = Input(Vec(config.fuNum, new DataForwardToDecoderUnit())) 43 | val cp0 = Input(new Cp0DecoderUnit()) 44 | // outputs 45 | val fetchUnit = new Bundle { 46 | val branch = Output(Bool()) 47 | val target = Output(UInt(PC_WID.W)) 48 | } 49 | val bpu = new Bundle { 50 | val bpuConfig = new BranchPredictorConfig() 51 | val pc = Output(UInt(PC_WID.W)) 52 | val decoded_inst0 = Output(new InstInfo()) 53 | val id_allow_to_go = Output(Bool()) 54 | val pht_index = Output(UInt(bpuConfig.phtDepth.W)) 55 | 56 | val branch_inst = Input(Bool()) 57 | val pred_branch = Input(Bool()) 58 | val branch_target = Input(UInt(PC_WID.W)) 59 | val 
update_pht_index = Input(UInt(bpuConfig.phtDepth.W)) 60 | } 61 | val executeStage = Output(new DecoderUnitExecuteUnit()) 62 | val ctrl = new DecoderUnitCtrl() 63 | }) 64 | 65 | val issue = Module(new Issue()).io 66 | val decoder = Seq.fill(config.decoderNum)(Module(new Decoder())) 67 | val jumpCtrl = Module(new JumpCtrl()).io 68 | val forwardCtrl = Module(new ForwardCtrl()).io 69 | 70 | io.regfile(0).src1.raddr := decoder(0).io.out.reg1_raddr 71 | io.regfile(0).src2.raddr := decoder(0).io.out.reg2_raddr 72 | io.regfile(1).src1.raddr := decoder(1).io.out.reg1_raddr 73 | io.regfile(1).src2.raddr := decoder(1).io.out.reg2_raddr 74 | 75 | forwardCtrl.in.forward := io.forward 76 | forwardCtrl.in.regfile := io.regfile // TODO: this connection may be problematic 77 | 78 | issue.allow_to_go := io.ctrl.allow_to_go 79 | issue.instFifo := io.instFifo.info 80 | 81 | jumpCtrl.in.allow_to_go := io.ctrl.allow_to_go 82 | jumpCtrl.in.decoded_inst0 := decoder(0).io.out 83 | jumpCtrl.in.forward := io.forward 84 | jumpCtrl.in.pc := io.instFifo.inst(0).pc 85 | jumpCtrl.in.reg1_data := io.regfile(0).src1.rdata 86 | 87 | val jump_branch_inst0 = jumpCtrl.out.jump_inst || io.bpu.branch_inst 88 | val inst0_branch = jumpCtrl.out.jump || io.bpu.pred_branch 89 | 90 | io.fetchUnit.branch := inst0_branch 91 | io.fetchUnit.target := Mux(io.bpu.pred_branch, io.bpu.branch_target, jumpCtrl.out.jump_target) 92 | 93 | io.instFifo.allow_to_go(0) := io.ctrl.allow_to_go 94 | io.instFifo.allow_to_go(1) := issue.inst1.allow_to_go 95 | io.instFifo.jump_branch_inst := jump_branch_inst0 96 | 97 | io.bpu.id_allow_to_go := io.ctrl.allow_to_go 98 | io.bpu.pc := io.instFifo.inst(0).pc 99 | io.bpu.decoded_inst0 := decoder(0).io.out 100 | io.bpu.pht_index := io.instFifo.inst(0).pht_index 101 | 102 | io.ctrl.inst0.src1.ren := decoder(0).io.out.reg1_ren 103 | io.ctrl.inst0.src1.raddr := decoder(0).io.out.reg1_raddr 104 | io.ctrl.inst0.src2.ren := decoder(0).io.out.reg2_ren 105 | io.ctrl.inst0.src2.raddr := decoder(0).io.out.reg2_raddr 106 | 
io.ctrl.branch := inst0_branch 107 | 108 | val pc = io.instFifo.inst.map(_.pc) 109 | val inst = io.instFifo.inst.map(_.inst) 110 | val inst_info = decoder.map(_.io.out) 111 | val tlb_refill = io.instFifo.inst.map(_.tlb.refill) 112 | val tlb_invalid = io.instFifo.inst.map(_.tlb.invalid) 113 | val interrupt = io.cp0.intterupt_allowed && (io.cp0.cause_ip & io.cp0.status_im).orR() && !io.instFifo.info.empty 114 | 115 | for (i <- 0 until (config.decoderNum)) { 116 | decoder(i).io.in.inst := inst(i) 117 | issue.decodeInst(i) := inst_info(i) 118 | issue.execute(i).mem_wreg := io.forward(i).mem_wreg 119 | issue.execute(i).reg_waddr := io.forward(i).exe.waddr 120 | } 121 | 122 | io.executeStage.inst0.pc := pc(0) 123 | io.executeStage.inst0.inst_info := inst_info(0) 124 | io.executeStage.inst0.inst_info.reg_wen := MuxLookup( 125 | inst_info(0).op, 126 | inst_info(0).reg_wen, 127 | Seq( 128 | EXE_MOVN -> (io.executeStage.inst0.src_info.src2_data =/= 0.U), 129 | EXE_MOVZ -> (io.executeStage.inst0.src_info.src2_data === 0.U), 130 | ), 131 | ) 132 | io.executeStage.inst0.inst_info.mem_addr := 133 | io.executeStage.inst0.src_info.src1_data + Util.signedExtend(io.executeStage.inst0.inst_info.inst(15, 0)) 134 | io.executeStage.inst0.src_info.src1_data := Mux( 135 | inst_info(0).reg1_ren, 136 | forwardCtrl.out.inst(0).src1.rdata, 137 | decoder(0).io.out.imm32, 138 | ) 139 | io.executeStage.inst0.src_info.src2_data := Mux( 140 | inst_info(0).reg2_ren, 141 | forwardCtrl.out.inst(0).src2.rdata, 142 | decoder(0).io.out.imm32, 143 | ) 144 | io.executeStage.inst0.ex.flush_req := 145 | io.executeStage.inst0.ex.excode =/= EX_NO || 146 | io.executeStage.inst0.ex.tlb_refill || 147 | io.executeStage.inst0.ex.eret 148 | io.executeStage.inst0.ex.tlb_refill := tlb_refill(0) 149 | io.executeStage.inst0.ex.eret := inst_info(0).op === EXE_ERET 150 | io.executeStage.inst0.ex.badvaddr := pc(0) 151 | io.executeStage.inst0.ex.bd := io.instFifo.info.inst0_is_in_delayslot 152 | val inst0_ex_cpu = 153 | 
!io.cp0.access_allowed && VecInit(EXE_MFC0, EXE_MTC0, EXE_TLBR, EXE_TLBWI, EXE_TLBWR, EXE_TLBP, EXE_ERET, EXE_WAIT) 154 | .contains(inst_info(0).op) 155 | io.executeStage.inst0.ex.excode := MuxCase( 156 | EX_NO, 157 | Seq( 158 | interrupt -> EX_INT, 159 | (tlb_refill(0) || tlb_invalid(0)) -> EX_TLBL, 160 | (pc(0)(1, 0).orR() || (pc(0)(31) && !io.cp0.kernel_mode)) -> EX_ADEL, 161 | (inst_info(0).inst_valid === INST_INVALID) -> EX_RI, 162 | (inst_info(0).op === EXE_SYSCALL) -> EX_SYS, 163 | (inst_info(0).op === EXE_BREAK) -> EX_BP, 164 | (inst0_ex_cpu) -> EX_CPU, 165 | ), 166 | ) 167 | io.executeStage.inst0.jb_info.jump_regiser := jumpCtrl.out.jump_register 168 | io.executeStage.inst0.jb_info.branch_inst := io.bpu.branch_inst 169 | io.executeStage.inst0.jb_info.pred_branch := io.bpu.pred_branch 170 | io.executeStage.inst0.jb_info.branch_target := io.bpu.branch_target 171 | io.executeStage.inst0.jb_info.update_pht_index := io.bpu.update_pht_index 172 | 173 | io.executeStage.inst1.allow_to_go := issue.inst1.allow_to_go 174 | io.executeStage.inst1.pc := pc(1) 175 | io.executeStage.inst1.inst_info := inst_info(1) 176 | io.executeStage.inst1.inst_info.reg_wen := MuxLookup( 177 | inst_info(1).op, 178 | inst_info(1).reg_wen, 179 | Seq( 180 | EXE_MOVN -> (io.executeStage.inst1.src_info.src2_data =/= 0.U), 181 | EXE_MOVZ -> (io.executeStage.inst1.src_info.src2_data === 0.U), 182 | ), 183 | ) 184 | io.executeStage.inst1.inst_info.mem_addr := 185 | io.executeStage.inst1.src_info.src1_data + Util.signedExtend(io.executeStage.inst1.inst_info.inst(15, 0)) 186 | io.executeStage.inst1.src_info.src1_data := Mux( 187 | inst_info(1).reg1_ren, 188 | forwardCtrl.out.inst(1).src1.rdata, 189 | decoder(1).io.out.imm32, 190 | ) 191 | io.executeStage.inst1.src_info.src2_data := Mux( 192 | inst_info(1).reg2_ren, 193 | forwardCtrl.out.inst(1).src2.rdata, 194 | decoder(1).io.out.imm32, 195 | ) 196 | io.executeStage.inst1.ex.flush_req := io.executeStage.inst1.ex.excode =/= EX_NO || 
io.executeStage.inst1.ex.tlb_refill 197 | io.executeStage.inst1.ex.tlb_refill := tlb_refill(1) 198 | io.executeStage.inst1.ex.eret := inst_info(1).op === EXE_ERET 199 | io.executeStage.inst1.ex.badvaddr := pc(1) 200 | io.executeStage.inst1.ex.bd := issue.inst1.is_in_delayslot 201 | val inst1_ex_cpu = 202 | !io.cp0.access_allowed && VecInit(EXE_MFC0, EXE_MTC0, EXE_TLBR, EXE_TLBWI, EXE_TLBWR, EXE_TLBP, EXE_ERET, EXE_WAIT) 203 | .contains(inst_info(1).op) 204 | io.executeStage.inst1.ex.excode := MuxCase( 205 | EX_NO, 206 | Seq( 207 | (tlb_refill(1) || tlb_invalid(1)) -> EX_TLBL, 208 | (pc(1)(1, 0).orR() || (pc(1)(31) && !io.cp0.kernel_mode)) -> EX_ADEL, 209 | (inst_info(1).inst_valid === INST_INVALID) -> EX_RI, 210 | (inst_info(1).op === EXE_SYSCALL) -> EX_SYS, 211 | (inst_info(1).op === EXE_BREAK) -> EX_BP, 212 | (inst1_ex_cpu) -> EX_CPU, 213 | ), 214 | ) 215 | } 216 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/defines/Const.scala: -------------------------------------------------------------------------------- 1 | package cpu.defines 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import cpu.defines.Instructions 6 | import cpu.CpuConfig 7 | 8 | trait Constants { 9 | val config = new CpuConfig 10 | // global 11 | val PC_WID = 32 12 | val PC_INIT = "hbfc00000".U(PC_WID.W) 13 | 14 | val EXT_INT_WID = 6 15 | 16 | val WRITE_ENABLE = true.B 17 | val WRITE_DISABLE = false.B 18 | val READ_ENABLE = true.B 19 | val READ_DISABLE = false.B 20 | val INST_VALID = false.B 21 | val INST_INVALID = true.B 22 | val SINGLE_ISSUE = false.B 23 | val DUAL_ISSUE = true.B 24 | 25 | // AluOp 26 | private val OP_NUM = 77 27 | val OP_WID = log2Ceil(OP_NUM) 28 | // NOP 29 | val EXE_NOP = 0.U(OP_WID.W) 30 | // bitwise operations 31 | val EXE_AND = 1.U(OP_WID.W) 32 | val EXE_OR = 2.U(OP_WID.W) 33 | val EXE_XOR = 3.U(OP_WID.W) 34 | val EXE_NOR = 4.U(OP_WID.W) 35 | // shifts 36 | val EXE_SLL = 5.U(OP_WID.W) 37 | val EXE_SLLV = 6.U(OP_WID.W) 38 | val 
EXE_SRL = 7.U(OP_WID.W) 39 | val EXE_SRLV = 8.U(OP_WID.W) 40 | val EXE_SRA = 9.U(OP_WID.W) 41 | val EXE_SRAV = 10.U(OP_WID.W) 42 | // Move 43 | val EXE_MOVZ = 11.U(OP_WID.W) 44 | val EXE_MOVN = 12.U(OP_WID.W) 45 | // HILO 46 | val EXE_MFHI = 13.U(OP_WID.W) 47 | val EXE_MTHI = 14.U(OP_WID.W) 48 | val EXE_MFLO = 15.U(OP_WID.W) 49 | val EXE_MTLO = 16.U(OP_WID.W) 50 | // CP0 Move 51 | val EXE_MFC0 = 17.U(OP_WID.W) 52 | val EXE_MTC0 = 18.U(OP_WID.W) 53 | // comparison 54 | val EXE_SLT = 19.U(OP_WID.W) 55 | val EXE_SLTU = 20.U(OP_WID.W) 56 | // arithmetic 57 | val EXE_ADD = 21.U(OP_WID.W) 58 | val EXE_ADDU = 22.U(OP_WID.W) 59 | val EXE_SUB = 23.U(OP_WID.W) 60 | val EXE_SUBU = 24.U(OP_WID.W) 61 | val EXE_CLZ = 25.U(OP_WID.W) 62 | val EXE_CLO = 26.U(OP_WID.W) 63 | val EXE_MULT = 27.U(OP_WID.W) 64 | val EXE_MULTU = 28.U(OP_WID.W) 65 | val EXE_MUL = 29.U(OP_WID.W) 66 | val EXE_MADD = 30.U(OP_WID.W) 67 | val EXE_MADDU = 31.U(OP_WID.W) 68 | val EXE_MSUB = 32.U(OP_WID.W) 69 | val EXE_MSUBU = 33.U(OP_WID.W) 70 | val EXE_DIV = 34.U(OP_WID.W) 71 | val EXE_DIVU = 35.U(OP_WID.W) 72 | // jumps and branches 73 | val EXE_J = 36.U(OP_WID.W) 74 | val EXE_JAL = 37.U(OP_WID.W) 75 | val EXE_JALR = 38.U(OP_WID.W) 76 | val EXE_JR = 39.U(OP_WID.W) 77 | val EXE_BEQ = 40.U(OP_WID.W) 78 | val EXE_BGEZ = 41.U(OP_WID.W) 79 | val EXE_BGEZAL = 42.U(OP_WID.W) 80 | val EXE_BGTZ = 43.U(OP_WID.W) 81 | val EXE_BLEZ = 44.U(OP_WID.W) 82 | val EXE_BLTZ = 45.U(OP_WID.W) 83 | val EXE_BLTZAL = 46.U(OP_WID.W) 84 | val EXE_BNE = 47.U(OP_WID.W) 85 | // memory access 86 | val EXE_LB = 48.U(OP_WID.W) 87 | val EXE_LBU = 49.U(OP_WID.W) 88 | val EXE_LH = 50.U(OP_WID.W) 89 | val EXE_LHU = 51.U(OP_WID.W) 90 | val EXE_LL = 52.U(OP_WID.W) 91 | val EXE_LW = 53.U(OP_WID.W) 92 | val EXE_LWL = 54.U(OP_WID.W) 93 | val EXE_LWR = 55.U(OP_WID.W) 94 | val EXE_SB = 56.U(OP_WID.W) 95 | val EXE_SC = 57.U(OP_WID.W) 96 | val EXE_SH = 58.U(OP_WID.W) 97 | val EXE_SW = 59.U(OP_WID.W) 98 | val EXE_SWL = 60.U(OP_WID.W) 99 | val EXE_SWR = 61.U(OP_WID.W) 100 | // Trap 101 | val EXE_TEQ 
= 62.U(OP_WID.W) 102 | val EXE_TGE = 63.U(OP_WID.W) 103 | val EXE_TGEU = 64.U(OP_WID.W) 104 | val EXE_TLT = 65.U(OP_WID.W) 105 | val EXE_TLTU = 66.U(OP_WID.W) 106 | val EXE_TNE = 67.U(OP_WID.W) 107 | // exceptions 108 | val EXE_SYSCALL = 68.U(OP_WID.W) 109 | val EXE_BREAK = 69.U(OP_WID.W) 110 | val EXE_ERET = 70.U(OP_WID.W) 111 | val EXE_WAIT = 71.U(OP_WID.W) 112 | // tlb 113 | val EXE_TLBP = 72.U(OP_WID.W) 114 | val EXE_TLBR = 73.U(OP_WID.W) 115 | val EXE_TLBWI = 74.U(OP_WID.W) 116 | val EXE_TLBWR = 75.U(OP_WID.W) 117 | // cache 118 | val EXE_CACHE = 76.U(OP_WID.W) 119 | 120 | // FUSel 121 | val FU_SEL_NUM = 8 122 | val FU_SEL_WID = log2Ceil(FU_SEL_NUM) 123 | 124 | val FU_ALU = 0.U(FU_SEL_WID.W) 125 | val FU_MEM = 1.U(FU_SEL_WID.W) 126 | val FU_BR = 2.U(FU_SEL_WID.W) 127 | val FU_EX = 3.U(FU_SEL_WID.W) 128 | val FU_MTHILO = 4.U(FU_SEL_WID.W) 129 | val FU_MFHILO = 5.U(FU_SEL_WID.W) 130 | val FU_MUL = 6.U(FU_SEL_WID.W) 131 | val FU_DIV = 7.U(FU_SEL_WID.W) 132 | 133 | // div 134 | val DIV_CTRL_WID = 2 135 | val DIV_FREE = 0.U(DIV_CTRL_WID.W) 136 | val DIV_BY_ZERO = 1.U(DIV_CTRL_WID.W) 137 | val DIV_ON = 2.U(DIV_CTRL_WID.W) 138 | val DIV_END = 3.U(DIV_CTRL_WID.W) 139 | val DIV_RESULT_READY = true.B 140 | val DIV_RESULT_NOT_READY = false.B 141 | val DIV_START = true.B 142 | val DIV_STOP = false.B 143 | 144 | // inst rom 145 | val INST_WID = 32 146 | 147 | // data ram 148 | val DATA_ADDR_WID = 32 149 | 150 | // GPR RegFile 151 | val AREG_NUM = 32 152 | val REG_ADDR_WID = 5 153 | val DATA_WID = 32 154 | val HILO_WID = 64 155 | 156 | // CP0 registers 157 | // CP0 Register (5.w), Select (3.w) 158 | val CP0_INDEX_ADDR = "b00000_000".U(8.W) // 0,0 159 | val CP0_RANDOM_ADDR = "b00001_000".U(8.W) // 1,0 160 | val CP0_ENTRYLO0_ADDR = "b00010_000".U(8.W) // 2,0 161 | val CP0_ENTRYLO1_ADDR = "b00011_000".U(8.W) // 3,0 162 | val CP0_CONTEXT_ADDR = "b00100_000".U(8.W) // 4,0 163 | // val CP0_CONTEXT_CONFIG_ADDR = "b00100_001".U(8.W) // 4,1 164 | // val CP0_USER_LOCAL_ADDR = "b00100_010".U(8.W) // 
4,2 165 | val CP0_PAGE_MASK_ADDR = "b00101_000".U(8.W) // 5,0 166 | // val CP0_PAGE_GRAIN_ADDR = "b00101_001".U(8.W) // 5,1 167 | val CP0_WIRED_ADDR = "b00110_000".U(8.W) // 6,0 168 | // val CP0_HWRENA_ADDR = "b00111_000".U(8.W) // 7,0 169 | val CP0_BADV_ADDR = "b01000_000".U(8.W) // 8,0 170 | val CP0_COUNT_ADDR = "b01001_000".U(8.W) // 9,0 (sel 6 or 7 reserved) 171 | val CP0_ENTRYHI_ADDR = "b01010_000".U(8.W) // 10,0 172 | val CP0_COMPARE_ADDR = "b01011_000".U(8.W) // 11,0 (sel 6 or 7 reserved) 173 | val CP0_STATUS_ADDR = "b01100_000".U(8.W) // 12,0 174 | // val CP0_INTCTL_ADDR = "b01100_001".U(8.W) // 12,1 175 | // val CP0_SRSCTL_ADDR = "b01100_010".U(8.W) // 12,2 176 | // val CP0_SRSMAP_ADDR = "b01100_011".U(8.W) // 12,3 177 | val CP0_CAUSE_ADDR = "b01101_000".U(8.W) // 13,0 178 | val CP0_EPC_ADDR = "b01110_000".U(8.W) // 14,0 179 | val CP0_PRID_ADDR = "b01111_000".U(8.W) // 15,0 180 | val CP0_EBASE_ADDR = "b01111_001".U(8.W) // 15,1 181 | // val CP0_CDMMBASE_ADDR = "b01111_010".U(8.W) // 15,2 182 | // val CP0_CMGCRBASE_ADDR = "b01111_011".U(8.W) // 15,3 183 | val CP0_CONFIG_ADDR = "b10000_000".U(8.W) // 16,0 184 | val CP0_CONFIG1_ADDR = "b10000_001".U(8.W) // 16,1 185 | // val CP0_CONFIG2_ADDR = "b10000_010".U(8.W) // 16,2 186 | // val CP0_CONFIG3_ADDR = "b10000_011".U(8.W) // 16,3 187 | // val CP0_CONFIG4_ADDR = "b10000_100".U(8.W) // 16,4 (sel 6 or 7 reserved) 188 | // val CP0_LOAD_LINKED_ADDR = "b10001_000".U(8.W) // 17,0 189 | val CP0_TAGLO_ADDR = "b11100_000".U(8.W) // 28,0 190 | val CP0_TAGHI_ADDR = "b11101_000".U(8.W) // 29,0 191 | val CP0_ERROR_EPC_ADDR = "b11110_000".U(8.W) // 30,0 192 | 193 | val CP0_ADDR_WID = 8 194 | 195 | val PTEBASE_WID = 9 196 | 197 | // exception types 198 | val EXCODE_WID = 5 199 | 200 | val EX_NO = 0.U(EXCODE_WID.W) // no exception 201 | val EX_INT = 1.U(EXCODE_WID.W) // interrupt exception 202 | val EX_MOD = 2.U(EXCODE_WID.W) // TLB entry modification exception 203 | val EX_TLBL = 3.U(EXCODE_WID.W) // TLB exception on illegal instruction fetch or access 204 | val EX_TLBS = 4.U(EXCODE_WID.W) // TLB exception on illegal store access 205 | val EX_ADEL = 5.U(EXCODE_WID.W) 
// address misalignment exception (instruction fetch or access) 206 | val EX_ADES = 6.U(EXCODE_WID.W) // address misalignment exception (store access) 207 | val EX_SYS = 7.U(EXCODE_WID.W) // system call exception 208 | val EX_BP = 8.U(EXCODE_WID.W) // breakpoint exception 209 | val EX_RI = 9.U(EXCODE_WID.W) // reserved instruction exception 210 | val EX_CPU = 10.U(EXCODE_WID.W) // coprocessor unusable exception 211 | val EX_OV = 11.U(EXCODE_WID.W) // arithmetic overflow exception 212 | 213 | val EXC_INT = "h00".U(EXCODE_WID.W) // interrupt exception 214 | val EXC_MOD = "h01".U(EXCODE_WID.W) // TLB entry modification exception 215 | val EXC_TLBL = "h02".U(EXCODE_WID.W) // TLB exception on illegal instruction fetch or access 216 | val EXC_TLBS = "h03".U(EXCODE_WID.W) // TLB exception on illegal store access 217 | val EXC_ADEL = "h04".U(EXCODE_WID.W) // address misalignment exception (instruction fetch or access) 218 | val EXC_ADES = "h05".U(EXCODE_WID.W) // address misalignment exception (store access) 219 | val EXC_SYS = "h08".U(EXCODE_WID.W) // system call exception 220 | val EXC_BP = "h09".U(EXCODE_WID.W) // breakpoint exception 221 | val EXC_RI = "h0a".U(EXCODE_WID.W) // reserved instruction exception 222 | val EXC_CPU = "h0b".U(EXCODE_WID.W) // coprocessor unusable exception 223 | val EXC_OV = "h0c".U(EXCODE_WID.W) // arithmetic overflow exception 224 | val EXC_NO = "h1f".U(EXCODE_WID.W) // no exception 225 | 226 | val EX_ENTRY = "h_bfc00380".U(32.W) 227 | val EX_TLB_REFILL_ENTRY = "h_bfc00200".U(32.W) 228 | 229 | // TLB MMU 230 | val TLB_NUM = if (config.build) 8 else 32 // for sys 32, other 8 231 | val PFN_WID = 20 232 | val C_WID = 3 233 | val ASID_WID = 8 234 | val VPN2_WID = 19 235 | } 236 | trait OptionConst { 237 | 238 | // register write destination: Write Register Address type 239 | val WRA_T1 = 0.U(2.W) // use inst(15,11) 240 | val WRA_T2 = 1.U(2.W) // use inst(20,16) 241 | val WRA_T3 = 2.U(2.W) // use "b11111", i.e. register 31 242 | val WRA_X = 0.U(2.W) // not care 243 | val AREG_31 = "b11111".U(5.W) 244 | 245 | // immediate types 246 | private val IL = 3 247 | val IMM_N = 0.U(IL.W) 248 | val IMM_LSE = 1.U(IL.W) // immediate takes inst(15,0) as the low 16 bits, sign-extended; used by ADDI, ADDIU, SLTI and SLTIU 249 | val IMM_LZE = 2.U(IL.W) // immediate takes inst(15,0) as the low 16 bits, zero-extended; used by bitwise instructions 250 | val IMM_HZE = 3.U(IL.W) // immediate takes inst(15,0) as the high 16 bits, zero-extended; used by LUI (is this necessary?) 
251 | val IMM_SHT = 4.U(IL.W) // immediate takes inst(10,6) as the low 5 bits, extension is don't-care; used by SLL, SRL, SRA 252 | } 253 | 254 | object Const extends Constants with Instructions with OptionConst 255 | -------------------------------------------------------------------------------- /chisel/src/main/scala/cpu/cache/ICache.scala: -------------------------------------------------------------------------------- 1 | // * Cache design borrows from cdim by cqu, nscscc2021 * // 2 | package cache 3 | 4 | import chisel3._ 5 | import chisel3.util._ 6 | import memory._ 7 | import cpu.CacheConfig 8 | import cpu.defines._ 9 | import cpu.CpuConfig 10 | import cpu.defines.Const._ 11 | 12 | class ICache(cacheConfig: CacheConfig)(implicit config: CpuConfig) extends Module { 13 | val nway: Int = cacheConfig.nway 14 | val nset: Int = cacheConfig.nset 15 | val nbank: Int = cacheConfig.nbank 16 | val ninst: Int = cacheConfig.ninst // number of instructions fetched 17 | val bankOffsetWidth: Int = cacheConfig.bankOffsetWidth 18 | val bankWidth: Int = cacheConfig.bankWidth 19 | val tagWidth: Int = cacheConfig.tagWidth 20 | val indexWidth: Int = cacheConfig.indexWidth 21 | val offsetWidth: Int = cacheConfig.offsetWidth 22 | val io = IO(new Bundle { 23 | val cpu = Flipped(new Cache_ICache()) 24 | val axi = new ICache_AXIInterface() 25 | val statistic = if (!config.build) Some(new ICacheStatistic()) else None 26 | }) 27 | require(isPow2(ninst), "ninst must be power of 2") 28 | // * addr organization * // 29 | // ====================================== 30 | // | tag | index |offset| 31 | // |31 12|11 6|5 0| 32 | // ====================================== 33 | // | offset | 34 | // | bank index | bank offset | 35 | // | 5 4 | 3 2 | 36 | // ============================ 37 | 38 | val tlb_fill = RegInit(false.B) 39 | // * fsm * // 40 | val s_idle :: s_uncached :: s_replace :: s_save :: Nil = Enum(4) 41 | val state = RegInit(s_idle) 42 | 43 | // * nway * nset * // 44 | // * 128 bit for 4 inst * // 45 | // ========================================================= 46 | // | valid 
| tag | bank 0 | bank 1 | bank 2 | bank 3 | 47 | // | 1 | 20 | 128 | 128 | 128 | 128 | 48 | // ========================================================= 49 | // | bank | 50 | // | inst 0 | inst 1 | inst 2 | inst 3 | 51 | // | 32 | 32 | 32 | 32 | 52 | // ===================================== 53 | val instperbank = bankWidth / 4 // number of instructions stored per bank 54 | val valid = RegInit(VecInit(Seq.fill(nset * nbank)(VecInit(Seq.fill(instperbank)(false.B))))) 55 | 56 | val data = Wire(Vec(nway, Vec(instperbank, UInt(DATA_WID.W)))) 57 | val tag = RegInit(VecInit(Seq.fill(nway)(0.U(tagWidth.W)))) 58 | 59 | // * should choose next addr * // 60 | val should_next_addr = (state === s_idle && !tlb_fill) || (state === s_save) 61 | 62 | val data_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, bankOffsetWidth) 63 | val data_wstrb = RegInit(VecInit(Seq.fill(nway)(VecInit(Seq.fill(instperbank)(0.U(4.W)))))) 64 | 65 | val tag_raddr = io.cpu.addr(should_next_addr)(indexWidth + offsetWidth - 1, offsetWidth) 66 | val tag_wstrb = RegInit(VecInit(Seq.fill(nway)(false.B))) 67 | val tag_wdata = RegInit(0.U(tagWidth.W)) 68 | 69 | // * lru * // 70 | val lru = RegInit(VecInit(Seq.fill(nset * nbank)(false.B))) 71 | 72 | // * itlb * // 73 | when(tlb_fill) { tlb_fill := false.B } 74 | io.cpu.tlb.fill := tlb_fill 75 | io.cpu.tlb.icache_is_save := (state === s_save) 76 | 77 | // * fence * // 78 | val fence_index = io.cpu.fence_addr(indexWidth + offsetWidth - 1, offsetWidth) 79 | when(io.cpu.fence && !io.cpu.icache_stall && !io.cpu.cpu_stall) { 80 | valid(fence_index) := VecInit(Seq.fill(instperbank)(false.B)) 81 | } 82 | 83 | // * replace set * // 84 | val rset = RegInit(0.U(6.W)) 85 | 86 | // * virtual set * // 87 | val vset = io.cpu.addr(0)(indexWidth + offsetWidth - 1, offsetWidth) 88 | 89 | // * cache hit * // 90 | val tag_compare_valid = VecInit(Seq.tabulate(nway)(i => tag(i) === io.cpu.tlb.tag && valid(vset)(i))) 91 | val cache_hit = tag_compare_valid.contains(true.B) 92 | val 
cache_hit_available = cache_hit && io.cpu.tlb.translation_ok && !io.cpu.tlb.uncached 93 | val sel = tag_compare_valid(1) 94 | 95 | val bank_offset = io.cpu.addr(0)(log2Ceil(instperbank) + 1, 2) 96 | val inst = VecInit( 97 | Seq.tabulate(instperbank)(i => Mux(i.U <= (3.U - bank_offset), data(sel)(i.U + bank_offset), 0.U)), 98 | ) 99 | val inst_valid = VecInit(Seq.tabulate(instperbank)(i => cache_hit_available && i.U <= (3.U - bank_offset))) 100 | 101 | val saved = RegInit(VecInit(Seq.fill(instperbank)(0.U.asTypeOf(new Bundle { 102 | val inst = UInt(PC_WID.W) 103 | val valid = Bool() 104 | })))) 105 | 106 | val axi_cnt = Counter(cacheConfig.burstSize) 107 | 108 | // bank tag ram 109 | for { i <- 0 until nway; j <- 0 until instperbank } { 110 | val bank = Module(new SimpleDualPortRam(nset * nbank, DATA_WID, byteAddressable = true)) 111 | bank.io.ren := true.B 112 | bank.io.raddr := data_raddr 113 | data(i)(j) := bank.io.rdata 114 | 115 | bank.io.wen := data_wstrb(i)(j).orR 116 | bank.io.waddr := Cat(rset, axi_cnt.value(log2Ceil(cacheConfig.burstSize) - 1, log2Ceil(instperbank))) 117 | bank.io.wdata := Mux(j.U === axi_cnt.value(log2Ceil(instperbank) - 1, 0), io.axi.r.bits.data, 0.U) 118 | bank.io.wstrb := data_wstrb(i)(j) 119 | } 120 | 121 | for { i <- 0 until ninst } { 122 | io.cpu.inst_valid(i) := Mux(state === s_idle && !tlb_fill, inst_valid(i), saved(i).valid) && io.cpu.req 123 | io.cpu.inst(i) := Mux(state === s_idle && !tlb_fill, inst(i), saved(i).inst) 124 | } 125 | 126 | for { i <- 0 until nway } { 127 | val tag_bram = Module(new LUTRam(nset, tagWidth)) 128 | tag_bram.io.raddr := tag_raddr 129 | tag(i) := tag_bram.io.rdata 130 | 131 | tag_bram.io.wen := tag_wstrb(i) 132 | tag_bram.io.waddr := rset 133 | tag_bram.io.wdata := tag_wdata 134 | } 135 | 136 | io.cpu.icache_stall := Mux(state === s_idle && !tlb_fill, (!cache_hit_available && io.cpu.req), state =/= s_save) 137 | 138 | val ar = RegInit(0.U.asTypeOf(new AR())) 139 | val arvalid = RegInit(false.B) 140 | 
ar <> io.axi.ar.bits 141 | arvalid <> io.axi.ar.valid 142 | 143 | val r = RegInit(0.U.asTypeOf(new R())) 144 | val rready = RegInit(false.B) 145 | r <> io.axi.r.bits 146 | rready <> io.axi.r.ready 147 | 148 | when(tlb_fill === true.B) { 149 | tlb_fill := false.B 150 | } 151 | 152 | switch(state) { 153 | is(s_idle) { 154 | when(tlb_fill) { 155 | when(!io.cpu.tlb.hit) { 156 | state := s_save 157 | saved(0).inst := 0.U 158 | saved(0).valid := true.B 159 | } 160 | }.elsewhen(io.cpu.req) { 161 | when(!io.cpu.tlb.translation_ok) { 162 | tlb_fill := true.B 163 | }.elsewhen(io.cpu.tlb.uncached) { 164 | state := s_uncached 165 | ar.addr := io.cpu.tlb.pa 166 | ar.len := 0.U(log2Ceil((nbank * bankWidth) / 4).W) 167 | ar.size := 2.U(bankOffsetWidth.W) 168 | arvalid := true.B 169 | }.elsewhen(!cache_hit) { 170 | state := s_replace 171 | ar.addr := Cat(io.cpu.tlb.pa(31, 6), 0.U(6.W)) 172 | ar.len := 15.U(log2Ceil((nbank * bankWidth) / 4).W) 173 | ar.size := 2.U(bankOffsetWidth.W) 174 | arvalid := true.B 175 | 176 | rset := vset 177 | (0 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := Mux(i.U === 0.U, 0xf.U, 0x0.U)) 178 | tag_wstrb(lru(vset)) := true.B 179 | tag_wdata := io.cpu.tlb.tag 180 | valid(vset)(lru(vset)) := true.B 181 | axi_cnt.reset() 182 | }.elsewhen(!io.cpu.icache_stall) { 183 | lru(vset) := ~sel 184 | when(io.cpu.cpu_stall) { 185 | state := s_save 186 | (1 until instperbank).foreach(i => saved(i).inst := data(sel)(i)) 187 | (0 until instperbank).foreach(i => saved(i).valid := inst_valid(i)) 188 | } 189 | } 190 | } 191 | } 192 | is(s_uncached) { 193 | when(io.axi.ar.valid) { 194 | when(io.axi.ar.ready) { 195 | arvalid := false.B 196 | rready := true.B 197 | } 198 | }.elsewhen(io.axi.r.fire) { 199 | // * uncached not support burst transport * // 200 | state := s_save 201 | saved(0).inst := io.axi.r.bits.data 202 | saved(0).valid := true.B 203 | rready := false.B 204 | } 205 | } 206 | is(s_replace) { 207 | when(io.axi.ar.valid) { 208 | when(io.axi.ar.ready) 
{ 209 | arvalid := false.B 210 | rready := true.B 211 | } 212 | }.elsewhen(io.axi.r.fire) { 213 | // * burst transport * // 214 | when(!io.axi.r.bits.last) { 215 | axi_cnt.inc() 216 | data_wstrb(lru(vset))(0) := data_wstrb(lru(vset))(instperbank - 1) 217 | (1 until instperbank).foreach(i => data_wstrb(lru(vset))(i) := data_wstrb(lru(vset))(i - 1)) 218 | }.otherwise { 219 | rready := false.B 220 | data_wstrb(lru(vset)) := 0.U.asTypeOf(Vec(instperbank, UInt(4.W))) 221 | tag_wstrb(lru(vset)) := false.B 222 | } 223 | }.elsewhen(!io.axi.r.ready) { 224 | state := s_idle 225 | } 226 | } 227 | is(s_save) { 228 | when(!io.cpu.cpu_stall && !io.cpu.icache_stall) { 229 | state := s_idle 230 | (0 until instperbank).foreach(i => saved(i).valid := false.B) 231 | } 232 | } 233 | } 234 | 235 | // ===----------------------------------------------------------------=== 236 | // statistic 237 | // ===----------------------------------------------------------------=== 238 | val req_cnt = RegInit(0.U(32.W)) 239 | when(io.cpu.req) { 240 | req_cnt := req_cnt + 1.U 241 | } 242 | val hit_cnt = RegInit(0.U(32.W)) 243 | when(io.cpu.req && cache_hit) { 244 | hit_cnt := hit_cnt + 1.U 245 | } 246 | if (!config.build) { 247 | io.statistic.get.request := req_cnt 248 | io.statistic.get.hit := hit_cnt 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /doc/src/main.typ: -------------------------------------------------------------------------------- 1 | #import "template.typ": * 2 | #import "@preview/tablex:0.0.4": tablex, rowspanx, colspanx, hlinex 3 | #set par(first-line-indent:2em) 4 | #show par: set block(spacing: 0.65em) 5 | #show link: underline 6 | #set heading(numbering: "1.1") 7 | #set text(font: "Fira Code", lang: "zh") 8 | #set page( 9 | margin: (top: 48pt, bottom: 48pt), 10 | numbering: "1", 11 | ) 12 | \ 13 | \ 14 | \ 15 | #align(center, text(50pt)[*NSCSCC2023*]) 16 | 17 | 18 | \ 19 | \ 20 | #align(center, text(30pt)[ 21 | *杭州电子科技大学 
PUA-MIPS 队* 22 | ]) 23 | 24 | #align(center, text(30pt)[ 25 | *决赛设计报告* 26 | ]) 27 | 28 | 29 | \ 30 | \ 31 | \ 32 | \ 33 | \ 34 | #figure( 35 | image("pua-mips.png", width: 100%), 36 | 37 | ) 38 | 39 | \ 40 | \ 41 | \ 42 | #align(center, text(17pt)[ 43 | * 叶剑豪 奚力丰 胡致尧* 44 | ]) 45 | 46 | #pagebreak() 47 | #outline() 48 | #pagebreak() 49 | 50 | 51 | = 概述 52 | 我们基于Chisel语言设计并实现了一个MIPS32指令集架构的处理器:PUA-MIPS(Powerful Ultra Architecture MIPS)处理器。能够使用比赛方提供的大部分外设,并且成功通过了龙芯杯官方框架提供的功能测试、性能测试和系统测试。该处理器可以运行PMON引导程序,启动Linux操作系统。 53 | 54 | - 顺序双发射六级流水线处理器PUA-MIPS(Powerful Ultra Architecture MIPS)。 55 | - 处理器参数 56 | - IPC几何平均值 1.25 57 | - 极限频率 88M 58 | - 六级流水(2取指,1译码,1执行,1访存,1写回) 59 | - 几何平均双发率 59.42% 60 | - 分支预测准确度 94.48% 61 | - 处理器设计 62 | - 指令集:MIPS32 Release1的除BranchLikely和浮点指令以外的所有指令。 63 | - 流水线结构:乘法4级流水,除法8级流水,两级访存。非对称双发射。 64 | - 缓存 65 | - ICache为两路组相连8KB设计。命中率99.72% 66 | - DCache为两路组相连8KB设计。命中率95.00% 67 | - Soc 68 | - CPU:PUA-MIPS CPU(含Cache) 69 | - DRAM支持:使用板载 DDR3 SDRAM作为主存 70 | - 串口:实现串口控制器以调整波特率 71 | - 以太网:使用IP核构建以太网控制器 72 | - GPIO:使用confreg组件控制LED,数码管等组件 73 | - 图像输出:实现VGA传输协议 74 | - OS 75 | - 支持PMON引导程序 76 | - 支持最新Linux主线v6.5-rc3版本 77 | - 支持OS驱动Soc 78 | - Chisel语言 79 | - 参数化设计,支持流水线长度、取指宽度、访存宽度等参数的配置。 80 | - KISS设计理念加持,单个文件大小不超过200行。 81 | - 面向对象带来更高的自由度,同一种算法,四种不同实现随意组合。 82 | - 高性能模拟器带来更快速的设计空间探索。 83 | 84 | #pagebreak() 85 | = 处理器详细设计 86 | PUA-MIPS是顺序双发射结构设计,支持MIPS32 Release1扩展,处理器前端流水级包括取指单元、指令队列等单元,顺序取指。后端包括译码、分支预测、运算单元、寄存器堆、访存控制和写回等单元。顶端有一个控制单元,负责控制各个流水级之间的同步和冲突。缓存分为ICache、DCache等模块。 87 | 88 | == 前端 89 | #figure( 90 | image("front-end.png", width: 100%), 91 | caption: [ 92 | Frontend Design 93 | ], 94 | ) 95 | === 取指(Fetch Unit) 96 | 前端流水级包括取指单元、指令缓存等单元。取指单元(FetchUnit)用于产生下两条指令的PC并发送到指令缓存(Instruction Cache)。我们通过取指队列(Instruction Fifo)实现了前端(Frontend)和后端(Backend)的充分解耦,使得开发过程能够尽量保持独立。指令队列是一个深度为8的先进先出队列: 对于前端它负责存储从指令缓存发来的指令;对于后端它负责维护转移延迟槽解,与发射控制模块(Issue Control)一同管理指令的发射与保留。在PC发往指令缓存的同时也会并行访问分支预测器获取PHT的下标,并将PHT下标与指令一起存入取指队列中,这个操作将大大优化访问分支预测器的时延,使得我们的分支预测器容量可以尽可能增加。 97 | 98 
| === 译码(Decoder Unit) 99 | 由于我们是双发射处理器,译码级每周期需要译码两条指令。它会对取指队列的首两条指令进行译码并通过发射控制模块决定是否发射指令。 100 | - 对于简单的Jump指令,我们进行直接跳转,倘若存在JR等与寄存器相关的跳转指令,若在译码级没有成功获得寄存器值,我们将把该跳转指令延迟到Execute级进行处理。 101 | - 对于Branch相关的指令,我们将PHT下标发往分支预测器,获取是否跳转的信息;当分支预测器选择跳转时,我们的Decoder级会发出跳转请求,这时将清空Decoder级前的流水线,而取指队列会帮助我们处理延迟槽相关的问题。 102 | - 在译码阶段还将使用发射控制(Issue Control)模块动态处理指令的双发,这部分信息会反馈给取指队列以控制取指队列的出队。 103 | - 寄存器堆的访问也在这一级进行。这一级所有待执行的指令会得到所有流水级的目的操作数相关信息,通过数据前递模块(Forward Control)处理数据前递。 104 | - 因为chisel会帮助我们自动删除无用的信号,所以我们在译码级尽可能产生后面所有可能用到的控制信号,并包装在inst info结构体中向后传递。 105 | 106 | == 后端 107 | #figure( 108 | image("back-end.png", width: 100%), 109 | caption: [ 110 | Backend Design 111 | ], 112 | ) 113 | 114 | === 执行(Execute Unit) 115 | 执行阶段中使用译码生成的信号对数据进行运算处理。 116 | - 对于普通的运算指令,我们使用两个ALU运算单元来实现双发射极限情况下的普通运算,对于特殊运算,我们使用不同的运算单元。 117 | - 针对乘除法指令,我们使用Xilinx ip核进行运算,其中乘法两周期,除法四周期。通过握手信号与执行单元进行通信,并在该级将运算内容写回HILO寄存器。 118 | - 针对分支指令,我们在该阶段处理寄存器相关的分支指令,同时也对分支预测器的结果进行修正,当结果不一致时将清空流水线,重新取指运行,并将判断的结果返回分支预测单元,对BHT和PHT的值进行更新。 119 | - 针对访存指令,由于访存需要两个周期,因此我们在执行级同时需要向数据缓存(Data Cache)发送访存信号。 120 | - 我们还需要进行TLB的第一级地址转换来减少TLB翻译带来的时延。 121 | - MTC0和MFC0指令将在这一级进行。 122 | - 在该级实例化了HILO寄存器,MTHI、MTLO、MFHI和MFLO指令将在这一级进行。 123 | - 在该级实例化了LLBit寄存器,用于处理LL和SC相关指令。 124 | 125 | === 访存(Memory Unit) 126 | - 对于Load指令,访存级接收数据缓存返回的数据。 127 | - 对于Store指令,访存级向数据缓存发出写请求。 128 | - 由于所有的例外都会在这一级得到结果,我们的CP0的访问也在这一级发生,倘若遇到例外或者碰到可能修改TLB的指令,我们都会清空流水线,保证执行的正确性。 129 | 130 | === 写回(Write Back Unit) 131 | - 写回阶段进行双发射写回,若遇到了读写冲突则进行前递操作。我们还实现了一个双进一出的提交队列,用于在Vivado上使用官方提供的差分测试。 132 | 133 | == 双发策略(Issue Control) 134 | - inst1不发射指令的情况 135 | + 对于例外相关指令(SYSCALL、ERET等)以及可能修改TLB的相关指令(MTC0、TLBWI等)都只会进行单发操作,在inst0中发射,inst1不发射任何指令,这样可以减少大量判断逻辑(如inst1发出清空流水线的请求,但inst0无异常,此时inst0需要保留,而保留一条指令刷去另一条指令的操作会增加许多不必要的逻辑判断而造成延迟)。 136 | - inst1可能发射指令的情况 137 | + 由于我们是顺序双发处理器,绝对不能进行乱序运行,所以当inst0不发射时,inst1肯定不发射。 138 | + 对CP0、HILO或寄存器堆的同一个操作数inst0发出写请求,而inst1为读请求,由于没有同一级内部指令间的数据前递,此时会产生RAW冲突,这时inst1不予发射。 139 | + 当执行级为访存指令,由于访存指令只有在访存级才会得到结果,这时我们也不予发射指令。因为最多只会停顿一拍,所以我们没实现访存相关的提前唤醒。 
140 | + 乘除单元以及访存单元只有一个,当两条指令都需同时占用这类单元时,inst1延迟到下一拍发射。 141 | + 对于跳转指令,由于MIPS的延迟槽的存在:当inst0为跳转指令时,倘若inst1不能发射,将会被延迟到下一拍作为inst0发射;当inst1为跳转指令时,由于跳转指令会刷新流水线,我们也将该指令延迟到下一拍的inst0发射。 142 | 143 | == 分支预测(Branch Predictor Unit) 144 | 我们实现了多种分支预测器,包括局部历史分支预测器、全局历史分支预测器以及卷积神经网络分支预测器(因为过拟合严重而被放弃)。在除卷积神经网络之外的分支预测方法中,局部历史分支预测器取得的预测效果最好,因此我们最终使用了局部历史分支预测器。 145 | \ 146 | \ 147 | 148 | #align(center, table( 149 | columns: (auto, auto), 150 | inset: 10pt, 151 | align: horizon, 152 | [*策略*], [*几何平均准确度*], 153 | [卷积神经网络分支预测], $96.58%$, 154 | [局部历史分支预测], text(red)[$94.48%$], 155 | [全局分支历史预测], $75.86%$, 156 | )) 157 | 158 | \ 159 | 160 | 同时,我们对局部历史分支预测进行了设计空间探索。由于局部历史分支预测的效果受到分支历史寄存器查找表(Branch History Register Table,BHT)和模式历史查找表(Pattern History Table,PHT)的深度的影响,我们对每一种BHT深度和PHT深度的组合计算出分支预测的失败率,如下图: 161 | #figure( 162 | image("bpu.png", width: 100%), 163 | caption: [ 164 | BPU miss rate 165 | ], 166 | ) 167 | \ 168 | 169 | 可以看到,在4-8的范围内优化BHT的深度更具性价比。最终出于频率的考虑,我们将BHT的深度设置为6,PHT的深度设置为4。 170 | 171 | == 缓存(Cache) 172 | #figure( 173 | image("cache.png", width: 100%), 174 | caption: [ 175 | Cache Design 176 | ], 177 | ) 178 | 在PUA-MIPS中,设计实现了指令缓存和数据缓存。 179 | - PLRU替换策略 180 | - VIPT 181 | - AXI突发传输 182 | - 十六字突发 183 | - 两级TLB平衡延迟 184 | - 两路8KB大缓存 185 | - 高度自由可配置项 186 | - 指令缓存 187 | - 二宽度取指(可通过参数快速配置) 188 | - 数据缓存 189 | - 写队列 190 | - Victim Cache 191 | 192 | 我们通过性能计数器对Cache的命中率做了统计: 193 | 194 | #align(center, tablex( 195 | columns: 3, 196 | align: center + horizon, 197 | auto-vlines: false, 198 | auto-hlines: false, 199 | 200 | /* --- header --- */ 201 | hlinex(start: 0, end: 3), 202 | rowspanx(1)[*测试集*], colspanx(1)[*ICache hit*], rowspanx(1)[*DCache hit*], 203 | hlinex(start: 0, end: 3), 204 | /* -------------- */ 205 | [bitcount],$99.83%$, $96.91%$, 206 | [bubble sort], $99.97%$, $99.79%$, 207 | [coremark], $99.36%$, $95.59%$, 208 | [crc32], $99.98%$, $97.85%$, 209 | [dhrystone], $99.89%$, $81.88%$, 210 | [quick sort], $99.96%$, $97.85%$, 211 | [select sort], $99.98%$, $99.33%$, 212 | [sha], 
$99.94%$, $98.89%$, 213 | [stream copy], $99.65%$, $93.37%$, 214 | [stringsearch], $99.96%$, $90.52%$, 215 | [几何平均值], text(red)[$99.72%$], text(red)[$95.00%$], 216 | hlinex(start: 0, end: 3), 217 | )) 218 | 219 | === 指令缓存(Instruction Cache) 220 | 指令缓存提供了取指支持,大小和相联度可配置,我们使用状态机来控制缓存。 221 | - IDLE:Cache空闲状态。地址经L1TLB查询或直接翻译。若L1TLB发生缺失则暂停,等待L2TLB并处理有可能遇到的异常。 222 | - UNCACHE:向AXI发送读请求或写请求(无突发传输),并等待AXI访存结束。 223 | - REPLACE:Cache缺失后,若没有脏位,需要重新向AXI发送读请求(突发传输),访存并替换对应路的缓存行。其中替换策略采用PLRU。 224 | - SAVE:因TLB缺失或Uncached访问导致Cache暂时无法正常访问数据时的保留状态。 225 | 226 | === 数据缓存(Data Cache) 227 | 数据缓存的状态机相比指令缓存多了一个状态。 228 | - WRITEBACK:当需要清除或替换缓存行时,若存在脏位,则需要将脏数据写回,即向AXI发送写请求(突发传输)。 229 | ==== Victim Cache 230 | 我们通过统计访存指令发现,测试集中经常出现一个程序频繁地使用3个数据,而且恰好处在同一个缓存行中,这导致一个way中的数据经常被踢出数据缓存后又马上读入,发生了“抖动”。Victim Cache可以将被踢出数据缓存的数据暂时保存起来,这一操作极大地减少了数据缓存的缺失率。 231 | ==== 写队列(Write Fifo) 232 | 在我们的设计中,对于Uncached的AXI写请求,我们设置了写请求队列,将AXI写请求数据缓存下来,再逐个写回,该期间如果没有Cached的访问AXI请求,则流水线不会因为写回数据而阻塞,提高了流水线的执行效率。 233 | 234 | == CP0与例外 235 | 我们实现了启动Linux所必需的所有CP0寄存器和例外。 236 | 237 | #align(center, tablex( 238 | columns: 3, 239 | align: center + horizon, 240 | auto-vlines: false, 241 | auto-hlines: false, 242 | column-gutter:1.5em, 243 | 244 | /* --- header --- */ 245 | hlinex(start: 0, end: 3), 246 | rowspanx(1)[*CP0编号*], colspanx(1)[*CP0名称*], rowspanx(1)[*CP0描述*], 247 | hlinex(start: 0, end: 3), 248 | /* -------------- */ 249 | [0], [Index ], [TLB 数组的索引], 250 | [1], [Random ], [随机数], 251 | [2], [EntryLo0], [TLB 项的低位], 252 | [3], [EntryLo1], [TLB 项的低位], 253 | [4], [Context ], [指向内存中页表入口的指针], 254 | [5], [PageMask], [控制 TLB 的虚拟页大小], 255 | [6], [Wired ], [控制 TLB 中固定的页数], 256 | [8], [BadVAddr], [记录最新地址相关例外的出错地址], 257 | [9], [Count ], [处理器内部计数器], 258 | [10], [EntryHi ], [TLB 项的高位], 259 | [11], [Compare ], [计时中断控制器], 260 | [12], [Status ], [处理器状态与控制寄存器], 261 | [13], [Cause ], [存放上一次例外原因], 262 | [14], [EPC ], [存放上一次发生例外指令的 PC], 263 | [15], [PRId ], [处理器版本和标识符], 264 | [15], [EBase ], [中断向量基地址寄存器], 265 | [16], [Config0 ], [处理器配置], 
266 | [28], [TagLo ], [缓存标签 Tag 的低位], 267 | [29], [TagHi ], [缓存标签 Tag 的高位], 268 | [30], [ErrorEPC], [上一次发生错误例外时的程序计数器数值], 269 | hlinex(start: 0, end: 3), 270 | )) 271 | 272 | #align(center, tablex( 273 | columns: 3, 274 | align: center + horizon, 275 | auto-vlines: false, 276 | auto-hlines: false, 277 | column-gutter:1em, 278 | 279 | /* --- header --- */ 280 | hlinex(start: 0, end: 3), 281 | colspanx(1)[*例外编号*],rowspanx(1)[*例外名称*], rowspanx(1)[*例外描述*], 282 | hlinex(start: 0, end: 3), 283 | /* -------------- */ 284 | [00], [INT], [中断异常], 285 | [01], [MOD], [TLB 条目修改异常], 286 | [02], [TLBL], [TLB 非法取指令或访问异常], 287 | [03], [TLBS], [TLB 非法存储访问异常], 288 | [04], [ADEL], [地址未对齐异常(取指令或访问异常)], 289 | [05], [ADES], [地址未对齐异常(存储访问异常)], 290 | [08], [SYS], [系统调用异常], 291 | [09], [BP], [断点异常], 292 | [0a], [RI], [保留指令异常], 293 | [0b], [CPU], [协处理器不可用异常], 294 | [0c], [OV], [算术溢出异常], 295 | [1f], [NO], [无异常], 296 | 297 | hlinex(start: 0, end: 3), 298 | 299 | )) 300 | 301 | #pagebreak() 302 | == 内存管理 303 | CPU使用内存管理单元(MMU)以及相应的地址映射关系来进行虚地址到实地址的转换。针对TLB的设计,我们参考了#link("https://github.com/Maxpicca-Li/CDIM")[CDIM]和#link("https://github.com/gilgamsh/GenshinCPU")[GenshinCPU]的二级TLB设计。这一设计使得我们在使用8项二级TLB的情况下仍能保持较高频率。 304 | 305 | = 系统外设 306 | == 外设 307 | 外设方面,我们修改了官方提供的SoC Up框架,构建了我们的SoC。这套SoC框架通过AXI总线与处理器核进行交互。外设包括: 308 | - DRAM支持:使用板载 DDR3 SDRAM作为主存,大小为128MB;DDR3分配的虚拟地址段为外设占用之外的剩余虚拟地址。 309 | - UART串口:实现串口控制器以调整波特率 310 | - Ethernet以太网:使用IP核构建以太网控制器 311 | - GPIO:使用confreg组件控制LED,数码管等组件 312 | - 图像输出:实现静态图片传输 313 | 314 | 地址映射如下: 315 | 316 | #align(center, tablex( 317 | columns: 4, 318 | align: center + horizon, 319 | auto-vlines: false, 320 | auto-hlines: false, 321 | column-gutter:1em, 322 | 323 | /* --- header --- */ 324 | hlinex(start: 0, end: 4), 325 | colspanx(1)[*设备*],rowspanx(1)[*起始地址*], rowspanx(1)[*终止地址*], rowspanx(1)[*大小*], 326 | hlinex(start: 0, end: 4), 327 | /* -------------- */ 328 | [Flash], [0xBFC00000],[0xBFCFFFFF],[1MB], 329 | [GPIO], [0xBFD00000],[0xBFD0FFFF],[64KB], 330 | [UART], 
[0xBFE40000],[0xBFE43FFF],[16KB], 331 | [Flash Controller], [0xBFE40000],[0xBFE4FFFF],[64KB], 332 | [Nand Flash], [0xBFE78000],[0xBFE7BFFF],[16KB], 333 | [MAC], [0xBFF00000],[0xBFF0FFFF],[64KB], 334 | 335 | hlinex(start: 0, end: 4), 336 | )) 337 | 338 | #pagebreak() 339 | == 系统 340 | 当前PUA-MIPS可以启动uboot、ucore和最新的Linux 6.5.0-rc3。Linux中去除了Branch-likely指令。我们通过串口与Linux进行交互。 341 | #figure( 342 | image("uboot.jpg", width: 90%), 343 | caption: [ 344 | U-BOOT 345 | ], 346 | ) 347 | 348 | #figure( 349 | image("linux.jpg", width: 90%), 350 | caption: [ 351 | LINUX 352 | ], 353 | ) 354 | 355 | #pagebreak() 356 | = 参考资料 357 | - NSCSCC往届处理器设计 358 | - #link("https://github.com/trivialmips/nontrivial-mips")[nontrivial-mips] 给了我们如何启动Linux操作系统的良好示范。 359 | - #link("https://github.com/trivialmips/TrivialMIPS")[TrivialMIPS] 中有许多顺序处理器设计的技巧。 360 | - #link("https://github.com/amadeus-mips")[amadeus-mips] 帮助我们更好地使用Chisel语言。 361 | - #link("https://github.com/SocialistDalao/UltraMIPS_NSCSCC")[UltraMIPS] 给了我们Cache设计的良好示范。 362 | - #link("https://github.com/Maxpicca-Li/CDIM")[CDIM] 为我们提供了高效的测试框架。 363 | - #link("https://github.com/gilgamsh/GenshinCPU")[GenshinCPU] 有许多频率优化的设计方法。 364 | - #link("https://github.com/zencove-thu/zencove-zoom")[zenCove] 在高级语言的使用上启发了我们。 365 | - 开源处理器 366 | - #link("https://github.com/OpenXiangShan/XiangShan")[香山处理器] 给了我们很多架构设计上的启发。 367 | - #link("https://github.com/OSCPU/NutShell")[果壳处理器] 是一个优秀处理器的示范。 368 | - #link("https://github.com/jlpteaching/dinocpu")[dinocpu] 是一个以Chisel语言为基础开设的处理器设计课程,其基于Scala的交互式调试框架给了我们很多帮助。 369 | - 书籍 370 | - #link("https://book.douban.com/subject/35088440/")[计算机组成与设计:硬件/软件接口] 371 | - #link("https://book.douban.com/subject/36108789/")[计算机体系结构:量化研究方法(第6版)] 372 | - #link("https://book.douban.com/subject/35414112/")[CPU设计实战] 373 | - #link("https://book.douban.com/subject/26293546/")[超标量处理器设计] 374 | - #link("https://book.douban.com/subject/25960657/")[自己动手写CPU] 375 | 376 | 
--------------------------------------------------------------------------------