├── .gitignore ├── .vscode └── settings.json ├── README.md ├── doc ├── 16337269_颜彬_16337237_王永锋.rar ├── Component │ ├── ALUs.md │ ├── CommonDataBus.md │ ├── InstructionQueue.md │ ├── Memory.md │ ├── RegisterFile.md │ └── ReservationStation.md ├── InstructionSet.md ├── ShiningPoint.md ├── TODO.md ├── TestCaseKnownBug.md ├── pic │ ├── overview.png │ └── 通路图.vsdx ├── pre.md ├── tex │ ├── .vscode │ │ └── settings.json │ ├── document.bib │ ├── document.pdf │ ├── document.tex │ ├── figure │ │ ├── CDB.png │ │ ├── dataPathDiagram.png │ │ ├── flawDiagram.png │ │ ├── mulALU.jpg │ │ └── simulationiDiagram.png │ └── temp.pdf └── thoughts.md ├── rom ├── rom.mem ├── testcase1.md ├── testcase2.md ├── testcase2.mem ├── testcase3.md ├── testcase3.mem ├── testcase4.md ├── testcase4.mem ├── testcase5.md ├── testcase5.mem ├── testcase6.md └── testcase6.mem ├── source ├── CDB.v ├── CU.v ├── Memory.v ├── PC.v ├── Queue.v ├── RAM.v ├── ROM.v ├── RegFile.v ├── ReservationStation.v ├── decoder.v ├── dfALU.v ├── head.v ├── mfALU.v ├── mux4to1_4.v ├── pmfALU.v └── top.v └── test ├── Memory_tb.v ├── Queue_tb.v ├── ReservationStation_tb.v ├── mdfALU_tb.v ├── register_tb.v ├── tomasulo_tb.v └── top_tb.v /.gitignore: -------------------------------------------------------------------------------- 1 | *ROM.v 2 | Skip to content 3 | This repository 4 | Search 5 | Pull requests 6 | Issues 7 | Marketplace 8 | Explore 9 | @WalkerYF 10 | Sign out 11 | Watch 2,344 12 | Star 59,450 Fork 26,557 github/gitignore 13 | Code Pull requests 153 Projects 0 Insights 14 | Branch: master Find file Copy pathgitignore/TeX.gitignore 15 | 825714c on 29 Nov 16 | @thiminhnhut thiminhnhut Add *.sta (standalone packages) for TeX (#2484) 17 | 40 contributors @maieul @shiftkey @koppor @arcresu @habi @lszeremeta @tjgrilley @seungwonpark @dopefishh @Konfekt @153957 @wojciechwasko @thomwiggers @thiminhnhut @thilaire @rogierslag @ptigwe @orzechow @vkusvody @masgo @lucasgautheron @lighght @izuzak @Gurmeet-Singh @JelteF 
@flashspys and others 18 | RawBlameHistory 19 | 226 lines (177 sloc) 2.18 KB 20 | ## Core latex/pdflatex auxiliary files: 21 | *.aux 22 | *.lof 23 | *.log 24 | *.lot 25 | *.fls 26 | *.out 27 | *.toc 28 | *.fmt 29 | *.fot 30 | *.cb 31 | *.cb2 32 | 33 | ## Intermediate documents: 34 | *.dvi 35 | *.xdv 36 | *-converted-to.* 37 | # these rules might exclude image files for figures etc. 38 | *.ps 39 | *.eps 40 | *.pdf 41 | 42 | ## Generated if empty string is given at "Please type another file name for output:" 43 | .pdf 44 | 45 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 46 | *.bbl 47 | *.bcf 48 | *.blg 49 | *-blx.aux 50 | *-blx.bib 51 | *.run.xml 52 | 53 | ## Build tool auxiliary files: 54 | *.fdb_latexmk 55 | *.synctex 56 | *.synctex(busy) 57 | *.synctex.gz 58 | *.synctex.gz(busy) 59 | *.pdfsync 60 | 61 | ## Auxiliary and intermediate files from other packages: 62 | # algorithms 63 | *.alg 64 | *.loa 65 | 66 | # achemso 67 | acs-*.bib 68 | 69 | # amsthm 70 | *.thm 71 | 72 | # beamer 73 | *.nav 74 | *.pre 75 | *.snm 76 | *.vrb 77 | 78 | # changes 79 | *.soc 80 | 81 | # cprotect 82 | *.cpt 83 | 84 | # elsarticle (documentclass of Elsevier journals) 85 | *.spl 86 | 87 | # endnotes 88 | *.ent 89 | 90 | # fixme 91 | *.lox 92 | 93 | # feynmf/feynmp 94 | *.mf 95 | *.mp 96 | *.t[1-9] 97 | *.t[1-9][0-9] 98 | *.tfm 99 | 100 | #(r)(e)ledmac/(r)(e)ledpar 101 | *.end 102 | *.?end 103 | *.[1-9] 104 | *.[1-9][0-9] 105 | *.[1-9][0-9][0-9] 106 | *.[1-9]R 107 | *.[1-9][0-9]R 108 | *.[1-9][0-9][0-9]R 109 | *.eledsec[1-9] 110 | *.eledsec[1-9]R 111 | *.eledsec[1-9][0-9] 112 | *.eledsec[1-9][0-9]R 113 | *.eledsec[1-9][0-9][0-9] 114 | *.eledsec[1-9][0-9][0-9]R 115 | 116 | # glossaries 117 | *.acn 118 | *.acr 119 | *.glg 120 | *.glo 121 | *.gls 122 | *.glsdefs 123 | 124 | # gnuplottex 125 | *-gnuplottex-* 126 | 127 | # gregoriotex 128 | *.gaux 129 | *.gtex 130 | 131 | # hyperref 132 | *.brf 133 | 134 | # knitr 135 | *-concordance.tex 136 | # TODO Comment the next line if you 
want to keep your tikz graphics files 137 | *.tikz 138 | *-tikzDictionary 139 | 140 | # listings 141 | *.lol 142 | 143 | # makeidx 144 | *.idx 145 | *.ilg 146 | *.ind 147 | *.ist 148 | 149 | # minitoc 150 | *.maf 151 | *.mlf 152 | *.mlt 153 | *.mtc[0-9]* 154 | *.slf[0-9]* 155 | *.slt[0-9]* 156 | *.stc[0-9]* 157 | 158 | # minted 159 | _minted* 160 | *.pyg 161 | 162 | # morewrites 163 | *.mw 164 | 165 | # nomencl 166 | *.nlo 167 | 168 | # pax 169 | *.pax 170 | 171 | # pdfpcnotes 172 | *.pdfpc 173 | 174 | # sagetex 175 | *.sagetex.sage 176 | *.sagetex.py 177 | *.sagetex.scmd 178 | 179 | # scrwfile 180 | *.wrt 181 | 182 | # sympy 183 | *.sout 184 | *.sympy 185 | sympy-plots-for-*.tex/ 186 | 187 | # pdfcomment 188 | *.upa 189 | *.upb 190 | 191 | # pythontex 192 | *.pytxcode 193 | pythontex-files-*/ 194 | 195 | # thmtools 196 | *.loe 197 | 198 | # TikZ & PGF 199 | *.dpth 200 | *.md5 201 | *.auxlock 202 | 203 | # todonotes 204 | *.tdo 205 | 206 | # easy-todo 207 | *.lod 208 | 209 | # xindy 210 | *.xdy 211 | 212 | # xypic precompiled matrices 213 | *.xyc 214 | 215 | # endfloat 216 | *.ttt 217 | *.fff 218 | 219 | # Latexian 220 | TSWLatexianTemp* 221 | 222 | ## Editors: 223 | # WinEdt 224 | *.bak 225 | *.sav 226 | 227 | # Texpad 228 | .texpadtmp 229 | 230 | # Kile 231 | *.backup 232 | 233 | # KBibTeX 234 | *~[0-9]* 235 | 236 | # auto folder when using emacs and auctex 237 | ./auto/* 238 | *.el 239 | 240 | # expex forward references with \gathertags 241 | *-tags.tex 242 | 243 | # standalone packages 244 | *.sta 245 | © 2017 GitHub, Inc. 
246 | Terms 247 | Privacy 248 | Security 249 | Status 250 | Help 251 | Contact GitHub 252 | API 253 | Training 254 | Shop 255 | Blog 256 | About 257 | source/ROM.v 258 | doc/PrePreparation.md 259 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "latex-workshop.chktex.enabled": true, 3 | "todohighlight.isEnable": true, 4 | "latex-workshop.latex.toolchain": [ 5 | { 6 | "command": "xelatex", 7 | "args": [ 8 | "-synctex=1", 9 | "-shell-escape", 10 | "%DOC%" 11 | ] 12 | }, { 13 | "command": "bibtex", 14 | "args": [ 15 | "%DOCFILE%" 16 | ] 17 | }, { 18 | "command": "xelatex", 19 | "args": [ 20 | "-synctex=1", 21 | "-shell-escape", 22 | "%DOC%" 23 | ] 24 | }, { 25 | "command": "xelatex", 26 | "args": [ 27 | "-synctex=1", 28 | "-shell-escape", 29 | "%DOC%" 30 | ] 31 | } 32 | ], 33 | "todohighlight.include": [ 34 | "**/*.js", 35 | "**/*.jsx", 36 | "**/*.ts", 37 | "**/*.tsx", 38 | "**/*.html", 39 | "**/*.php", 40 | "**/*.css", 41 | "**/*.scss", 42 | "**/*.tex", 43 | "**/*.cpp" 44 | ] 45 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tomasulo 2 | [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/dwyl/esta/issues) 3 | 4 | End-term project for Computer Organisation Principle Course. 5 | An efficient pipeline CPU based on tomasulo algorithm, implemented in `verilog` 6 | 7 | ## Installation 8 | ``` bash 9 | $ git clone git@github.com:YanB25/Tomasulo.git 10 | ``` 11 | ## Usage 12 | Manually add all the files in `scource/` into `vivado` and just run. 13 | Add files in `test/`, set one of them *as top* to try the testcases. 
14 | ## Algorithm Introduction 15 | From [wiki][wiki] 16 | > Tomasulo’s algorithm is a computer architecture hardware algorithm for dynamic scheduling of instructions that allows out-of-order execution and enables more efficient use of multiple execution units. It was developed by Robert Tomasulo at IBM in 1967. 17 | 18 | ### Description 19 | The Tomasulo algorithm is an out-of-order execution algorithm for pipelined CPUs, which dynamically rearranges the order of instructions to minimize the idle time of execution units such as ALU and RAM. 20 | The major innovations are listed below: 21 | 22 | - Register renaming in hardware 23 | - Reservation stations for all execution units 24 | - Common Data Bus to broadcast signals asynchronously 25 | 26 | The algorithm is a superior approach to parallel execution compared to the use of `scoreboarding` or other earlier algorithms. 27 | 28 | ### The Whole Picture 29 | 30 | 31 | ### Terminology 32 | 1. 块 33 | 存储信息的单位。若干有关联的数据放在一起称为块。例如op和func和rs,rd,rt等存储在一起,称为一个块。 34 | 1. 标志位 35 | 用于标志“是”或“否”的位。 36 | 1. 行 37 | 一行包括一个块和对应的标志位 38 | ## Instruction Set 39 | 支持除分支指令外的大部分常用MIPS指令。 40 | [指令集编码][is] 41 | 想要[支持更多指令][todo]? 
42 | ## Component 43 | ### PC & ROM 44 | 没有从图中画出来。向指令队列发射指令(当指令队列非满时。) 45 | ### Instruction Queue 46 | 指令队列。由于Tomasulo算法顺序发射指令,故由指令队列保证其发射的顺序性。当一个指令(例如`add`)对应的器件(`ALU`)不忙时,指令发射;否则发生结构冲突,需要阻塞等待。 47 | [more detail][iq] 48 | ### Common Data Bus 49 | CDB.所有刚执行完得到的数据的广播都要通过该总线完成。 50 | 每个执行器件(如各个ALU),向CDB Helper发送`require`信号请求广播。 51 | CDB保证其广播信号在一个周期内不发生更改。 52 | [more detail][cdb] 53 | ### CDB Queue 54 | **deprecated.** 55 | 不使用CDB队列。 56 | 见CDB Helper 57 | 58 | ### Register File 59 | 寄存器文件。时序电路。 60 | 当时钟下降沿到达后: 61 | 检查CDB的广播,若该广播的数据被监听,则将数据更新入寄存器文件中。 62 | 检查当前指令的`rd`,记录`rd`所等待的label。 63 | [Details here][rf] 64 | ### Reservation Station 65 | 保留站。包括加减ALU保留站和乘除的保留站。 66 | 67 | 每个CPU周期,一条算逻运算指令将发射到对应ALU保留站处。 68 | 该指令要么所有的操作数都已准备好(立即可以被执行,label==0),或部分操作数由`label`(label != 0)代替,正等待`CDB`的广播。 69 | [more detail][rs] 70 | ### ALU(FPU) 71 | 算术逻辑单元和浮点运算单元。 72 | 不同的运算需要不同的CPU周期完成。由于该种延迟,基于[保留站][rs]的乱序执行可以为其大大提速。 73 | [more detail][alu] 74 | 75 | ## Shining Points 76 | 传统Tomasulo教材资料只给出了算法的软件模拟实现或伪代码实现。具体的硬件实现会遇到许多瓶颈。本项目对其中的一些难点做了突破,体现了一些创新性。 77 | [more detail][sp] 78 | 79 | ## Testcase & Known Bugs 80 | All test cases have passed. 81 | Pull requests contributing more test cases are welcome. 82 | For test case information or known bugs, 83 | [check here][tckb] 84 | 85 | ## Bugs & Helps 86 | To report a bug or get help, you can visit the [Issues page][issue]. 87 | 88 | ## Contribute 89 | To contribute code, please contact us in the [Issues page][issue]. 90 | You can refer to the [TODO-List][todo] to find out what the project still needs. 
91 | 92 | 93 | [rs]:doc/Component/ReservationStation.md 94 | [is]:doc/InstructionSet.md 95 | [iq]:doc/Component/InstructionQueue.md 96 | [cdb]:doc/Component/CommonDataBus.md 97 | [rf]:doc/Component/RegisterFile.md 98 | [alu]:doc/Component/ALUs.md 99 | [wiki]:https://en.wikipedia.org/wiki/Tomasulo_algorithm 100 | [issue]:https://github.com/YanB25/Tomasulo/issues 101 | [todo]:doc/TODO.md 102 | [sp]:doc/ShiningPoint.md 103 | [tckb]:doc/TestCaseKnownBug.md -------------------------------------------------------------------------------- /doc/16337269_颜彬_16337237_王永锋.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/16337269_颜彬_16337237_王永锋.rar -------------------------------------------------------------------------------- /doc/Component/ALUs.md: -------------------------------------------------------------------------------- 1 | # ALUs 2 | Tomasulo算法利用动态调度的方法,充分发挥出多个ALU的效率。 3 | 本项目现实现了`定点数加减ALU`,`定点数乘法ALU`,和`定点数除法ALU`。 4 | 其中各个运算指令所消耗的CPU周期如下。 5 | |Instruction|Cycle(s)| 6 | |:-:|:-:| 7 | |add|1| 8 | |sub|2| 9 | |and|1| 10 | |or|1| 11 | |sll|1| 12 | |slt|1| 13 | |multiplication|5| 14 | |division|32| 15 | Tomasulo算法将动态调度指令执行顺序、避免读写冲突,尽可能地减少ALU的闲置,提高ALU效率。 16 | ## Common Signal 17 | 各个ALU都具有如下的各个信号。 18 | ``` verilog 19 | input WEN; 20 | input requireAC; // for require accepted 21 | output require; 22 | output busy; 23 | ``` 24 | `WEN`信号表示是否有来自上一级的请求。例如列队中是否存在等待计算的数据。若`WEN`为0,下一个周期后,ALU将进入`idle`(闲置)状态,知道新任务来临。 25 | `require`信号表示ALU已工作完毕,请求(require)`CDB`总线广播数据。 26 | `requireAC`表示`CDB`总线接受请求,予以广播。由于总线采用分时复用的方式运作,当`requireAC`返回0时,代表总线忙,广播请求被拒绝。 27 | `busy`表示ALU正在工作。busy为1时将拒绝来自上一级的运算请求。 28 | ## ALU for Add and Sub 29 | 负责加减运算。 30 | //TODO 31 | ``` verilog 32 | module state( 33 | input clk, 34 | input nRST, 35 | output reg [1:0] stateOut, 36 | input WEN, // input ENable from reservation 37 | input resultAC, //whether result is ACcepted by CDB 38 | 
output require, // send to CDB 39 | input op 40 | ); 41 | module pmALU ( // plus/minus ALU 42 | input clk, 43 | input nRST, 44 | input EN, 45 | input [31:0] dataIn1, 46 | input [31:0] dataIn2, 47 | input [1:0] state, 48 | output reg [31:0] result 49 | ); 50 | ``` 51 | ## ALU for multiplication 52 | 与多周期加减ALU一致,只是实现方式和状态转换不同 53 | ## ALU for division -------------------------------------------------------------------------------- /doc/Component/CommonDataBus.md: -------------------------------------------------------------------------------- 1 | # Common Data Bus 2 | ## CDB 3 | ``` verilog 4 | module CDB( 5 | input [31:0] data0, 6 | input [4:0] label0, 7 | input [31:0] data1, 8 | input [4:0] label1, 9 | input [31:0] data2, 10 | input [4:0] label2, 11 | input [31:0] data3, 12 | input [4:0] label3, 13 | input [3:0] sel, 14 | output [31:0] dataOut, 15 | output [4:0] labelOut, 16 | output EN 17 | ); 18 | ``` 19 | ## CDB Helper 20 | 一个优先译码器。组合逻辑。 21 | 当各个器件向CDB发送传播请求时(传送1),只有一个器件能得到接受回应(1),其余器件都得到拒绝回应(0)。 22 | ``` verilog 23 | module CDBHelper( 24 | input [3:0] requires, 25 | output reg [3:0] accepts 26 | ); 27 | ``` -------------------------------------------------------------------------------- /doc/Component/InstructionQueue.md: -------------------------------------------------------------------------------- 1 | # Instruction Queue 2 | ## IO Ports 3 | ``` verilog 4 | module Queue( 5 | input clk, 6 | input insIn[31:0], 7 | output isFull, 8 | output insOut[31:0] 9 | ) 10 | ``` -------------------------------------------------------------------------------- /doc/Component/Memory.md: -------------------------------------------------------------------------------- 1 | # Memory 2 | [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/dwyl/esta/issues) 3 | 4 | ## summary 5 | 1. 
第一个 6 | 7 | 8 | ## I/O port 9 | ``` 10 | 11 | module Memory( 12 | input clk, // 时钟信号 13 | input WEN, // 可写信号,高电平有效 14 | input [31:0] dataIn1, // 操作数1, 来自rs寄存器 15 | input [31:0] dataIn2, // 操作数2, 来自立即数 16 | input op, // for example, 1 is load, 0 is write 17 | input [31:0] writeData, // 要写的数据 18 | input [3:0] labelIn, // 当前指令的保留站 19 | output reg [3:0] labelOut, // 当前指令的保留站 20 | output [31:0] loadData, // 当前指令lw, 取出的操作数 21 | output reg available, // 存储器是否可用 22 | output reg require, // 向CDB请求写入 23 | input requireAC // 向CDB获取写入状态 24 | ); 25 | 26 | 27 | ``` -------------------------------------------------------------------------------- /doc/Component/RegisterFile.md: -------------------------------------------------------------------------------- 1 | # Register File 2 | ## IO Ports 3 | ``` verilog 4 | module RegisterFile( 5 | input clk, 6 | input nRST, 7 | input [4:0] ReadAddr1, 8 | input [4:0] ReadAddr2, 9 | input RegWr, 10 | input [4:0] WriteAddr, 11 | input [31:0] WriteLabel, 12 | output [31:0] DataOut1, 13 | output [31:0] DataOut2, 14 | output [4:0] LabelOut1, 15 | output [4:0] LabelOut2, 16 | input BCEN, 17 | input [4:0] BClabel, 18 | input [31:0] BCdata 19 | ); 20 | ``` -------------------------------------------------------------------------------- /doc/Component/ReservationStation.md: -------------------------------------------------------------------------------- 1 | # Reservation Station 2 | [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/dwyl/esta/issues) 3 | ## summary 4 | 清零信号到达后: 5 | 6 | 设置了三个保留站 7 | 1. 当时钟上升沿到达的时候, 8 | 1. 由信号isFull反映是否可写及写成功,将对应的值写进保留站中 9 | (不提供控制写地址的端口,只向外界告知是否写成功) 10 | 2. 从CDB中读取信息,若CDB可用,则上升沿写入对应保留站中的寄存器,并修改相应Qi/Qj 11 | 2. 时钟下降沿到达时,若保留站中存在操作数就绪的指令,则对外输出就绪指令数据及保留站号 12 | 1. 输入信号EXEable若反映ALU不可用, 则对应Busy位不修改,否则将已输出的指令对应的Busy清零 13 | 2. 
输出信号OutEn为0反映输出不可用(指令处于未就绪状态),反之则就绪,ALU可写 14 | 15 | ## 编号 16 | |保留站名称|保留站编号| 17 | |:-:|:-:| 18 | |alu0|0100| 19 | |alu1|0101| 20 | |alu2|0110| 21 | |mul0|1000| 22 | |mul1|1001| 23 | |mul2|1010| 24 | |data0|1100| 25 | |data1|1101| 26 | |data2|1110| 27 | 28 | 33 | 34 | ## IO Ports 35 | > 前提:用于索引label的地址的位数为5 36 | 37 | ``` verilog 38 | module ReservationStation( 39 | input clk, 40 | input nRST, 41 | input EXEable, // whether the ALU is available and ins can be issued 42 | input WEN, // Write ENable 43 | 44 | input [4:0] opCode, 45 | input [4:0] func, 46 | input [31:0] dataIn1, 47 | input [4:0] label1, 48 | input [31:0] dataIn2, 49 | input [4:0] label2, 50 | 51 | input BCEN, // BroadCast ENable 52 | input [4:0] BClabel, // BroadCast label 53 | input [31:0] BCdata, //BroadCast value 54 | 55 | output reg [4:0] opOut, 56 | output reg [31:0] dataOut1, 57 | output reg [31:0] dataOut2, 58 | output isFull, // whether the buffer is full 59 | output OutEn, // whether output is valid 60 | output [4:0]labelOut 61 | ); 62 | ``` -------------------------------------------------------------------------------- /doc/InstructionSet.md: -------------------------------------------------------------------------------- 1 | # Instruction Set 2 | ## R-Format 3 | |op(6)|rs(5)|rt(5)|rd(5)|rev(5)|func(6)| 4 | |-|-|-|-|-|-| 5 | 6 | |Instruction|Function Code| 7 | |:--:|:--:| 8 | |add|100000| 9 | |sub|100011| 10 | |and|100100| 11 | |or|100101| 12 | 13 | ## I-Format 14 | |op(6)|rs(5)|rt(5)|immd(16)| 15 | |-|-|-|-| 16 | 17 | |Instruction|Operation Code| 18 | |:--:|:--:| 19 | |slti|010001| 20 | |addi|001000| 21 | |ori|001101| 22 | |sw|101011| 23 | |lw|100011| 24 | 25 | ## J-Format 26 | |op(6)|immd(26)| 27 | |-|-| 28 | 29 | |Instruction|Operation Code| 30 | |:--:|:--:| 31 | |halt|111111| 32 | `halt` is the only J-format instruction supported by this project so far. 
-------------------------------------------------------------------------------- /doc/ShiningPoint.md: -------------------------------------------------------------------------------- 1 | # Shining Point 2 | 传统Tomasulo教材资料只给出了算法的软件模拟实现或伪代码实现。具体的硬件实现会遇到许多瓶颈。本项目对其中的一些难点做了突破,体现了一些创新性。 3 | ## Architecture 4 | 框架流水线,局部并行化,部件多周期。 5 | - 流水线 6 | 总框架大致可以分为`指令发射`,`执行`,`广播`等三个阶段。各个阶段流水执行。即每个时钟周期(几乎)保证有一条指令被执行,一份数据被广播。 7 | - 并行化 8 | 多个ALU并行地执行数据。一旦指令的操作数准备完毕,即可从保留站发射到ALU处。各个ALU的运算独立进行,互不干涉。 9 | - 多周期 10 | 部件多周期更符合实际情况,本设计中各个执行单元都有`state`部件用于控制状态。所有执行和存储器件都在各个周期内分步骤完成。 11 | 12 | ## mALU 13 | 利用阵列乘法器加速定点数乘法。 14 | 采用$32 + 16 +... +1=63$个简易的加法电路,按5层的方式排布成阵列,并行地计算乘法。将乘法的运行时间缩短至5个CPU时钟。 15 | ## Queue in Hardware 16 | 利用硬件实现队列。注意到并解决了所有的所有的难点。包括 17 | 18 | - 计算空余位置号 19 | 利用组合电路正确计算队列中的空余位置号 20 | - 分配保留站号 21 | 每次新指令进队时,正确地分配唯一的保留站号 22 | - 处理广播冲突 23 | 当进队的指令中的保留站号恰好为正在广播的保留站号时,队列能正确地将广播中的数据替换指令的数据,再写进队列里 24 | - 正确判断“伪满” 25 | 若队列已满,但下一个周期到来时队列能发射一条指令,则队列实质上仍可以接受指令,并没有处于真正满的状态。本设计能正确识别“伪满”现象,最大限度保证指令流动。 26 | ## Passing Extreme Testcase 27 | 通过了所有的边界条件测试样例。 28 | 在算法的实际设计中,受到硬件时序的约束,会产生极其多的边界条件。例如 29 | - 指令队列流出 30 | 指令队列需要判断当前指令所在的保留站是否满。当满时,指令无法流出。 31 | - 广播与写入 32 | 当前广播的信号,恰好对应着当前写入信号的保留站号。此时器件应能正确捕获广播,避免遗漏。 33 | - 执行单元的状态转换 34 | 当执行单元(例如ALU)将运算执行完毕时,它需要考虑以下几种状况:`CDB`总线是否忙碌,保留站是否仍发来请求。 35 | ALU的附属器件`state`模块需要对其进行分析,判断其接下来进入的状态。 36 | - CDB繁忙 37 | CDB是所有“写”操作的唯一总线。当多个器件同时企图写总线时,将会引发冲突,此时需要一个优先译码器决定哪个执行器件的输出可以被广播。被拒绝广播的器件必须阻塞等待,直到CDB总线接受广播。 38 | 39 | ## Start From Scratch 40 | 从零开始。 41 | 本架构与传统的单周期、多周期和流水线CPU的架构完全不同。几乎无代码可以重用。本项目所有代码从零写起。 42 | 43 | ## Good Coding Style 44 | 良好的代码风格。 45 | - 采用宏定义增强可读性 46 | 将所有常量写入头文件中,便于管理。所有常数都用宏定义代替,增强可读性。 47 | - generate 语法 48 | 当大量产生相同器件,或进行相同的连线时,采用`verilog 2001`标准中加入的`generate`语法,形如 49 | ``` verilog 50 | generate 51 | genvar i 52 | for (i = 0; i < n; i = i + 1) begin: Loop 53 | // codes here 54 | end 55 | endgenerate 56 | ``` 57 | 以达到效率、准确地描述硬件的效果。 -------------------------------------------------------------------------------- /doc/TODO.md: 
-------------------------------------------------------------------------------- 1 | # TODO List 2 | ## 支持分支指令 3 | ### 阻塞型 4 | 通过阻塞防止冲突。当一条分支指令发射时,阻止所有指令发射,直到该分支指令完成。 5 | ### 支持前瞻执行 6 | 在分支指令`A`发射后,所有其后的指令都带一个`tag A`。在广播时,所有带有`tag`的信号都不直接写入存储单元中,而是先写入缓冲区中。直到该分支指令`A`的结果确定后,分支结果向所有器件广播,所有器件决定将缓冲区中的内容丢弃或写入。 7 | ## 支持分支预测 8 | ### 低地址预测跳转 9 | 低地址永远预测跳转,高地址永远预测不跳转 10 | ### 历史表 11 | 采用历史表的方式作分支预测。 12 | ## 支持FPU 13 | ### IEEE754转换 14 | 在一个CPU周期内,实现将32位浮点数*分割、转换*成机器能直接处理的数据段,例如`尾数`和`阶码`等。 15 | ### 浮点加减 16 | ### 浮点乘除 17 | ## 非阻塞 cache 18 | 非阻塞 cache 在 cache 缺失时能继续提供 cache 访问服务。为了使指令在 cache 缺失时能继续执行,乱序执行处理器需要非阻塞 cache 的支持。 19 | -------------------------------------------------------------------------------- /doc/TestCaseKnownBug.md: -------------------------------------------------------------------------------- 1 | # Test Case & Known Bugs 2 | Known Bugs are ordered by priority. 3 | ## Test Case 4 | - [first test case - walker_yf][1] **Passing** 5 | - [second test case - yanb25][2] **Passing** 6 | - [third test case - walker_yf][3] **Passing** 7 | - [fourth test case - walker_yf][4] **Passing** 8 | - [fifth test case - yanb25][5] **Passing** 9 | - [sixth test case - walker_yf][6] **Passing** 10 | 11 | 12 | [1]:/rom/testcase1.md 13 | [2]:/rom/testcase2.md 14 | [3]:/rom/testcase3.md 15 | [4]:/rom/testcase4.md 16 | [5]:/rom/testcase5.md 17 | [6]:/rom/testcase6.md 18 | 19 | ## Known Bugs 20 | ### Critical 21 | - ~~pmfALU~~ (fixed) 22 | pmfALU does not correctly deal with subtraction 23 | - ~~Reservation~~ (fixed) 24 | Reservation does not recognise `halt` and keeps getting instructions. 
25 | - ~~Reservation~~(fixed) 26 | 当数据没有流动时(即没有新指令入站和(或)没有指令发射时),保留站无法根据广播更新指令的数据。 27 | - ~~error in issue~~(fixed) 28 | 所有的器件的“流出”时序错误。一个器件不能在“下游器件接受请求”就马上把busy清零,而应等到CDB将该指令的执行结果广播完毕后再清零。 29 | 与寄存器换名问题相关。 30 | - ~~PC & PCHelper~~(fixed) 31 | halt doesn't work 32 | ### Warning 33 | - ReservationStation 34 | 没有在清零信号到来时,将所有的Qk,Qj,Vk,Vj等寄存器清零 35 | 36 | ### Coding Style 37 | - Reservation Label 38 | 0号保留站号是不可用的。由于某些历史原因,所有保留站号的高两位+1以暂时解决冲突。 39 | 正确的冲突解决方法应该改写头文件,并让各个器件的保留站号“天然”地从正确的序号开始。 40 | 暂时不会带来Warning或更严重的报错。 41 | - isLastState 42 | 使用了不优美的isLastState信号来同步Queue和RAM之间的工作。 43 | isLastState不是最后一个阶段,而是倒数第二个阶段。 44 | 本质原因是缺少流水的段寄存器。 45 | - ResStationDst 46 | 在op为sw和lw时,将ResStationDst设为`2'b11`会出错,导致halt指令无法被识别。故在top模块采用了变通的方法。 -------------------------------------------------------------------------------- /doc/pic/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/pic/overview.png -------------------------------------------------------------------------------- /doc/pic/通路图.vsdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/pic/通路图.vsdx -------------------------------------------------------------------------------- /doc/pre.md: -------------------------------------------------------------------------------- 1 | 16337269 颜彬 2 | 16337237 王永锋 3 | # 1. 选题及主要工作 4 | 基于Tomasulo算法的乱序执行多周期CPU的实现(采用verilog语言) 5 | 6 | 7 | # 2. Tomasulo算法简单介绍 8 | 1. 目前的CPU的局限主要在于各种冲突带来的CPU停顿时间所导致的性能限制。而Tomasulo算法主要针对数据冲突进行了优化,让指令能够做到乱序执行,数据无关的指令尽早运行,数据相关的指令在解决相关性后马上运行,从而让ALU一直处于工作状态,减少了CPU停顿时间 9 | 2. 该算法的创新点有两个 10 | 1. 使用寄存器换名技术 11 | 2. 使用数据总线发送数据,每一个寄存器和保留站会在每一个时钟上升沿都根据数据总线的信号更新自身的状态 12 | 3. 在数据通路图上,该算法如此实现(展示数据通路) 13 | 1. 每一条指令的运行过程:更新PC-指令发射-执行-广播写回 14 | 1. 指令流入保留站,并更新寄存器的状态表 15 | 1. 已就绪的操作数,直接从寄存器读进来,从而与寄存器文件解耦合 16 | 2. 
对于操作数未就绪的指令,在保留站中等待数据写入 17 | 2. 由保留站确定并发射当前操作数**就绪**指令给ALU执行 18 | 1. 谁先准备好谁发射,如果没有准备 19 | 3. ALU执行后,将数据发送到公共数据总线上,各保留站和寄存器根据公共数据总线的信息决定是否更新自身数值 20 | (此过程,保留站中一些需要等待操作数写入的指令就会进入就绪状态) 21 | 22 | 23 | # 3. 使用硬件实现Tomasulo算法,技术难点 24 | [ShiningPoint][sp] 25 | 26 | # 4. 效果评价,分析 27 | 在现在Tomasulo算法资料相当匮乏的情况下,我们仅凭书本还有维基百科上的介绍,先是证明了算法的正确性,然后进行硬件的模块设计,时序设计, 28 | 其中硬件的整体架构,参考了书本的结构图,而硬件的接口设计以及时序设计,网上找不到成熟的教程,都是我们从零开始,一步一步摸索实现出来的。 29 | 在这个过程中,我们也踩了很多坑,也认识到硬件设计中时序设计的困难。 30 | 希望我们完成的这一个项目,不是这个CPU设计的结束,而是乱序执行CPU实现的起点,我们需要进一步完善模块的接口设计,我们也还有很多工作需要做(todo list), 我希望,这一个有着详尽文档的设计项目,能够给之后的同学启发,在此基础上,实现分支预测,实现前瞻执行等特性。 31 | 32 | # 5. 参考资料 33 | 1. 张晨曦 《计算机体系结构》 34 | 2. 维基百科,Tomasulo Algorithm 35 | 36 | 37 | [sp]:ShiningPoint.md 38 | -------------------------------------------------------------------------------- /doc/tex/.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/.vscode/settings.json -------------------------------------------------------------------------------- /doc/tex/document.bib: -------------------------------------------------------------------------------- 1 | @article{test, 2 | title={The effect on mice of minute doses of B. 
anthracis}, 3 | author={Barber, Marshall Albert}, 4 | journal={The Journal of Infectious Diseases}, 5 | pages={634--661}, 6 | year={1909}, 7 | publisher={JSTOR} 8 | } 9 | 10 | @misc{book:zcs, 11 | title={计算机体系结构}, 12 | author={张晨曦 and 王志英 and 张春元 and 戴葵}, 13 | year={2000}, 14 | publisher={北京: 高等教育出版社} 15 | } 16 | 17 | @misc{wiki:tomasulo, 18 | author = "Wikipedia contributors", 19 | title = "Tomasulo algorithm --- Wikipedia{,} The Free Encyclopedia", 20 | year = "2018", 21 | url = "\url{https://en.wikipedia.org/w/index.php?title=Tomasulo_algorithm&oldid=818527174}", 22 | note = "[Online; accessed 7-January-2018]" 23 | } -------------------------------------------------------------------------------- /doc/tex/document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/document.pdf -------------------------------------------------------------------------------- /doc/tex/document.tex: -------------------------------------------------------------------------------- 1 | %---------------------------------------------------------------------------------------- 2 | % PACKAGES AND OTHER DOCUMENT CONFIGURATIONS 3 | %---------------------------------------------------------------------------------------- 4 | \documentclass[twoside]{article} 5 | \usepackage{url} 6 | \usepackage{minted} 7 | 8 | \newminted{verilog}{mathescape, 9 | linenos, 10 | numbersep=5pt, 11 | gobble=2, 12 | frame=lines, 13 | framesep=2mm, 14 | breaklines=true} 15 | \usepackage{lipsum} % Package to generate dummy text throughout this template 16 | \usepackage[table,svgnames]{xcolor} 17 | \usepackage{ctex} 18 | \usepackage[sc]{mathpazo} % Use the Palatino font 19 | \usepackage[T1]{fontenc} % Use 8-bit encoding that has 256 glyphs 20 | \linespread{1.3} % Line spacing - Palatino needs more space between lines 21 | 22 | 
\usepackage[hmarginratio=1:1,top=32mm,columnsep=20pt]{geometry} % Document margins 23 | \usepackage[hang, small,labelfont=bf,up,textfont=it,up]{caption} % Custom captions under/above floats in tables or figures 24 | \usepackage{booktabs} % Horizontal rules in tables 25 | \usepackage{float} % Required for tables and figures in the multi-column environment - they need to be placed in specific locations with the [H] (e.g. \begin{table}[H]) 26 | \usepackage[colorlinks, 27 | linkcolor=blue, 28 | anchorcolor=blue, 29 | citecolor=blue 30 | ]{hyperref} 31 | \usepackage{lettrine} % The lettrine is the first enlarged letter at the beginning of the text 32 | \usepackage{paralist} % Used for the compactitem environment which makes bullet points with less space between them 33 | 34 | \usepackage{abstract} % Allows abstract customization 35 | \renewcommand{\abstractnamefont}{\normalfont\bfseries} % Set the "Abstract" text to bold 36 | \renewcommand{\abstracttextfont}{\normalfont\small\itshape} % Set the abstract itself to small italic text 37 | \usepackage[super,square]{natbib} % 修改文献引用样式 38 | \usepackage{titlesec} % Allows customization of titles 39 | \renewcommand\thesection{\Roman{section}} % Roman numerals for the sections 40 | \renewcommand\thesubsection{\Roman{subsection}} % Roman numerals for subsections 41 | \titleformat{\section}[block]{\large\scshape\centering}{\thesection.}{1em}{} % Change the look of the section titles 42 | \titleformat{\subsection}[block]{\large}{\thesubsection.}{1em}{} % Change the look of the section titles 43 | \usepackage{fancyhdr} % Headers and footers 44 | \pagestyle{fancy} % All pages have headers and footers 45 | \fancyhead{} % Blank out the default header 46 | \fancyfoot{} % Blank out the default footer 47 | \fancyhead[C]{计算机组成与设计$\bullet$ 1月 2018 $\bullet$ 课程设计 } % Custom header text 48 | \fancyfoot[RO,LE]{\thepage} % Custom footer text 49 | 50 | 51 | \def\equationautorefname{式}% 52 | \def\footnoteautorefname{脚注}% 53 | 
\def\itemautorefname{项}% 54 | \def\figureautorefname{图}% 55 | \def\tableautorefname{表}% 56 | \def\partautorefname{篇}% 57 | \def\appendixautorefname{附录}% 58 | \def\chapterautorefname{章}% 59 | \def\sectionautorefname{节}% 60 | \def\subsectionautorefname{小小节}% 61 | \def\subsubsectionautorefname{subsubsection}% 62 | \def\paragraphautorefname{段落}% 63 | \def\subparagraphautorefname{子段落}% 64 | \def\FancyVerbLineautorefname{行}% 65 | \def\theoremautorefname{定理}% 66 | 67 | 68 | %---------------------------------------------------------------------------------------- 69 | % TITLE SECTION 70 | %---------------------------------------------------------------------------------------- 71 | 72 | \title{\vspace{-15mm}\fontsize{24pt}{10pt}\selectfont\textbf{基于Tomasulo算法的\\乱序执行CPU的实现}} % Article title 73 | 74 | \author{ 75 | \large 76 | \textsc{颜彬 \(\quad\) 王永锋}\\[2mm] % Your name 77 | \textsc{16337269 16337237} \\ [2mm] 78 | \normalsize 中山大学 教务三班 \\ % Your institution 79 | \vspace{-5mm} 80 | } 81 | \date{2017年1月3日} 82 | %---------------------------------------------------------------------------------------- 83 | 84 | \begin{document} 85 | 86 | \maketitle % Insert title 87 | 88 | \thispagestyle{fancy} % All pages have headers and footers 89 | %---------------------------------------------------------------------------------------- 90 | % ABSTRACT 91 | %---------------------------------------------------------------------------------------- 92 | 93 | \begin{abstract} 94 | 本文主要描述了一种基于Tomasolu算法的支持乱序执行的CPU的实现。网络上很少关于该算法的verilog实现,本文创新性地根据该算法的理论,从模块设计,时序设计等不同角度,独立进行分析、设计该算法硬件层面的实现。在最后,本文还对该算法的优点及不足及可能的提高方法做出了进一步的阐述,并为之后该CPU效率的提高从分支预测,前瞻执行等方面做出了适当的预测和展望。 95 | 96 | \textbf{关键词: }\textbf{乱序执行} \textbf{动态调度} \textbf{$\ $verilog} \textbf{ $\ $ CPU运行时间} 97 | \end{abstract} 98 | 99 | %---------------------------------------------------------------------------------------- 100 | % ARTICLE CONTENTS 101 | %---------------------------------------------------------------------------------------- 102 
| 103 | \section{选题背景} 104 | \lettrine[nindent=0em,lines=3]{T}omasulo算法是一种在乱序执行流水线CPU中使用的,用于对指令的顺序进行动态调度的算法。在流水线处理器中,先后执行的指令往往具有相关性,如上一条指令将一个数字写进寄存器中,下一条指令马上就要用这一个寄存器中存有的值来进行下一步的计算。这样的数据相关会导致流水线处理器中运行的冲突,这个时候可能可以通过旁路解决,但更多的时候,处理器只能够通过插入一个气泡,堵塞处理器的运行,才能够解决这一种数据冲突。由此,我们可以看出,传统顺序执行的流水线处理器,在处理具有极多数据相关的代码的时候,只能以较低的效率进行计算。\\ 105 | 106 | 为了降低数据相关带来的处理器停顿时间,一般有两种处理方式。\cite{book:zcs} 107 | \begin{description} 108 | \item[静态调度] 静态调度的流水线依靠编译器对代码进行静态调度,以减少相关冲突。此类调度方式,是通过程序在进行编译的时候就把相关的指令拉开距离,来减少可能产生的停顿。由于是在编译期间进行的指令调度,在程序执行阶段,指令的顺序不能够进行改变,“静态”由此而来。 109 | \item[动态调度] 动态的指令调度是在程序的执行过程中,依靠专门的硬件对指令进行调度。该种调度方式能够处理一些编译时情况不明的相关,还能够让代码的执行效率与产生指令的编译器解构。目前许多现代的处理器都采用了这种技术。 110 | \end{description} 111 | 112 | 在本文中,我们关注对指令的动态调度的算法实现。首先探讨一下指令动态调度算法的可行性,要实现指令的动态调度,也就是说我们需要做到在运行过程中,由CPU自行判断哪些指令能够提前运行,同时还能够做到保持数据流和控制异常行为。为了实现这个目的,一个典型的算法是记分牌算法,该算法能够做到与前文无关的指令可以尽早进入执行阶段,从而让处理器的停顿时间减少,但此算法并不能真正解决指令中常常存在的反相关和输出相关,反而,还有可能会让原本的伪相关在乱序执行下变为真相关,从而又从另一个角度增加了处理器的停顿时间。 113 | 114 | 另一类实现动态调度的算法是Tomasulo算法,该算法解决了记分牌算法的缺陷。相比起记分牌算法,Tomasulo算法中有两个重要的突破\cite{wiki:tomasulo}让它不仅能够最大限度的减少真相关带来的处理器停顿时间,同时直接解决了反相关和输出相关。这两种技术是 115 | \begin{itemize} 116 | \item \textbf{寄存器换名技术} 117 | \item \textbf{CDB公共数据总线} 118 | \end{itemize} 119 | 在下文\autoref{algo:tomasulo}中,将会详细描述算法的原理及实现。 120 | 121 | 122 | %------------------------------------------------ 123 | 124 | \section{主要工作} 125 | 本次课程设计,我们主要完成了以下工作 126 | \begin{compactitem} 127 | \item 对Tomasulo算法,进行模块设计及时序设计,从而使用硬件的方式实现 128 | \item 将Tomasulo算法的实现,与CPU的设计结合,整合为一个能够做到支持乱序执行的流水线CPU 129 | \item 编写机器代码样例,测试CPU的可用性 130 | \item 分析Tomasulo算法优点及不足,并提出可行的改进方案 131 | \end{compactitem} 132 | 133 | %------------------------------------------------ 134 | \section{技术路线} 135 | 136 | \subsection{数据通路图设计} 137 | \begin{figure}[htp] 138 | \centering 139 | \includegraphics[width=13cm]{"./figure/dataPathDiagram.png"} 140 | \caption{乱序执行流水线CPU的数据通路示意图} 141 | \label{fig:dataPathDiagram} 142 | \end{figure} 143 | 144 | \subsection{关键模块设计} 145 | 在实现该数据通路的过程中,以下3个关键模块的实现为Tomasulo算法的良好运行打下了良好的基础。 
146 | \paragraph{保留站} 147 | 148 | \begin{table}[htbp] 149 | \centering 150 | \caption{保留站端口说明表} 151 | \label{tab:reservationStation} 152 | \rowcolors{1}{White}{Lavender} 153 | \begin{tabular}{llll} 154 | \hline 155 | 端口类型 & 位宽 & 端口名称 & 端口说明 \\ 156 | input & 1 & clk & 时钟信号 \\ 157 | input & 1 & nRST & 清零信号 \\ 158 | input & 1 & EXEable & 该保留站对应的ALU是否可执行 \\ 159 | input & 1 & WEN & 该保留站是否可写 \\ 160 | input & [1:0] & ResStationDst & 该保留站的编号 \\ 161 | input & [1:0] & opCode & 指令对应操作码 \\ 162 | input & [31:0] & dataIn1 & 数据输入端口 \\ 163 | input & [3:0] & label1 & 保留站号输入端口 \\ 164 | input & [31:0] & dataIn2 & 数据输入端口 \\ 165 | input & [3:0] & label2 & 保留站号输入端口 \\ 166 | input & 1 & BCEN & 广播是否可用 \\ 167 | input & [3:0] & BClabel & 广播数据对应的保留站号 \\ 168 | input & [31:0] & BCdata & 广播的数据 \\ 169 | output & [1:0] & opOut & 给ALU输出对应的操作码 \\ 170 | output & [31:0] & dataOut1 & 给ALU输出对应的数据 \\ 171 | output & [31:0] & dataOut2 & 给ALU输出对应的数据 \\ 172 | output & 1 & isFull & 保留站是否满 \\ 173 | output & 1 & OutEn & 该保留站输出是否有效 \\ 174 | output & [3:0] & ready\_labelOut & 保留站就绪指令的保留站号 \\ 175 | output & [3:0] & writeable\_labelOut & 可写的保留站号 \\ 176 | \hline 177 | \hiderowcolors 178 | \end{tabular} 179 | \end{table} 180 | 181 | \begin{itemize} 182 | \item 关于保留站的端口设计,可见\autoref{tab:reservationStation} 183 | \item 当时钟上升沿到达的时候 \\ 184 | 由信号isFull反映是否可写及写成功,将对应的值写进保留站中 185 | (不提供控制写地址的端口,只向外界告知是否写成功)并从CDB中读取信息,若CDB可用,则上升沿写入对应保留站中的寄存器,并修改相应Qj/Qk。 186 | \item 时钟下降沿到达时,若保留站中存在操作数就绪的指令,则对外输出就绪指令数据及保留站号 \\ 187 | 输入信号EXEable若反映ALU不可用, 则对应Busy位不修改,否则将已输出的指令对应的Busy清零。若输出信号OutEn为0反映输出不可用(指令处于未就绪状态),反之则就绪,ALU可写。 188 | \end{itemize} 189 | 190 | 191 | \paragraph{公共数据总线CDB} 192 | 193 | \begin{figure}[htp] 194 | \centering 195 | \includegraphics[width=13cm]{"./figure/CDB.png"} 196 | \caption{公共数据总线端口图} 197 | \label{fig:CDB} 198 | \end{figure} 199 | 200 | \subparagraph{CDB设计} 201 | CDB中,我们实现了一个优先译码器。当各个器件向CDB发送传播请求时(传送1),只有一个器件能得到接受回应(1),其余器件都得到拒绝回应(0)。通过这种做法,解决CDB总线繁忙的情况,每个时钟周期确保只有一个数据被广播出去。 202 | 203 | \paragraph{寄存器文件} 204 | 
205 | \begin{table}[htbp] 206 | \centering 207 | \caption{寄存器端口说明表} 208 | \label{tab:regfile} 209 | \rowcolors{1}{White}{Lavender} 210 | \begin{tabular}{llll} 211 | \hline 212 | 端口类型 & 位宽 & 端口名称 & 端口说明 \\ 213 | input & 1 & clk & 时钟信号 \\ 214 | input & 1 & nRST & 清零信号 \\ 215 | input & [4:0] & ReadAddr1 & 读取的寄存器号 \\ 216 | input & [4:0] & ReadAddr2 & 读取的寄存器号\\ 217 | input & 1 & RegWr & 寄存器是否可写 \\ 218 | input & [4:0] & WriteAddr & 写寄存器号 \\ 219 | input & [3:0] & WriteLabel & 写寄存器对应的保留站号 \\ 220 | output & [31:0] & DataOut1 & 读取寄存器对应的数据 \\ 221 | output & [31:0] & DataOut2 & 读取寄存器对应的数据 \\ 222 | output & [3:0] & LabelOut1 & 读取寄存器对应的保留站号 \\ 223 | output & [3:0] & LabelOut2 & 读取寄存器对应的保留站号 \\ 224 | input & 1 & BCEN & 广播的数据是否可用 \\ 225 | input & [3:0] & BClabel & 广播的数据对应的保留站号 \\ 226 | input & [31:0] & BCdata & 广播的数据 \\ 227 | input & 1 & BCEN & 广播是否可用 \\ 228 | input & [3:0] & BClabel & 广播数据对应的保留站号 \\ 229 | input & [31:0] & BCdata & 广播的数据 \\ 230 | \hline 231 | \hiderowcolors 232 | \end{tabular} 233 | \end{table} 234 | 235 | \subparagraph{寄存器文件设计} 236 | 为了能够监视CDB数据总线上的数据,寄存器文件里的每一个寄存器都与CDB直接相连,并在每个时钟上升沿时根据数据总线的信号决定是否更新该寄存器。 237 | 238 | 239 | \subsection{算法原理} 240 | \label{algo:tomasulo} 241 | 242 | \begin{figure}[htp] 243 | \centering 244 | \includegraphics[width=13cm]{"./figure/flawDiagram.png"} 245 | \caption{指令运行流程:更新PC-指令发射-执行-广播写回} 246 | \label{fig:flowDiagram} 247 | \end{figure} 248 | 249 | 结合该数据通路图,该算法的实施流程见\autoref{fig:flowDiagram} 250 | \paragraph{更新PC} 251 | 在时钟上升沿时,若当前指令所对应的保留站未满,则更新PC,以备下一条指令写入保留站。 252 | 253 | \paragraph{指令流入} 254 | 在时钟上升沿,若当前指令对应的保留站未满,则当前指令写入保留站中,并将寄存器的状态表更新为当前写入的保留站号。当前指令中已就绪的操作数,直接从寄存器读进来,从而与寄存器文件解耦合。对于操作数未就绪的指令,在保留站中存好该数据的最新来源(保留站号,从寄存器状态表读取),等待数据写入。 255 | 256 | \paragraph{执行阶段} 257 | 保留站能够根据保留站中的所有指令的情况,通过组合逻辑电路判断当前是否有指令所有操作数都就绪,一旦就绪,在时钟上升沿时这条指令就会写到ALU中。 258 | 259 | ALU在完成计算并且结果已写入寄存器文件和操作数中后,就会将写成功信号返回给保留站,保留站将该条指令的Busy位置为0,清除该条指令。 260 | 261 | \paragraph{广播写回} 262 | 
ALU执行后,将数据发送到公共数据总线上,当公共数据总线返回成功写入信号后,ALU返回空闲状态等待下一次执行,各保留站和寄存器根据公共数据总线的信息决定是否更新自身数值。 263 | (此过程,保留站中一些需要等待操作数写入指令的就会进入就绪状态) 264 | 265 | 266 | % \subsection{CPU实现指令集} 267 | % 3. CPU支持指令集的简要介绍 268 | 269 | 270 | \subsection{仿真及测试} 271 | 272 | \paragraph{测试样例1} 273 | 274 | \begin{verilogcode} 275 | 001000 00000 00010 0000000000000101 //addi r2, r0, 5 // r2 <- 5 276 | 001000 00001 00011 0000000000001000 //addi r3, r1, 8 // r3 <- 8 277 | 101011 00011 00010 0000000000000100 //sw r2, r3(4) 278 | 100011 00011 00111 0000000000000100 //lw r7, r3(4) 279 | 11111111 00000000 00000000 00000000 //halt 280 | \end{verilogcode} 281 | 282 | \paragraph{仿真波形图} 283 | 284 | \begin{figure}[htp] 285 | \centering 286 | \includegraphics[width=15cm]{"./figure/simulationiDiagram.png"} 287 | \caption{仿真波形图} 288 | \label{fig:simulationiDiagram} 289 | \end{figure} 290 | 291 | 292 | 经过检查各时钟周期各模块的控制信号,以及最后寄存器的结果,验证了该CPU在此样例的正确性。 293 | 294 | 其他测试样例由于空间限制,就不在此一一放出了。 295 | 296 | %------------------------------------------------ 297 | 298 | \section{项目亮点} 299 | 传统Tomasulo教材资料只给出了算法的软件模拟实现或伪代码实现。具体的硬件实现会遇到许多瓶颈。本项目对其中的一些难点做了突破,体现了一些创新性。 300 | \subsection{体系结构特点} 301 | \textbf{该CPU整体体系结构特点为:框架流水线,局部并行化,部件多周期。} 302 | \begin{description} 303 | \item[流水线] 总框架大致可以分\textbf{指令发射},\textbf{执行},\textbf{广播}等三个阶段。各个阶段流水执行。即每个时钟周期(几乎)保证有一条指令被执行,一份数据被广播。 304 | \item[并行化] 多个ALU并行地执行数据。一旦指令的操作数准备完毕,即可从保留站发射到ALU处。各个ALU的运算独立进行,互不干涉。 305 | \item[多周期] 部件多周期更符合实际情况,本设计中各个执行单元都有~state~部件用于控制状态。所有执行和存储器件都在各个周期内分步骤完成。 306 | \end{description} 307 | 308 | \subsection{阵列乘法器} 309 | 在ALU的设计中,乘法器的设计利用阵列乘法器加速定点数乘法。 310 | 采用$32 + 16 +... 
+1=63$个简易的加法电路,按5层的方式排布成阵列,并行地计算乘法。将乘法的运行时间缩短至5个CPU时钟。 311 | 312 | 313 | 314 | \begin{figure}[htp] 315 | \centering 316 | \includegraphics[width=12cm]{"./figure/mulALU.jpg"} 317 | \caption{阵列乘法器示意图} 318 | \label{fig:mulALU} 319 | \end{figure} 320 | 321 | 322 | 323 | \subsection{Queue in Hardware} 324 | 利用硬件实现队列。注意到并解决了所有的难点,包括 325 | \begin{enumerate} 326 | \item \textbf{计算空余位置号 }利用组合电路正确计算队列中的空余位置号 327 | \item \textbf{分配保留站号 } 每次新指令进队时,正确地分配唯一的保留站号 328 | \item \textbf{处理广播冲突 }当进队的指令中的保留站号恰好为正在广播的保留站号时,队列能正确地将广播中的数据替换指令的数据,再写进队列里 329 | \item \textbf{正确判断“伪满” }若队列已满,但下一个周期到来时队列能发射一条指令,则队列实质上仍可以接受指令,并没有处于真正满的状态。本设计能正确识别“伪满”现象,最大限度保证指令流动。 330 | \end{enumerate} 331 | 332 | \subsection{Passing Extreme Testcase} 333 | 通过了所有的边界条件测试样例。 334 | 在算法的实际设计中,受到硬件时序的约束,会产生极其多的边界条件。例如 335 | \begin{enumerate} 336 | \item \textbf{指令队列流出 }指令队列需要判断当前指令所在的保留站是否满。当满时,指令无法流出。 337 | \item \textbf{广播与写入 }当前广播的信号,恰好对应着当前写入信号的保留站号。此时器件应能正确捕获广播,避免遗漏。 338 | \item \textbf{执行单元的状态转换 }当执行单元(例如ALU)将运算执行完毕时,它需要考虑以下几种状况:~CDB~总线是否忙碌,保留站是否仍发来请求。 339 | ALU的附属器件~state~模块需要对其进行分析,判断其接下来进入的状态。 340 | \item \textbf{CDB繁忙 }~CDB~是所有“写”操作的唯一总线。当多个器件同时企图写总线时,将会引发冲突,此时需要一个优先译码器决定哪个执行器件的输出可以被广播。被拒绝广播的器件必须阻塞等待,直到CDB总线接受广播。 341 | \end{enumerate} 342 | 343 | \subsection{Good Coding Style} 344 | 本项目有着良好的代码风格,以确保代码的可读性和可维护性,以备之后项目的进一步发展。 345 | \begin{enumerate} 346 | \item \textbf{采用宏定义增强可读性 }将所有常量写入头文件中,便于管理。所有常数都用宏定义代替,增强可读性。 347 | \item \textbf{generate 语法 }当大量产生相同器件,或进行相同的连线时,采用~verilog 2001~标准中加入的~generate~语法,以达到高效、准确地描述硬件的效果。 348 | \begin{verilogcode} 349 | generate 350 | genvar i; 351 | for (i = 0; i < n; i = i + 1) begin: loop 352 | // codes here 353 | end 354 | endgenerate 355 | \end{verilogcode} 356 | \end{enumerate} 357 | 358 | %------------------------------------------------ 359 | \section{效果评价} 360 | 361 | \subsection{实现价值} 362 | 在现在Tomasulo算法资料相当匮乏的情况下,我们仅凭书本还有维基百科上的介绍,先是证明了算法的正确性,然后进行硬件的模块设计,时序设计。 363 | 364 | 其中硬件的整体架构,参考了书本的结构图,而硬件的接口设计以及时序设计,网上找不到成熟的教程,都是我们从零开始,一步一步摸索实现出来的。 365 | 
在这个过程中,我们也踩了很多坑,也认识到硬件设计中时序设计的困难。 366 | 367 | 我们完成的这一个项目,并不是这个CPU设计的结束,而是乱序执行流水线CPU(一个更符合现代处理器架构的CPU)实现的起点,我们需要进一步完善模块的接口设计,我们也还有很多工作需要做(todo list), 我希望,这一个有着详尽文档的设计项目,能够给之后的同学启发,在此基础上,实现分支预测,实现前瞻执行等特性。 368 | 369 | \subsection{不足及反思} 370 | 我们对于该算法的实现中,由于时间的限制,并不能将CPU的性能做到最佳,以下是我们发现了该CPU可能存在的一些不足,并思考了相应的解决方案。 371 | \begin{itemize} 372 | \item ALU数量不足,并行化仍有待提高,在有多条加法语句同时需要执行的时候还可以提高并行化(而且这样的情形存在的概率不低)。因此,我们可以设置多个加法器,多个乘法器,当有多条相同运算的指令同时流入到保留站中时,并且有多条相同运算的指令同时就绪,这些指令就可以在一个时钟周期内并行完成,进而降低了CPU的停顿时间,提高了效率。 373 | \item 保留站数量的设置并没有达到最佳。保留站的设置数量,关系到能够流入的指令的数量,进而影响到CPU能够并行执行的指令数量,CPU的效率也会受到影响。不难发现,保留站设置得越大,每个周期内保留站内有指令就绪的概率就越高,此时CPU内工作的ALU数量就越大,并行化程度就越高。但从另一方面来看,部件的增加,带来的是硬件的复杂度以及功耗的增加(作为CPU的设计,不仅要考虑仿真时的速度,更重要的是在硬件上运行时的复杂度及功耗),这两者之间的权衡关系,目前以我们的能力还很难把握。 374 | \item CDB公共数据总线的设计仍可优化。目前的设计是每一个时钟周期CDB接受一个ALU的计算结果并广播出去,这样的设计并非有问题,但效率的降低,源于我们的CDB没有设置结果缓存的区域。试想一下这样的情况,多个ALU同时完成运算,但是此时只能够一个一个广播出去,此时等候的ALU就会处于停顿状态,增加了CPU的停顿时间。要解决这个问题,我们可以在CDB中设置一个硬件队列,这样一来,ALU一旦计算完毕,直接将结果放进CDB中的队列中就自动进行下一次运算,不会再由于无法广播而增加CPU的停顿时间。 375 | \end{itemize} 376 | 377 | 378 | 379 | \section{鸣谢} 380 | 这个项目的完成,需要感谢李国桢老师对我们的大力支持与鼓励,您给了我们很多的学习资料,我们在这些学习资料里收获到了很多实现这个项目必备的知识。 381 | 382 | 感谢何朝东老师在“计算机组成与设计实验课”课程中的教学,何老师在课堂上讲解了很多与verilog语言有关的知识,同时还详细的说明了vivado软件的用法,让我们受益匪浅。 383 | 384 | 同时,还需要感谢张晨曦老师及其所著的《计算机体系结构》,该本教材知识点覆盖面广,同时知识讲解透彻,其配套的教学软件更是加深了我们对该算法的理解,这是我们的项目能够成功的前提。 385 | 386 | 再次感谢给我们支持与鼓励的大家! 
387 | 388 | 389 | %---------------------------------------------------------------------------------------- 390 | % REFERENCE LIST 391 | %---------------------------------------------------------------------------------------- 392 | \bibliographystyle{unsrt} 393 | \bibliography{document} 394 | %---------------------------------------------------------------------------------------- 395 | 396 | 397 | \end{document} -------------------------------------------------------------------------------- /doc/tex/figure/CDB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/figure/CDB.png -------------------------------------------------------------------------------- /doc/tex/figure/dataPathDiagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/figure/dataPathDiagram.png -------------------------------------------------------------------------------- /doc/tex/figure/flawDiagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/figure/flawDiagram.png -------------------------------------------------------------------------------- /doc/tex/figure/mulALU.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/figure/mulALU.jpg -------------------------------------------------------------------------------- /doc/tex/figure/simulationiDiagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/figure/simulationiDiagram.png 
-------------------------------------------------------------------------------- /doc/tex/temp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanB25/Tomasulo/a3c3ac8fbbffe48882a1a6b6bf775c23760ce9ff/doc/tex/temp.pdf -------------------------------------------------------------------------------- /doc/thoughts.md: -------------------------------------------------------------------------------- 1 | 写了这一个CPU,更加明白在设计硬件的过程中所需要注意的一些问题 2 | 1. 时序逻辑与组合逻辑 3 | 1. 在时序逻辑中,**当前的输出与当前的状态和当前的输入有关**,在这里,当前的状态,其实就是该器件过去的状态 4 | 2. 在组合逻辑中,**输出只与输入有关** 5 | 3. 问题:某器件接受一个信号,该信号在时钟上升沿的时候会变,当时钟上升沿到达的时候,到底是变化前的信号影响着输出,还是变化后的信号影响着输出 6 | 4. 回答:变化前的信号影响输出。 7 | 2. 控制中心的解耦合作用 8 | 这一次的CPU,CU做的工作并不多,我们写的很多模块都是相互依赖控制信号 9 | 例子: 10 | 1. 保留站依赖ALU是否可用,ALU依赖保留站输出信号是否可用 11 | 2. 能够写保留站,条件: 12 | 1. 保留站未满 13 | 2. 保留站可写(可能不是写当前这个保留站,写其他的保留站,留有一个WEN信号端口) 14 | 3. 能够从保留站写到ALU 15 | 1. ALU可用(可用又依赖于ALU自身状态以及ALU结果是否广播成功,抽象成一个available信号) 16 | 2. 保留站存在就绪指令 17 | 3. 我也不知道自己写啥 -------------------------------------------------------------------------------- /rom/rom.mem: -------------------------------------------------------------------------------- 1 | 00100000 00000010 00000000 00000101 2 | 00100000 00100011 00000000 00001000 3 | 10101100 01100010 00000000 00000100 4 | 10001100 01100111 00000000 00000100 5 | 11111111 00000000 00000000 00000000 6 | -------------------------------------------------------------------------------- /rom/testcase1.md: -------------------------------------------------------------------------------- 1 | 001000 00000 00001 0000000000001010 add r1, r0, 10 2 | 000000 00001 00000 0001000000100011 sub r2, r1, r0 3 | 000000 00010 00000 0010000000100000 add r4, r2, r0 4 | 001000 00000 00011 0000000000011110 add r3, r0, 30 5 | 001000 00001 00010 0000000000010100 add r2, r1, 20 6 | 001000 00000 00101 0000000000010100 add r5, r0, 20 7 | 111111 00000 00000 0000000000000000 halt 
-------------------------------------------------------------------------------- /rom/testcase2.md: -------------------------------------------------------------------------------- 1 | # test case 2 2 | ## Codes 3 | ``` arm 4 | addi $1, $2, 10 // $1 <- $2 + 10 5 | 001000 00010 00001 0000000000001010 6 | 7 | // ReservationStation deal with bc and write conflict 8 | add $3, $1, $2 // $3 <- $1(depend) + $2 9 | 000000 00001 00010 00011 00000 100000 10 | 11 | sub $1, $3, $0 // $1 <- $3(depend) - $0 12 | 000000 00011 00000 00001 00000 100011 13 | ``` 14 | ## Coverage 15 | -------------------------------------------------------------------------------- /rom/testcase2.mem: -------------------------------------------------------------------------------- 1 | 00100000 01000001 00000000 00001010 2 | 00000000 00100010 00011000 00100000 3 | 00000000 01100000 00001000 00100011 4 | 11111111 00000000 00000000 00000000 -------------------------------------------------------------------------------- /rom/testcase3.md: -------------------------------------------------------------------------------- 1 | 2 | ``` 3 | 001000 00000 00001 00000000 00001010 add r1, r0, 10 4 | 001000 00000 00010 00000000 00010000 add r2, r0, 16 5 | 000000 00001 00010 00011 00000 001011 mul r3, r1, r2 6 | ``` -------------------------------------------------------------------------------- /rom/testcase3.mem: -------------------------------------------------------------------------------- 1 | 00100000 00000001 00000000 00001010 2 | 00100000 00000010 00000000 00010000 3 | 00000000 00100010 00011000 00001011 4 | 11111100 00000000 00000000 00000000 -------------------------------------------------------------------------------- /rom/testcase4.md: -------------------------------------------------------------------------------- 1 | ``` 2 | 001000 00000 00001 00000000 00001010 add r1, r0, 10 3 | 001000 00000 00010 00000000 00010000 add r2, r0, 16 4 | 000000 00001 00010 00011 00000 001011 mul r3, r1, r2 5 | 001000 
00000 00100 00000000 00010001 add r4, r0, 17 6 | 001000 00000 00101 00000000 00010010 add r5, r0, 18 7 | 001000 00000 00110 00000000 00010011 add r6, r0, 19 8 | 001000 00000 00111 00000000 00010100 add r7, r0, 20 9 | 001000 00000 01000 00000000 00010101 add r8, r0, 21 10 | 001000 00000 01001 00000000 00010110 add r9, r0, 22 11 | 111111 000 00000000 00000000 00000000 12 | ``` 13 | 14 | ### testcase4 15 | 测出问题:保留站busy位在有指令流入保留站的时候,没有在对应的busy位里写入1 16 | (从而导致写寄存器状态表的保留站号有误) 17 | 问题来源: 18 | 1. 保留站根据当前的CDB(准确的说是上一周期的CDB信号)的label,将对应的保留站的busy清零,表示该保留站的指令计算的数据已写入,指令可释放 19 | 2. 保留站BCEN信号来源于pmfALU,设计的问题,导致运行一条指令的时候,BCEN信号会有两个周期有效 20 | 结果: 21 | 1. 两个周期有效,那就会在下一个周期的之后的两个上升沿都会进行更新CDB,**给busy清零**的操作,这里两次清零就会出问题(刚好下一条指令就要放到对应的位置呢?) 22 | 修改:修改了pmfalu的状态转移方式,本来处于完成状态的alu有机会直接去到执行1阶段,现在强制先回到idle状态 -------------------------------------------------------------------------------- /rom/testcase4.mem: -------------------------------------------------------------------------------- 1 | 00100000 00000001 00000000 00001010 2 | 00100000 00000010 00000000 00010000 3 | 00000000 00100010 00011000 00001011 4 | 00100000 00000100 00000000 00010001 5 | 00100000 00000101 00000000 00010010 6 | 00100000 00000110 00000000 00010011 7 | 00100000 00000111 00000000 00010100 8 | 00100000 00001000 00000000 00010101 9 | 00100000 00001001 00000000 00010110 10 | 11111100 00000000 00000000 00000000 11 | 12 | -------------------------------------------------------------------------------- /rom/testcase5.md: -------------------------------------------------------------------------------- 1 | addi $2, $0, 5 // $2 <- 5 2 | 001000 00000 00010 0000000000000101 3 | addi $3, $1, 8 // $3 <- 8 4 | 001000 00001 00011 0000000000001000 5 | sw $2, $3(4) 6 | 101011 00011 00010 0000000000000100 7 | lw $7, $3(4) 8 | 100011 00011 00111 0000000000000100 9 | halt -------------------------------------------------------------------------------- /rom/testcase5.mem: 
-------------------------------------------------------------------------------- 1 | 00100000 00000010 00000000 00000101 2 | 00100000 00100011 00000000 00001000 3 | 10101100 01100010 00000000 00000100 4 | 10001100 01100111 00000000 00000100 5 | 11111111 00000000 00000000 00000000 -------------------------------------------------------------------------------- /rom/testcase6.md: -------------------------------------------------------------------------------- 1 | 001000 00000 00001 00000000 00001010 add r1, r0, 10 2 | 001000 00001 00001 00000000 00001010 add r1, r1, 10 3 | 111111 00000 00000 00000000 00000000 halt 4 | 5 | 6 | // detect bug in testcase6 when the target of readreg and the target of writereg is the same 7 | 8 | > 寄存器状态表存放最新的寄存器来源 9 | > 因此第二条指令流入的时候,会把r1的来源设置为第二个保留站 10 | > 问题在于,第二条指令操作数r1未就绪,但对应的保留站号并不对 11 | > 说明,在流入的时候, 更新寄存器状态表, 与 确定操作数的来源(译码)的先后顺序暂时不明了 12 | 13 | 以上都是我瞎逼逼 14 | 其实只是一开始指令代码写错了 15 | 这个通过 -------------------------------------------------------------------------------- /rom/testcase6.mem: -------------------------------------------------------------------------------- 1 | 00100000 00000001 00000000 00001010 2 | 00100000 00100001 00000000 00001010 3 | 11111100 00000000 00000000 00000000 4 | 5 | 6 | -------------------------------------------------------------------------------- /source/CDB.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "head.v" 3 | module CDBHelper( 4 | input [3:0] requires, 5 | output reg [3:0] accepts 6 | ); 7 | always@(*) begin 8 | if (requires[3]) 9 | accepts = 4'b1000; 10 | else if (requires[2]) 11 | accepts = 4'b0100; 12 | else if (requires[1]) 13 | accepts = 4'b0010; 14 | else if (requires[0]) 15 | accepts = 4'b0001; 16 | else 17 | accepts = 4'b0000; 18 | end 19 | endmodule 20 | 21 | module CDB( 22 | input [31:0] data0, 23 | input [3:0] label0, 24 | input [31:0] data1, 25 | input [3:0] label1, 26 | input [31:0] data2, 27 | input [3:0] label2, 28 
| input [31:0] data3, 29 | input [3:0] label3, 30 | input [3:0] sel, 31 | output reg[31:0] dataOut, 32 | output reg[3:0] labelOut, 33 | output EN 34 | ); 35 | always@(*) begin 36 | if (sel[0]) begin 37 | dataOut = data0; 38 | labelOut = label0; 39 | end else if (sel[1]) begin 40 | dataOut = data1; 41 | labelOut = label1; 42 | end else if (sel[2]) begin 43 | dataOut = data2; 44 | labelOut = label2; 45 | end else begin 46 | dataOut = data3; 47 | labelOut = label3; 48 | end 49 | end 50 | assign EN = | sel; 51 | endmodule -------------------------------------------------------------------------------- /source/CU.v: -------------------------------------------------------------------------------- 1 | `include "head.v" 2 | `timescale 1ns/1ps 3 | module CU( 4 | input [5:0] op, 5 | input [5:0] func, 6 | output reg[1:0]ALUop, 7 | output reg[1:0]ALUSel, 8 | output reg[3:0]ResStationEN, 9 | input [2:0]isFull, 10 | output isFullOut, 11 | output RegDst, 12 | output vkSrc, 13 | output QueueOp 14 | ); 15 | always@(*) begin 16 | case(op) 17 | `opRFormat: 18 | case(func) 19 | `funcADD, `funcMULU: 20 | ALUop = 0; 21 | `funcSUB : ALUop = `ALUSub; 22 | `funcAND : ALUop = `ALUAnd; 23 | default : ALUop = `ALUOr; 24 | endcase 25 | `opADDI : ALUop = `ALUAdd; 26 | `opORI : ALUop = `ALUOr; 27 | default: 28 | ALUop = 1; 29 | endcase 30 | if (op == `opHALT) begin 31 | //ALUSel = 2'b00; 32 | ResStationEN = 4'b0000; 33 | end 34 | else if (func == `funcMULU) begin 35 | ALUSel = `multipleALU; 36 | ResStationEN = 4'b0010; 37 | end 38 | else if (func == `funcDIVU) begin 39 | ALUSel = `divideALU; 40 | ResStationEN = 4'b0100; 41 | end 42 | else if (op == `opLW || op == `opSW) begin 43 | //ALUSel = 2'b11; 44 | ResStationEN = 4'b1000; 45 | end else begin 46 | ALUSel = `addsubALU; 47 | ResStationEN = 4'b0001; 48 | end 49 | end 50 | assign isFullOut = isFull[ALUSel]; 51 | assign RegDst = op == `opRFormat ? `FromRd : `FromRt; 52 | assign vkSrc = op == `opRFormat ? 
`FromRtData : `FromImmd; 53 | assign QueueOp = op == `opLW ? `opLoad : `opStore; 54 | endmodule -------------------------------------------------------------------------------- /source/Memory.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | 3 | module Memory( 4 | input clk, 5 | input WEN, 6 | input [31:0] dataIn1,// Qj 7 | input [31:0] dataIn2,// A 8 | input op,// for example, 1 is load, 0 is write 9 | input [31:0] writeData, 10 | input [3:0] labelIn, 11 | output reg [3:0] labelOut, 12 | output [31:0] loadData, 13 | output reg available, 14 | output reg require, 15 | input requireAC, 16 | output isLastState 17 | ); 18 | reg [31:0] addr; 19 | reg nRD; 20 | reg nWR; 21 | integer States; 22 | initial begin 23 | States = 0; 24 | nRD = 1; 25 | nWR = 1; 26 | require = 0; 27 | end 28 | wire readStatus; 29 | wire writeStatus; 30 | always@( posedge clk ) begin 31 | if (States == 0) begin 32 | if (WEN == 1) begin 33 | addr <= dataIn1 + dataIn2; 34 | labelOut <= labelIn; 35 | States <= 1; 36 | // States 从0 变成1,进入访存阶段 37 | if (op == 1) begin 38 | nRD <= 0; 39 | end 40 | if (op == 0) begin 41 | nWR <= 0; 42 | end 43 | end 44 | else begin // WEN == 0 45 | States <= 0; 46 | nRD <= 1; 47 | nWR <= 1; 48 | end 49 | end 50 | else if (States == 1) begin 51 | nRD <= 1; 52 | nWR <= 1; 53 | if (readStatus == 1) begin 54 | States <= 2; 55 | require <= 1; 56 | end 57 | if (writeStatus == 1) begin 58 | require <= 0; 59 | States <= 0; 60 | end 61 | end 62 | else if (States == 2) begin 63 | if (requireAC == 1) begin 64 | States <= 0; 65 | end 66 | else begin 67 | States <= 2; 68 | end 69 | end 70 | else 71 | States <= 4; 72 | end 73 | 74 | always@(*) begin 75 | if (States == 1 || States == 2) begin 76 | available = 0; 77 | end 78 | else begin 79 | available = 1; //TODO :maybe bugs not a good implementation 80 | end 81 | end 82 | 83 | RAM my_ram( 84 | .clk(clk), 85 | .address(addr), 86 | .writeData(writeData), 87 | 
.Dataout(loadData), 88 | .readStatus(readStatus), 89 | .writeStatus(writeStatus), 90 | .nRD(nRD), 91 | .nWR(nWR), 92 | .isLastState(isLastState) 93 | ); 94 | 95 | endmodule -------------------------------------------------------------------------------- /source/PC.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | `include "head.v" 3 | module PC( 4 | input clk, 5 | input nRST, 6 | input [31:0]newpc, 7 | input pcWrite, 8 | output reg [31:0]pc 9 | ); 10 | always@(posedge clk or negedge nRST) begin 11 | if (pcWrite || !nRST) begin 12 | pc <= nRST == 0 ? 0 : newpc; 13 | end else begin 14 | pc <= pc; 15 | end 16 | end 17 | endmodule 18 | 19 | module PCHelper( 20 | input [31:0] pc, 21 | input [15:0] immd16, 22 | input [25:0] immd26, 23 | input [1:0] sel, 24 | input [31:0] rs, 25 | output reg [31:0] newpc 26 | ); 27 | initial begin 28 | newpc = 0; 29 | end 30 | wire [31:0]exd_immd16 = { {16{immd16[15]}}, immd16}; 31 | always@(*) begin 32 | case (sel) 33 | `NextIns : newpc <= pc + 4; 34 | `RelJmp : newpc <= (pc + 4 + (exd_immd16 << 2)); 35 | `AbsJmp : newpc <= {pc[31:28], immd26, 2'b00}; 36 | `RsJmp : newpc <= rs; 37 | endcase 38 | end 39 | endmodule 40 | -------------------------------------------------------------------------------- /source/Queue.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "head.v" 3 | // implement as queue. 
4 | module Queue( 5 | input clk, 6 | input nRST, 7 | input requireAC, // whether the ALU is available and ins can be issued 8 | input WEN, // Write ENable 9 | output isFull, // whether the buffer is full 10 | output require, // whether output is valid 11 | 12 | input [31:0] dataIn, 13 | input [3:0] labelIn, 14 | input opIN, 15 | 16 | input BCEN, // BroadCast ENable 17 | input [3:0] BClabel, // BoradCast label 18 | input [31:0] BCdata, //BroadCast value 19 | 20 | output opOut, 21 | output [31:0] dataOut, 22 | output [3:0] labelOut, 23 | input isLastState, 24 | output [3:0] queue_writeable_label 25 | ); 26 | reg [3:0]availableIdLabel; 27 | assign queue_writeable_label = availableIdLabel; 28 | reg [3:0]Busy; 29 | reg [3:0]Label[3:0]; 30 | reg [31:0]Data[3:0]; 31 | reg [3:0]IdLabel[3:0]; 32 | reg [3:0]op; 33 | initial begin 34 | Label[3] = 0; 35 | Busy[3] = 4'b1000; 36 | Data[3] = 0; 37 | IdLabel[3] = 0; 38 | op[3] = 0; 39 | end 40 | assign opOut = op[0]; 41 | assign dataOut = Data[0]; 42 | assign labelOut =IdLabel[0]; 43 | 44 | wire issuable = require && requireAC; 45 | wire wbusy = Busy[0] && Busy[1] && Busy[2]; 46 | assign isFull = !issuable && wbusy; 47 | assign require = Busy[0] && Label[0] == 0; 48 | wire poppable; 49 | assign poppable = isLastState; 50 | 51 | reg [1:0] first_empty; 52 | always@(*) begin 53 | if (!Busy[0]) first_empty = 0; 54 | else if (!Busy[1]) first_empty = 1; 55 | else first_empty = 2; 56 | end 57 | 58 | reg [1:0]lastBusyIndex; 59 | always@(*) begin 60 | if (Busy[2]) 61 | lastBusyIndex = 2; 62 | else if (Busy[1]) 63 | lastBusyIndex = 1; 64 | else if (Busy[0]) 65 | lastBusyIndex = 0; 66 | else lastBusyIndex = -1; 67 | end 68 | 69 | always@(*) begin 70 | if (wbusy) 71 | availableIdLabel = 4'bx; // if busy, it is don't-care signal 72 | else if (IdLabel[0] != `QUE0 && IdLabel[1] != `QUE0 && IdLabel[2] != `QUE0) 73 | availableIdLabel = `QUE0; 74 | else if (IdLabel[0] != `QUE1 && IdLabel[1] != `QUE1 && IdLabel[2] != `QUE1) 75 | availableIdLabel = 
`QUE1; 76 | else availableIdLabel = `QUE2; 77 | end 78 | 79 | generate 80 | genvar i; 81 | for (i = 0; i <= 2; i = i + 1) begin 82 | always@(posedge clk or negedge nRST) begin 83 | if (!nRST) begin 84 | Busy[i] <= 0; 85 | Label[i] <= 0; 86 | Data[i] <= 0; 87 | IdLabel[i] <= 0; 88 | op[i] <= 0; 89 | end else if (WEN) begin 90 | if (!poppable) begin 91 | if (!wbusy && i == first_empty) begin //Wen && !issuable && !busy 92 | // input data to the first empty position 93 | Busy[i] <= 1; 94 | Data[i] <= BCEN && BClabel==labelIn ? BCdata : dataIn; 95 | Label[i] <= BCEN && BClabel == labelIn ? 0 : labelIn; 96 | op[i] <= opIN; 97 | IdLabel[i] <= availableIdLabel; 98 | end else begin 99 | if (BCEN && BClabel == Label[i]) begin // else watch for bc 100 | Data[i] <= BCdata; 101 | Label[i] <= 0; 102 | end 103 | end 104 | end else begin 105 | if (!wbusy && i == lastBusyIndex) begin // WEN && issuable : queue must be available 106 | // Busy is also 1, so do not change 107 | Data[i] <= BCEN && BClabel == labelIn ? BCdata : dataIn; 108 | Label[i] <= BCEN && BClabel == labelIn ? 0 : labelIn; 109 | op[i] <= opIN; 110 | IdLabel[i] <= availableIdLabel; 111 | end else if (i < lastBusyIndex) begin // queue::pop() 112 | Data[i] <= BCEN && BClabel == Label[i+1]? BCdata : Data[i+1]; 113 | Label[i] <= BCEN && BClabel == Label[i+1] ? 0 : Label[i+1]; 114 | op[i] <= op[i+1]; 115 | IdLabel[i] <= IdLabel[i+1]; 116 | end 117 | end 118 | end else begin 119 | if (poppable) begin 120 | if (i == lastBusyIndex) begin //!Wen && issuable 121 | Busy[i] <= 0; 122 | Data[i] <= 0; 123 | Label[i] <= 0; 124 | op[i] <= 0; 125 | IdLabel[i] <= 0; 126 | end else if (i < lastBusyIndex) begin 127 | Busy[i] <= Busy[i+1]; 128 | Data[i] <= BCEN && BClabel == Label[i+1] ? BCdata : Data[i+1]; 129 | Label[i] <= BCEN && BClabel == Label[i+1] ? 
0 : Label[i+1]; 130 | op[i] <= op[i+1]; 131 | IdLabel[i] <= IdLabel[i+1]; 132 | end 133 | end else begin //!WEN && !issuable 134 | if (BCEN && BClabel == Label[i]) begin 135 | Data[i] <= BCdata; 136 | Label[i] <= 0; 137 | end 138 | end 139 | end 140 | end 141 | end 142 | endgenerate 143 | endmodule -------------------------------------------------------------------------------- /source/RAM.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | // 信任提供地址和数据的模块,在内存未完成操作的时候,addr和data不改变 3 | module RAM( 4 | input clk, 5 | input [31:0] address, 6 | input [31:0] writeData, // [31:24], [23:16], [15:8], [7:0] 7 | input nRD, // 为0,正常读;为1,输出高组态 8 | input nWR, // 为0,写;为1,无操作 9 | output reg [31:0] Dataout, 10 | output reg readStatus, // 如果输出有效则为1 11 | output reg writeStatus, 12 | output isLastState 13 | ); 14 | integer R,W; 15 | assign isLastState = R == 9 || W == 9; //TODO 16 | initial begin 17 | R = 0; 18 | W = 0; 19 | readStatus = 0; 20 | writeStatus = 0; 21 | end 22 | reg [7:0] ram [0:60]; //存储器 23 | // 设置状态变量 24 | always@( negedge clk) begin 25 | if (R == 0) begin 26 | if (nRD == 0) begin 27 | R <= 1; 28 | end 29 | else begin // nRD == 1 30 | R <= 0; 31 | end 32 | end 33 | else if (R == 10) begin 34 | R <= 0; 35 | end 36 | else begin 37 | R <= R+1; 38 | end 39 | 40 | if (W == 0) begin 41 | if (nWR == 0) begin 42 | W <= 1; 43 | end 44 | else begin // nWR == 1 45 | W <= 0; 46 | end 47 | end 48 | else if (W == 10) begin 49 | W <= 0; 50 | end 51 | else begin 52 | W <= W+1; 53 | end 54 | end 55 | always@(*) begin 56 | // if (readStatus == 1) begin 57 | if (R == 10) begin 58 | Dataout[7:0] = ram[address + 3]; 59 | Dataout[15:8] = ram[address + 2]; 60 | Dataout[23:16] = ram[address + 1]; 61 | Dataout[31:24] = ram[address ]; 62 | readStatus = 1; 63 | end 64 | else begin 65 | readStatus = 0; 66 | end 67 | if( W == 1 ) begin 68 | ram[address] = writeData[31:24]; 69 | ram[address+1] = writeData[23:16]; 70 | ram[address+2] = 
writeData[15:8]; 71 | ram[address+3] = writeData[7:0]; 72 | end 73 | if (W == 10) begin 74 | writeStatus = 1; 75 | end 76 | else begin 77 | writeStatus = 0; 78 | end 79 | end 80 | endmodule -------------------------------------------------------------------------------- /source/ROM.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | module ROM ( 3 | input nrd, 4 | output reg [31:0] dataOut, 5 | input [31:0] addr 6 | ); 7 | 8 | reg [7:0] rom [0:99]; 9 | initial begin 10 | //$readmemb ("C:/Users/Administrator/Desktop/workplace/Tomasulo/rom/rom.mem", rom); 11 | $readmemb ("E:/code/Tomasulo/rom/rom.mem", rom); 12 | // $readmemb ("E:/code/Tomasulo/rom/testcase6.mem", rom); 13 | // $readmemb ("C:/Users/Administrator/Desktop/workplace/Tomasulo/rom/testcase5.mem", rom); 14 | // $readmemb ("C:/Users/Administrator/Desktop/workplace/Tomasulo/rom/rom.mem", rom); 15 | end 16 | always @(*) begin 17 | if (nrd == 0) begin 18 | dataOut[31:24] = rom[addr]; 19 | dataOut[23:16] = rom[addr+1]; 20 | dataOut[15:8] = rom[addr+2]; 21 | dataOut[7:0] = rom[addr+3]; 22 | end else begin 23 | dataOut[31:0] = {32{1'bz}}; 24 | end 25 | end 26 | endmodule -------------------------------------------------------------------------------- /source/RegFile.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | `include "head.v" 3 | module RegFile( 4 | input clk, 5 | input nRST, 6 | input [4:0] ReadAddr1, 7 | input [4:0] ReadAddr2, 8 | input RegWr, //labelEN 9 | input [4:0] WriteAddr, 10 | input [3:0] WriteLabel, 11 | output [31:0] DataOut1, 12 | output [31:0] DataOut2, 13 | output [3:0] LabelOut1, 14 | output [3:0] LabelOut2, 15 | input BCEN, 16 | input [3:0] BClabel, 17 | input [31:0] BCdata 18 | ); 19 | reg [31:0] regData[1:31]; 20 | reg [3:0] regLabel[1:31]; 21 | assign DataOut1 = (ReadAddr1 == 0) ? 0 : regData[ReadAddr1]; 22 | assign DataOut2 = (ReadAddr2 == 0) ? 
0 : regData[ReadAddr2]; 23 | assign LabelOut1 = (ReadAddr1 == 0) ? 0 : regLabel[ReadAddr1]; 24 | assign LabelOut2 = (ReadAddr2 == 0) ? 0 : regLabel[ReadAddr2]; 25 | generate 26 | genvar i; 27 | for (i = 1; i < 32; i = i + 1) begin: regfile 28 | always @(posedge clk or negedge nRST) begin 29 | if (!nRST) begin 30 | regData[i] <= 32'b0; 31 | regLabel[i] <= 32'b0; 32 | end else begin 33 | if (RegWr && WriteAddr == i) begin 34 | regLabel[i] <= WriteLabel; // don't care whether WriteLabel is the same as BClabel. 35 | // Anyway, it is overriden by WriteLabel at last. 36 | end else if (BCEN && regLabel[i] == BClabel) begin 37 | regLabel[i] <= 5'b0; 38 | regData[i] <= BCdata; 39 | end 40 | end 41 | end 42 | end 43 | endgenerate 44 | endmodule -------------------------------------------------------------------------------- /source/ReservationStation.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "head.v" 3 | 4 | module ReservationStation( 5 | input clk, 6 | input nRST, 7 | input EXEable, // whether the ALU is available and ins can be issued 8 | input WEN, // Write ENable 9 | 10 | input [1:0] ResStationDst,// TODO: 11 | input [1:0] opCode, 12 | input [31:0] dataIn1, 13 | input [3:0] label1, 14 | input [31:0] dataIn2, 15 | input [3:0] label2, 16 | 17 | input BCEN, // BroadCast ENable 18 | input [3:0] BClabel, // BoradCast label 19 | input [31:0] BCdata, //BroadCast value 20 | 21 | output [1:0] opOut, 22 | output [31:0] dataOut1, 23 | output [31:0] dataOut2, 24 | output isFull, // whether the buffer is full 25 | output OutEn, // whether output is valid 26 | output [3:0] ready_labelOut, 27 | output [3:0] writeable_labelOut 28 | ); 29 | 30 | // 设置了三个保留站 31 | // 若使b2'11来索引,无效 32 | reg Busy[2:0]; 33 | reg [1:0]Op[2:0]; 34 | reg [3:0]Qj[2:0]; 35 | reg [31:0]Vj[2:0]; 36 | reg [3:0]Qk[2:0]; 37 | reg [31:0]Vk[2:0]; 38 | 39 | // 当前可写地址 ,2'b11则为不可�?? 40 | reg [1:0] cur_addr ; 41 | // 当前就绪地址,2'b11则为不可�?? 
42 | reg [1:0] ready_addr ; 43 | initial begin 44 | Busy[0] = 0; 45 | Busy[1] = 0; 46 | Busy[2] = 0; 47 | end 48 | 49 | always@(posedge clk or negedge nRST) begin 50 | if (nRST == 0) begin 51 | Busy[0] <= 0; 52 | Busy[1] <= 0; 53 | Busy[2] <= 0; 54 | end 55 | else begin 56 | if (WEN == 1) begin // issue: fill the free entry selected by cur_addr 57 | if (cur_addr != 2'b11 && Busy[cur_addr] == 0) begin 58 | Busy[cur_addr] <= 1; 59 | Op[cur_addr] <= opCode; 60 | if (BCEN == 1 && label1 == BClabel) begin // operand 1 is on the bus this cycle: take it directly 61 | Qj[cur_addr] <= 0; 62 | Vj[cur_addr] <= BCdata; 63 | end 64 | else begin 65 | Qj[cur_addr] <= label1; 66 | Vj[cur_addr] <= dataIn1; 67 | end 68 | if (BCEN == 1 && label2 == BClabel) begin // operand 2 is on the bus this cycle: take it directly 69 | Qk[cur_addr] <= 0; 70 | Vk[cur_addr] <= BCdata; 71 | end 72 | else begin 73 | Qk[cur_addr] <= label2; 74 | Vk[cur_addr] <= dataIn2; 75 | end 76 | end 77 | // maybe generate latch 78 | end 79 | // watch CDB: all writes are non-blocking so logic sampling Vj/Vk/Qj/Qk on the same clock edge sees the old values 80 | if (BCEN == 1 ) begin 81 | if (BClabel[3:2] == ResStationDst) begin // result belongs to this station: free its entry 82 | Busy[BClabel[1:0]] <= 0; 83 | end 84 | if (Busy[0] == 1 && Qj[0] == BClabel) begin 85 | Vj[0] <= BCdata; 86 | Qj[0] <= 0; 87 | end 88 | if (Busy[1] == 1 && Qj[1] == BClabel) begin 89 | Vj[1] <= BCdata; 90 | Qj[1] <= 0; 91 | end 92 | if (Busy[2] == 1 && Qj[2] == BClabel) begin 93 | Vj[2] <= BCdata; 94 | Qj[2] <= 0; 95 | end 96 | if (Busy[0] == 1 && Qk[0] == BClabel) begin 97 | Vk[0] <= BCdata; 98 | Qk[0] <= 0; 99 | end 100 | if (Busy[1] == 1 && Qk[1] == BClabel) begin 101 | Vk[1] <= BCdata; 102 | Qk[1] <= 0; 103 | end 104 | if (Busy[2] == 1 && Qk[2] == BClabel) begin 105 | Vk[2] <= BCdata; 106 | Qk[2] <= 0; 107 | end 108 | end 109 | end 110 | end 111 | 112 | 113 | assign opOut = Op[ready_addr]; 114 | assign dataOut1 = Vj[ready_addr]; 115 | assign dataOut2 = Vk[ready_addr]; 116 | 117 | // Priority encoder: combinational logic that selects the current writable address 118 | // 2'b11 means no entry is writable 119 | always@(*) begin 120 | if (Busy[0] == 0) begin 121 | cur_addr = 2'b00; 122 | end 123 | else if (Busy[1] == 0) begin 124 | cur_addr = 2'b01; 125 | end 126 | else if (Busy[2] == 0) begin 127 | cur_addr = 2'b10; 128 | end 129 | 
else 130 | cur_addr = 2'b11; 131 | end 132 | 133 | // Whether the reservation station is full (cur_addr is 2'b11) 134 | assign isFull = & cur_addr; 135 | 136 | // Ready check 137 | // Compute the current ready address and the ready status: first busy entry whose Qj and Qk are both 0 138 | always@(*)begin 139 | if (Busy[0] == 1 && Qj[0] == 0 && Qk[0] == 0) begin 140 | ready_addr = 2'b00; 141 | end 142 | else begin 143 | if(Busy[1] == 1 && Qj[1] == 0 && Qk[1] == 0) begin 144 | ready_addr = 2'b01; 145 | end 146 | else begin 147 | if (Busy[2] == 1 && Qj[2] == 0 && Qk[2] == 0 ) begin 148 | ready_addr = 2'b10; 149 | end 150 | else 151 | ready_addr = 2'b11; 152 | end 153 | end 154 | end 155 | 156 | assign OutEn = ~ (&ready_addr); 157 | 158 | assign ready_labelOut = {ResStationDst,ready_addr};// TODO: 159 | assign writeable_labelOut = {ResStationDst, cur_addr}; 160 | 161 | endmodule -------------------------------------------------------------------------------- /source/decoder.v: -------------------------------------------------------------------------------- 1 | `include "head.v" 2 | `timescale 1ns / 1ps 3 | module Decoder( 4 | input [31:0] ins, 5 | output [5:0] op, 6 | output [5:0] func, 7 | output [4:0] sftamt, 8 | output [4:0] rs, 9 | output [4:0] rt, 10 | output [4:0] rd, 11 | output [15:0] immd16, 12 | output [25:0] immd26 13 | ); 14 | assign op = ins[31:26]; // pure combinational slicing of the 32-bit instruction word 15 | assign func = ins[5:0]; 16 | assign sftamt = ins[10:6]; 17 | assign rs = ins[25:21]; 18 | assign rt = ins[20:16]; 19 | assign rd = ins[15:11]; 20 | assign immd16 = ins[15:0]; // overlaps rd/sftamt/func bits 21 | assign immd26 = ins[25:0]; // overlaps every field except op 22 | endmodule -------------------------------------------------------------------------------- /source/dfALU.v: -------------------------------------------------------------------------------- 1 | //TODO NOT FINISHED 2 | `timescale 1ns/1ps 3 | `include "head.v" 4 | module dfState( 5 | input clk, 6 | input nRST, 7 | output reg stateOut, // to ALU 8 | output reg [5:0]cnt, 9 | input WEN, 10 | input requireAC, 11 | output available, 12 | output dfALUEN, // determine whether mdfALU should work 13 | output require, 14 | output keepLooping // send 
to dfALU 15 | ); 16 | assign available = (require && requireAC) || stateOut == `sIdle; // free when idle or when the finished result is being accepted 17 | assign dfALUEN = available && WEN; // drive the declared dfALUEN port (was an undeclared net, mdfALUEN); NOTE(review): keepLooping is still not driven in this module 18 | assign require = stateOut == `sWorking && cnt[5]; // result is offered once the counter reaches 32 19 | always@(posedge clk or negedge nRST) begin 20 | if (!nRST) begin 21 | stateOut <= `sIdle; 22 | cnt <= 0; 23 | end else begin 24 | case(stateOut) 25 | `sIdle: 26 | if (WEN) 27 | stateOut <= `sWorking; 28 | `sWorking: 29 | if (cnt[5]) begin 30 | if (requireAC) begin 31 | stateOut <= WEN ? `sWorking : `sIdle; 32 | cnt <= 0; 33 | end 34 | end else begin 35 | cnt <= cnt + 1; 36 | end 37 | endcase 38 | end 39 | end 40 | endmodule 41 | -------------------------------------------------------------------------------- /source/head.v: -------------------------------------------------------------------------------- 1 | // ALUopcode 2 | `define ALUAdd 2'b00 3 | `define ALUSub 2'b01 4 | `define ALUAnd 2'b10 5 | `define ALUOr 2'b11 6 | 7 | `define ALUMultiple 1'b0 8 | `define ALUDivide 1'b1 9 | 10 | // ExtSel 11 | `define ZesroExd 1'b0 12 | `define SignExd 1'b1 13 | 14 | // PCSrc 15 | `define NextIns 2'b00 16 | `define RelJmp 2'b01 //relative jump 17 | `define AbsJmp 2'b10 //absolute jump 18 | `define RsJmp 2'b11 // Jump to Rs, by JR instruction 19 | 20 | // for instruction 21 | // op code 22 | `define opRFormat 6'b000000 23 | `define opADD 6'b000000 24 | `define opSUB 6'b000000 25 | `define opAND 6'b000000 26 | `define opOR 6'b000000 27 | `define opSLL 6'b000000 28 | `define opSLT 6'b000000 29 | `define opJR 6'b000000 30 | `define opSLTI 6'b010001 31 | `define opADDI 6'b001000 32 | `define opORI 6'b001101 33 | `define opSW 6'b101011 34 | `define opLW 6'b100011 35 | `define opBEQ 6'b000100 36 | `define opBNE 6'b000101 37 | `define opBGTZ 6'b000111 38 | `define opJ 6'b000010 39 | `define opJAL 6'b011000 40 | `define opMULIU 6'b000000 41 | `define opDIVU 6'b000000 42 | `define opHALT 6'b111111 43 | // func code 44 | `define funcADD 6'b100000 45 | `define funcSUB 6'b100011 46 | `define funcAND 
6'b100100 47 | `define funcOR 6'b100101 48 | `define funcSLL 6'b000000 49 | `define funcSLT 6'b101010 50 | `define funcJR 6'b000001 51 | `define funcMULU 6'b001011 52 | `define funcDIVU 6'b011011 53 | // ALU state 54 | `define sIdle 0 55 | `define sPremitiveIns 2'b01 56 | `define sInverse 2'b10 // for Inverse 57 | `define sMAdd 2'b11 // for Minus Add 58 | 59 | 60 | `define sMul32 3'b001 61 | `define sMul16 3'b010 62 | `define sMul8 3'b011 63 | `define sMul4 3'b100 64 | `define sMul2 3'b101 65 | `define sMulAnswer 3'b110 66 | 67 | `define sFPMatchExp 2'b01 68 | `define sFPSumUp 2'b10 69 | `define sFPNorm 2'b11 70 | 71 | `define sWorking 1'b1 72 | 73 | // for RAMStation 74 | `define opLoad 1'b1 75 | `define opStore 1'b0 76 | 77 | // Labels code 78 | // dd-dd 79 | // category - id 80 | `define ALU0 4'b00_00 81 | `define ALU1 4'b00_01 82 | `define ALU2 4'b00_10 83 | `define MUL0 4'b01_00 84 | `define MUL1 4'b01_01 85 | `define MUL2 4'b01_10 86 | `define DIV0 4'b10_01 87 | `define DIV1 4'b10_10 88 | `define DIV2 4'b10_11 89 | `define QUE0 4'b11_00 90 | `define QUE1 4'b11_01 91 | `define QUE2 4'b11_10 92 | 93 | // for ALUSel 94 | `define addsubALU 2'b00 95 | `define multipleALU 2'b01 96 | `define divideALU 2'b11 97 | 98 | // RegDst 99 | `define FromRd 1'b0 100 | `define FromRt 1'b1 101 | // vkSrc 102 | `define FromRtData 1'b0 103 | `define FromImmd 1'b1 -------------------------------------------------------------------------------- /source/mfALU.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "head.v" 3 | module mfState( 4 | input clk, 5 | input nRST, 6 | output reg [2:0] stateOut, // to ALU 7 | input WEN, 8 | input requireAC, 9 | output available, 10 | output mfALUEN, // determine whether mdfALU should work 11 | input [1:0] op, // do nothing 12 | output require 13 | ); 14 | assign available = (require && requireAC) || stateOut == `sIdle; 15 | assign mfALUEN = available && WEN; 16 | assign require = 
stateOut == `sMulAnswer; 17 | always@(posedge clk or negedge nRST) begin 18 | if (!nRST) begin 19 | stateOut <= `sIdle; 20 | end else begin 21 | case(stateOut) 22 | `sMulAnswer: 23 | if (requireAC) begin 24 | stateOut <= WEN ? `sMul32 : `sIdle; 25 | end 26 | `sIdle: 27 | if (WEN) 28 | stateOut <= `sMul32; 29 | default: 30 | stateOut <= stateOut + 1; 31 | endcase 32 | end 33 | end 34 | endmodule 35 | 36 | module mfALU( 37 | input clk, 38 | input nRST, 39 | input EN, // linked from state::mfALUEN 40 | input [31:0] dataIn1, 41 | input [31:0] dataIn2, 42 | input [2:0] state, 43 | input [3:0] labelIn, 44 | output reg [31:0] result, 45 | output reg [3:0] labelOut 46 | ); 47 | reg [31:0]temp32[0:31]; 48 | reg [31:0]temp16[0:15]; 49 | reg [31:0]temp8[0:7]; 50 | reg [31:0]temp4[0:3]; 51 | reg [31:0]temp2[0:1]; 52 | 53 | always@(posedge clk or negedge nRST) begin 54 | if (!nRST) begin 55 | labelOut <= 0; 56 | end else if (EN) begin 57 | labelOut <= labelIn; 58 | end 59 | end 60 | 61 | generate 62 | genvar i; 63 | for (i = 0; i <= 31; i=i+1) begin 64 | always@(posedge clk or negedge nRST) begin 65 | if (!nRST) begin 66 | temp32[i] <= 32'b0; 67 | end else if (EN) begin 68 | temp32[i] <= dataIn2[i] == 0 ? 
0 : dataIn1 << i; 69 | end 70 | end 71 | end 72 | 73 | for (i = 0; i <= 15; i=i+1) begin 74 | always@(posedge clk or negedge nRST) begin 75 | if (!nRST) begin 76 | temp16[i] <= 32'b0; 77 | end else begin 78 | temp16[i] <= temp32[i] + temp32[i + 16]; 79 | end 80 | end 81 | end 82 | 83 | for (i = 0; i <= 7; i=i+1) begin 84 | always@(posedge clk or negedge nRST) begin 85 | if (!nRST) begin 86 | temp8[i] <= 32'b0; 87 | end else begin 88 | temp8[i] <= temp16[i] + temp16[i + 8]; 89 | end 90 | end 91 | end 92 | 93 | for (i = 0; i <= 3; i=i+1) begin 94 | always@(posedge clk or negedge nRST) begin 95 | if (!nRST) begin 96 | temp4[i] <= 32'b0; 97 | end else begin 98 | temp4[i] <= temp8[i] + temp8[i + 4]; 99 | end 100 | end 101 | end 102 | endgenerate 103 | always@(posedge clk or negedge nRST) begin 104 | if (!nRST) begin 105 | temp2[0] <= 32'b0; 106 | temp2[1] <= 32'b0; 107 | result <= 32'b0; 108 | end else begin 109 | temp2[0] <= temp4[0] + temp4[2]; 110 | temp2[1] <= temp4[1] + temp4[3]; 111 | result <= temp2[0] + temp2[1]; 112 | end 113 | end 114 | endmodule -------------------------------------------------------------------------------- /source/mux4to1_4.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | 3 | module mux4to1_4( 4 | input [1:0] sel, 5 | input [3:0] dataIn0, 6 | input [3:0] dataIn1, 7 | input [3:0] dataIn2, 8 | input [3:0] dataIn3, 9 | output reg[3:0] dataOut 10 | ); 11 | always@(*) begin 12 | case(sel) 13 | 2'b00: dataOut = dataIn0; 14 | 2'b01: dataOut = dataIn1; 15 | 2'b10: dataOut = dataIn2; 16 | 2'b11: dataOut = dataIn3; 17 | endcase 18 | end 19 | endmodule -------------------------------------------------------------------------------- /source/pmfALU.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "head.v" 3 | module pmfState( 4 | input clk, 5 | input nRST, 6 | output reg [1:0] stateOut, 7 | input WEN, 8 | input requireAC, 
9 | output available, 10 | output pmfALUEN, // send to pmfALU as EN 11 | input [1:0]op, 12 | output require 13 | ); 14 | assign available = (require && requireAC) || stateOut == `sIdle; 15 | assign pmfALUEN = available && WEN; 16 | assign require = stateOut == `sPremitiveIns || stateOut == `sMAdd; 17 | always@(posedge clk or negedge nRST) begin 18 | if (!nRST) begin 19 | stateOut <= `sIdle; 20 | end else begin 21 | case (stateOut) 22 | `sIdle : 23 | if (WEN) 24 | stateOut <= op == `ALUSub ? `sInverse : `sPremitiveIns; 25 | `sPremitiveIns, `sMAdd : begin 26 | if (requireAC) begin 27 | // if (WEN) begin 28 | // stateOut <= op == `ALUSub ? `sInverse : `sPremitiveIns; 29 | // end else begin 30 | stateOut <= `sIdle; 31 | // end 32 | end 33 | end 34 | `sInverse: 35 | stateOut <= `sMAdd; 36 | endcase 37 | end 38 | end 39 | endmodule 40 | 41 | module pmfALU( 42 | input clk, 43 | input nRST, 44 | input EN, // linked from State::pmfALUEN 45 | input [31:0] dataIn1, 46 | input [31:0] dataIn2, 47 | input [1:0] state, 48 | input [1:0]op, 49 | output reg [31:0] result, 50 | input [3:0] labelIn, 51 | output reg [3:0] labelOut 52 | ); 53 | reg [31:0] data1_latch; 54 | reg [31:0] data2_latch; 55 | reg [31:0] inverseData2_latch; 56 | reg [1:0] op_latch; 57 | always@(posedge clk or negedge nRST) begin 58 | if (!nRST) begin 59 | data1_latch <= 32'b0; 60 | data2_latch <= 32'b0; 61 | inverseData2_latch <= 32'b0; op_latch <= 2'b0; labelOut <= 4'b0; // reset every register written by this block so none holds X after reset 62 | end else begin 63 | if (EN) 64 | op_latch <= op; 65 | case (state) 66 | `sIdle, `sPremitiveIns, `sMAdd : 67 | if (EN) begin 68 | data1_latch <= dataIn1; 69 | data2_latch <= dataIn2; 70 | labelOut <= labelIn; 71 | end 72 | `sInverse : 73 | inverseData2_latch <= ~data2_latch + 1; // two's complement of operand 2, used by the subtract path 74 | endcase 75 | end 76 | end 77 | 78 | always@(*) begin 79 | case (op_latch) 80 | `ALUAdd : 81 | result = data1_latch + data2_latch; 82 | `ALUSub : 83 | result = data1_latch + inverseData2_latch; 84 | `ALUAnd : 85 | result = data1_latch & data2_latch; 86 | `ALUOr: // bitwise OR (was computed with & by mistake) 87 | result = data1_latch | 
data2_latch; 88 | default: 89 | result = 32'b0; 90 | endcase 91 | end 92 | endmodule 93 | -------------------------------------------------------------------------------- /source/top.v: -------------------------------------------------------------------------------- 1 | `include "head.v" 2 | `timescale 1ns/1ps 3 | module top( 4 | input clk, 5 | input nRST 6 | ); 7 | wire [5:0] op; 8 | //TODO:: not finished 9 | wire pcWrite = op == `opHALT ? 0 : 1; 10 | wire [1:0]sel = 0; 11 | //TODO END 12 | wire labelEN; 13 | wire [31:0] pc; 14 | wire [31:0] newpc; 15 | wire [31:0] ins; 16 | wire isFullOut; 17 | wire [5:0] func; 18 | wire [4:0] sftamt; 19 | wire [4:0] rs; 20 | wire [4:0] rt; 21 | wire [4:0] rd; 22 | wire [15:0] immd16; 23 | wire [25:0] immd26; 24 | wire [31:0] rsData; 25 | wire [31:0] rtData; 26 | wire [3:0] rsLabel; 27 | wire [3:0] rtLabel; 28 | wire BCEN; 29 | wire [31:0] BCdata; 30 | wire [3:0] BClabel; 31 | wire [3:0] alu_label; 32 | wire mul_EXEable; 33 | wire [1:0]mul_op; 34 | wire [31:0] mul_A; 35 | wire [31:0] mul_B; 36 | wire mul_isReady; 37 | wire [3:0] mul_label; 38 | wire mul_isfull; 39 | wire [31:0] mul_result; 40 | wire [3:0] mul_labelOut; 41 | wire RegDst; 42 | wire [1:0]ResStationDst; 43 | wire vkSrc; 44 | wire [3:0]queue_writeable_label; 45 | PC pc_instance( 46 | .clk(clk), 47 | .nRST(nRST), 48 | .newpc(newpc), 49 | .pcWrite(labelEN & pcWrite), 50 | .pc(pc) 51 | ); 52 | PCHelper pc_helper( 53 | .pc(pc), 54 | .immd16(immd16), 55 | .immd26(immd26), 56 | .sel(sel), 57 | .rs(0), // rs here is data 58 | .newpc 59 | ); 60 | ROM rom( 61 | .nrd(1'b0), 62 | .dataOut(ins), 63 | .addr(pc) 64 | ); 65 | Decoder decoder( 66 | .ins(ins), 67 | .op(op), 68 | .func(func), 69 | .sftamt(sftamt), 70 | .rs(rs), 71 | .rt(rt), 72 | .rd(rd), 73 | .immd16(immd16), 74 | .immd26(immd26) 75 | ); 76 | 77 | wire [3:0] cur_label; 78 | reg [4:0] writeDst; 79 | 80 | RegFile regfile( 81 | .clk(clk), 82 | .nRST(nRST), 83 | .ReadAddr1(rs), // TODO: 84 | .ReadAddr2(rt), 85 | 
.RegWr(labelEN), 86 | .WriteAddr(writeDst), 87 | .WriteLabel(cur_label), //TODO 88 | .DataOut1(rsData), 89 | .DataOut2(rtData), 90 | .LabelOut1(rsLabel), 91 | .LabelOut2(rtLabel), 92 | .BCEN(BCEN), 93 | .BClabel(BClabel), 94 | .BCdata(BCdata) 95 | ); 96 | 97 | 98 | // 假设已经搞定,译码完成,以下就是我想要的 99 | wire [3:0] ResStationEN;// 3,2,1,0 : lw,div,mul,alu 100 | wire [1:0] opcode;// updated by control_unit 101 | // wire [1:0] ResStationDst; // updated by control_unit 102 | wire [3:0] Qj; 103 | reg [3:0] Qk; 104 | wire [31:0] Vj; 105 | reg [31:0] Vk; 106 | // wire [31:0] Qi; 107 | // wire [31:0] A; 108 | //-------------------------- 109 | assign Qj = rsLabel; 110 | // assign Qk = rtLabel; 111 | assign Vj = rsData; 112 | //TODO : simplify this 113 | always@(*) begin 114 | if (vkSrc == `FromRtData) begin 115 | Vk = rtData; 116 | Qk = rtLabel; 117 | writeDst = rd; 118 | end 119 | else begin 120 | if (op == `opORI) begin 121 | Vk = {16'b0, immd16}; 122 | Qk = 4'b0000; 123 | writeDst = rt; 124 | end 125 | else begin 126 | Vk = {{16{immd16[15]}},immd16}; 127 | Qk = 4'b0000; 128 | writeDst = rt; 129 | end 130 | end 131 | end 132 | 133 | wire [3:0] alu_writeable_label; 134 | wire [3:0] mul_writeable_label; 135 | 136 | mux4to1_4 my_mux4to1_4( 137 | .sel((op == `opLW || op == `opSW) ? 
2'b11 : ResStationDst), 138 | .dataIn0(alu_writeable_label), 139 | .dataIn1(mul_writeable_label), 140 | .dataIn2(4'b0), 141 | .dataIn3(queue_writeable_label), 142 | .dataOut(cur_label) 143 | ); 144 | 145 | //------------------------------- 146 | 147 | wire alu_EXEable; 148 | wire [1:0]alu_op; 149 | wire [31:0] alu_A; 150 | wire [31:0] alu_B; 151 | wire alu_isReady; 152 | wire alu_isfull; 153 | wire [31:0] alu_result; 154 | wire [3:0] alu_labelOut; 155 | 156 | ReservationStation alu_reservationstation( 157 | .clk(clk), 158 | .nRST(nRST), 159 | .EXEable(alu_EXEable), 160 | .WEN(ResStationEN[0]), 161 | .ResStationDst(2'b01), 162 | .opCode(opcode), 163 | .dataIn1(Vj), 164 | .label1(Qj), 165 | .dataIn2(Vk), 166 | .label2(Qk), 167 | .BCEN(BCEN), 168 | .BClabel(BClabel), 169 | .BCdata(BCdata), 170 | .opOut(alu_op), 171 | .dataOut1(alu_A), 172 | .dataOut2(alu_B), 173 | .isFull(alu_isfull), 174 | .OutEn(alu_isReady), 175 | .ready_labelOut(alu_label), 176 | .writeable_labelOut(alu_writeable_label) 177 | ); 178 | 179 | wire [1:0]pmfStateOut; 180 | wire pmfALUAvailable; 181 | wire pmfALUEN; 182 | wire pmfRequire; 183 | pmfState pmf_state( 184 | .clk(clk), 185 | .nRST(nRST), 186 | .stateOut(pmfStateOut), 187 | .WEN(alu_isReady), 188 | .requireAC(requireAC_s[0]), 189 | .available(alu_EXEable), 190 | .pmfALUEN(pmfALUEN), 191 | .op(alu_op), 192 | .require(require_s[0]) 193 | ); 194 | 195 | pmfALU pmf_alu( 196 | .clk(clk), 197 | .nRST(nRST), 198 | .op(alu_op), 199 | .EN(pmfALUEN), 200 | .dataIn1(alu_A), 201 | .dataIn2(alu_B), 202 | .labelIn(alu_label), 203 | .state(pmfStateOut), 204 | .result(alu_result), 205 | .labelOut(alu_labelOut) 206 | ); 207 | 208 | 209 | 210 | 211 | 212 | 213 | ReservationStation mul_reservationstation( 214 | .clk(clk), 215 | .nRST(nRST), 216 | .EXEable(mul_EXEable), 217 | .WEN(ResStationEN[1]), 218 | .ResStationDst(2'b10), 219 | .opCode(opcode), 220 | .dataIn1(Vj), 221 | .label1(Qj), 222 | .dataIn2(Vk), 223 | .label2(Qk), 224 | .BCEN, 225 | .BClabel, 226 | 
.BCdata, 227 | .opOut(mul_op), 228 | .dataOut1(mul_A), 229 | .dataOut2(mul_B), 230 | .isFull(mul_isfull), 231 | .OutEn(mul_isReady), 232 | .ready_labelOut(mul_label), 233 | .writeable_labelOut(mul_writeable_label) 234 | ); 235 | 236 | wire [2:0]mfStateOut; 237 | wire mfALUAvailable; 238 | wire mfALUEN; 239 | wire mfRequire; 240 | mfState mf_state( 241 | .clk, 242 | .nRST, 243 | .stateOut(mfStateOut), 244 | .WEN(mul_isReady), 245 | .requireAC(requireAC_s[1]), 246 | .available(mul_EXEable), 247 | .mfALUEN, 248 | .op(mul_op), 249 | .require(require_s[1]) 250 | ); 251 | 252 | mfALU mf_alu( 253 | .clk, 254 | .nRST, 255 | .EN(mfALUEN), 256 | .dataIn1(mul_A), 257 | .dataIn2(mul_B), 258 | .labelIn(mul_label), 259 | .state(mfStateOut), 260 | .result(mul_result), 261 | .labelOut(mul_labelOut) 262 | ); 263 | 264 | 265 | 266 | // wire div_EXEable; 267 | // wire div_op; 268 | // wire [31:0] div_A; 269 | // wire [31:0] div_B; 270 | // wire div_isReady; 271 | wire [3:0] div_label; 272 | // wire div_isfull; 273 | // wire [31:0] div_result; 274 | 275 | // ReservationStation div_reservationstation( 276 | // .clk(clk), 277 | // .nRST(nRST), 278 | // .EXEable(div_EXEable),// TODO: 279 | // .WEN(ResStationEN[2]), 280 | // .ResStationDst(ResStationDst), 281 | // .opCode(op), 282 | // .dataIn1(Vj), 283 | // .label1(Qj), 284 | // .dataIn2(Vk), 285 | // .label2(Qk), 286 | // .BCEN, 287 | // .BClabel, 288 | // .BCdata, 289 | // .opOut(div_op), 290 | // .dataOut1(div_A), 291 | // .DataOut2(div_B), 292 | // .isFull(div_isfull), 293 | // .OutEn(div_isReady), 294 | // .labelOut(div_label), 295 | // ); 296 | 297 | // wire [1:0]dfStateOut; 298 | // wire dfALUAvailable; 299 | // wire dfALUEN; 300 | // wire dfRequire; 301 | // dfState df_state( 302 | // .clk, 303 | // .nRST, 304 | // .stateOut(dfStateOut), 305 | // .WEN(div_isReady), 306 | // .requireAC(),// TODO: 307 | // .available(div_EXEable), 308 | // .dfALUEN, 309 | // .op(div_op), 310 | // .require()// TODO: 311 | // ); 312 | 313 | // dfALU 
df_alu( 314 | // .clk, 315 | // .nRST, 316 | // .EN(dfALUEN), 317 | // .dataIn1(div_A), 318 | // .dataIn2(div_B), 319 | // .state(dfStateOut), 320 | // .result(div_result) 321 | // ); 322 | 323 | 324 | // 3,2,1,0 ls, div ,mul ,alu 325 | wire [3:0] require_s; 326 | wire [3:0] requireAC_s; 327 | 328 | // test memory 329 | assign require_s[2] = 0; 330 | 331 | wire memory_available; 332 | wire QueueOp; 333 | 334 | wire RTOpOut; 335 | wire [31:0]RTDataOut; 336 | wire [3:0]RTLabelOut; 337 | wire queue_isfull; 338 | wire [2:0]queue_require; 339 | wire isLastState; //TODO 340 | 341 | Queue opprendRT_queue( 342 | .clk(clk), 343 | .nRST(nRST), 344 | .requireAC(memory_available), 345 | .WEN(ResStationEN[3]), 346 | .isFull(queue_isfull), 347 | .require(queue_require[0]), 348 | .dataIn(rtData), 349 | .labelIn(rtLabel), 350 | .opIN(QueueOp), 351 | .BCEN(BCEN), 352 | .BClabel(BClabel), 353 | .BCdata(BCdata), 354 | .opOut(RTOpOut), 355 | .dataOut(RTDataOut), 356 | .labelOut(RTLabelOut), 357 | .isLastState(isLastState), 358 | .queue_writeable_label(queue_writeable_label) 359 | ); 360 | wire ImmdOpOut; 361 | wire [31:0]ImmdDataOut; 362 | wire [3:0]ImmdLabelOut; 363 | Queue opprendImmd_queue( 364 | .clk(clk), 365 | .nRST(nRST), 366 | .requireAC(memory_available), 367 | .WEN(ResStationEN[3]), 368 | .isFull(), 369 | .require(queue_require[1]), 370 | .dataIn({{16{immd16[15]}},immd16}), // TODO: not generated 371 | .labelIn(4'b0), 372 | .opIN(QueueOp), 373 | .BCEN(BCEN), 374 | .BClabel(BClabel), 375 | .BCdata(BCdata), 376 | .opOut(ImmdOpOut), 377 | .dataOut(ImmdDataOut), 378 | .labelOut(ImmdLabelOut), 379 | .isLastState(isLastState), 380 | .queue_writeable_label() 381 | ); 382 | wire RSOpOut; 383 | wire [31:0]RSDataOut; 384 | wire [3:0]RSLabelOut; 385 | Queue opprendRS_queue( 386 | .clk(clk), 387 | .nRST(nRST), 388 | .requireAC(memory_available), 389 | .WEN(ResStationEN[3]), 390 | .isFull(), 391 | .require(queue_require[2]), 392 | .dataIn(rsData), 393 | .labelIn(rsLabel), 394 | 
.opIN(QueueOp), 395 | .BCEN(BCEN), 396 | .BClabel(BClabel), 397 | .BCdata(BCdata), 398 | .opOut(RSOpOut), 399 | .dataOut(RSDataOut), 400 | .labelOut(RSLabelOut), 401 | .isLastState(isLastState), 402 | .queue_writeable_label() 403 | ); 404 | wire [31:0]memory_loadData; 405 | wire memory_require_CDB; 406 | wire [3:0]memory_labelOut; 407 | Memory yf_memory( 408 | .clk(clk), 409 | .WEN(&queue_require), 410 | .dataIn1(RSDataOut), 411 | .dataIn2(ImmdDataOut), 412 | .op(RTOpOut), 413 | .writeData(RTDataOut), 414 | .loadData(memory_loadData), 415 | .available(memory_available), 416 | .require(require_s[3]), 417 | .requireAC(requireAC_s[3]), 418 | .labelIn(RTLabelOut), 419 | .labelOut(memory_labelOut), 420 | .isLastState(isLastState) 421 | ); 422 | 423 | 424 | 425 | 426 | CDBHelper cdb_helper( 427 | .requires(require_s), 428 | .accepts(requireAC_s) 429 | ); 430 | 431 | CDB cdb( 432 | .data0(alu_result), 433 | .label0(alu_labelOut), 434 | .data1(mul_result), 435 | .label1(mul_labelOut), 436 | // TODO: no link dfalu 437 | .data2(0), 438 | .label2(4'b0), 439 | .data3(memory_loadData), 440 | .label3(memory_labelOut), 441 | 442 | .sel(require_s), 443 | .dataOut(BCdata), 444 | .labelOut(BClabel), 445 | .EN(BCEN) 446 | ); 447 | 448 | CU contril_unit( 449 | .op, 450 | .func, 451 | .ALUop(opcode), 452 | .ALUSel(ResStationDst), 453 | .ResStationEN, 454 | .isFull({queue_isfull, mul_isfull, alu_isfull}), 455 | .isFullOut(isFullOut), 456 | .vkSrc, 457 | .RegDst, 458 | .QueueOp(QueueOp) 459 | ); 460 | 461 | assign labelEN = ~isFullOut; 462 | 463 | endmodule -------------------------------------------------------------------------------- /test/Memory_tb.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | 3 | module Memory_tb(); 4 | reg clk; 5 | reg outEn; 6 | reg [31:0] dataIn1;// Qj 7 | reg [31:0] dataIn2;// A 8 | reg op;// for example, 1 is load, 0 is write 9 | reg [31:0] writeData; 10 | reg requireAC; 11 | wire [31:0] loadData; 
12 | wire available; 13 | wire requireCDB; 14 | 15 | initial begin 16 | requireAC = 0; 17 | clk = 0; 18 | outEn = 1; 19 | dataIn1 = 4; 20 | dataIn2 = 8; 21 | writeData = 32'h12345678; 22 | op = 0; 23 | #400 24 | requireAC = 1; 25 | dataIn1 = 4; 26 | dataIn2 = 8; 27 | writeData = 32'h12345678; 28 | op = 1; 29 | 30 | end 31 | 32 | always begin 33 | #10 34 | clk = ~clk; 35 | end 36 | 37 | Memory my_memory( 38 | .clk(clk), 39 | .WEN(outEn), 40 | .dataIn1(dataIn1), 41 | .dataIn2(dataIn2), 42 | .op(op), 43 | .writeData(writeData), 44 | .requireAC(requireAC), 45 | .loadData(loadData), 46 | .available(available), 47 | .require(requireCDB) 48 | ); 49 | 50 | 51 | 52 | endmodule -------------------------------------------------------------------------------- /test/Queue_tb.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "../source/head.v" 3 | module Queue_tb; 4 | reg clk = 0; 5 | reg nRST = 1; 6 | initial begin 7 | #1; 8 | nRST = 0; 9 | #1; 10 | nRST = 1; 11 | end 12 | always begin 13 | #5; 14 | clk = ~clk; 15 | end 16 | reg requireAC; 17 | reg WEN; 18 | wire isFull; 19 | wire require; 20 | reg [31:0]dataIn; 21 | reg [4:0] labelIn; 22 | reg opIN; 23 | reg BCEN; 24 | reg [4:0]BClabel; 25 | reg [31:0]BCdata; 26 | wire opOut; 27 | wire [31:0]dataOut; 28 | wire [31:0]labelOut; 29 | initial begin 30 | #3; 31 | requireAC = 0; 32 | WEN = 1; 33 | dataIn = 20; 34 | labelIn = 4; 35 | opIN = 0; 36 | BCEN = 0; 37 | #10; 38 | requireAC = 0; 39 | WEN = 1; 40 | dataIn = 30; 41 | labelIn = 5; 42 | #10; 43 | BCEN = 1; 44 | BClabel = 4; 45 | BCdata = 25; 46 | requireAC = 1; 47 | dataIn = 40; 48 | labelIn = 0; 49 | #10; 50 | BClabel = 5; 51 | BCdata = 1; 52 | dataIn = 30; 53 | labelIn = 2; 54 | #10; 55 | requireAC = 1; 56 | dataIn = 16; 57 | labelIn = 8; 58 | #10; 59 | dataIn = 1; 60 | labelIn = 2; 61 | #10; 62 | #10; 63 | BCEN = 1; 64 | BClabel = 2; 65 | BCdata = 10; 66 | #20; 67 | $finish; 68 | end 69 | Queue queue( 70 | 
.clk, 71 | .nRST, 72 | .requireAC, 73 | .WEN, 74 | .isFull, 75 | .require, 76 | .dataIn, 77 | .labelIn, 78 | .opIN, 79 | .BCEN, 80 | .BClabel, 81 | .BCdata, 82 | .opOut, 83 | .dataOut, 84 | .labelOut 85 | ); 86 | endmodule -------------------------------------------------------------------------------- /test/ReservationStation_tb.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns / 1ps 2 | // 测试发现问题: 3 | /* 4 | 1. 每一个上升沿一定会写(默认每次都会有新的指令)除非满 5 | 6 | */ 7 | module ReservationStation_tb(); 8 | reg clk; 9 | reg EXEable; // whether the ALU is available and ins can be issued 10 | reg WEN; // Write ENable 11 | 12 | reg [4:0] opCode; 13 | reg [4:0] func; 14 | reg [31:0] dataIn1; 15 | reg [4:0] label1; 16 | reg [31:0] dataIn2; 17 | reg [4:0] label2; 18 | 19 | reg BCEN; // BroadCast ENable 20 | reg [4:0] BClabel; // BoradCast label 21 | reg [31:0] BCdata; //BroadCast value 22 | 23 | wire [4:0] opOut; 24 | wire [31:0] dataOut1; 25 | wire [31:0] dataOut2; 26 | wire isFull; // whether the buffer is full 27 | wire OutEn; // whether output is valid 28 | wire [4:0]labelOut; 29 | initial begin 30 | clk = 0; 31 | EXEable = 0; 32 | opCode = 1; 33 | dataIn1 = 2; 34 | dataIn2 = 4; 35 | label1 = 2; 36 | label2 = 0; 37 | #40 38 | // EXEable = 1; 39 | opCode = 2; 40 | dataIn1 = 4; 41 | dataIn2 = 2; 42 | label1 = 0; 43 | label2 = 0; 44 | #40 45 | BCEN = 1; 46 | BClabel = 2; 47 | BCdata = 32; 48 | EXEable = 0; 49 | opCode = 3; 50 | dataIn1 = 8; 51 | dataIn2 = 16; 52 | label1 = 0; 53 | label2 = 2; 54 | #40 55 | EXEable = 1; 56 | opCode = 1; 57 | dataIn1 = 2; 58 | dataIn2 = 4; 59 | label1 = 0; 60 | label2 = 0; 61 | #40 62 | EXEable = 0; 63 | end 64 | 65 | always begin 66 | #20 67 | clk = ~clk; 68 | end 69 | 70 | ReservationStation test( 71 | .clk(clk), 72 | .EXEable(EXEable), 73 | .WEN(WEN), 74 | .opCode(opCode), 75 | .func(func), 76 | .dataIn1(dataIn1), 77 | .dataIn2(dataIn2), 78 | .label1(label1), 79 | .label2(label2), 80 | .BCEN(BCEN), 81 
| .BClabel(BClabel), 82 | .BCdata(BCdata), 83 | .opOut(opOut), 84 | .dataOut1(dataOut1), 85 | .dataOut2(dataOut2), 86 | .isFull(isFull), 87 | .OutEn(OutEn), 88 | .labelOut(labelOut) 89 | ); 90 | 91 | 92 | endmodule -------------------------------------------------------------------------------- /test/mdfALU_tb.v: -------------------------------------------------------------------------------- 1 | `include "../source/head.v" 2 | `timescale 1ns/1ps 3 | module mdfALU_tb; 4 | reg clk = 0; 5 | reg nRST = 1; 6 | wire inEN; 7 | reg resultAC = 1; 8 | wire [31:0]result; 9 | wire [2:0] stateOut; 10 | initial begin 11 | #1; 12 | nRST = 0; 13 | #1; 14 | nRST = 1; 15 | end 16 | always begin 17 | #5; 18 | clk = ~clk; 19 | end 20 | reg [31:0]dataIn1 = 5; 21 | reg [31:0]dataIn2 = 10; 22 | 23 | mfState state( 24 | .clk(clk), 25 | .nRST(nRST), 26 | .stateOut(stateOut), 27 | .inEN(inEN), 28 | .resultAC(resultAC), 29 | .available(available), 30 | .mdfALUEN(inEN), 31 | .requireCDB(r) 32 | ); 33 | mfALU alu( 34 | .clk(clk), 35 | .nRST(nRST), 36 | .EN(inEN), 37 | .dataIn1(dataIn1), 38 | .dataIn2(dataIn2), 39 | .state(stateOut), 40 | .result(result) 41 | ); 42 | endmodule -------------------------------------------------------------------------------- /test/register_tb.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "../source/head.v" 3 | module RegFile_tb(); 4 | reg clk = 0; 5 | reg nRST = 1; 6 | initial begin 7 | #1; 8 | nRST = 0; 9 | #1; 10 | nRST = 1; 11 | end 12 | always begin 13 | #5; 14 | clk = ~clk; 15 | end 16 | reg [4:0] ReadAddr1; 17 | reg [4:0] ReadAddr2; 18 | reg RegWr; 19 | reg [4:0] WriteAddr; 20 | reg [4:0]WriteLabel; 21 | reg BCEN; 22 | reg [4:0]BClabel; 23 | reg [31:0]BCdata; 24 | wire [31:0] DataOut1; 25 | wire [31:0] DataOut2; 26 | wire [4:0] LabelOut1; 27 | wire [4:0] LabelOut2; 28 | initial begin 29 | #4; 30 | ReadAddr1 = 1; 31 | ReadAddr2 = 2; 32 | RegWr = 1; 33 | WriteAddr = 2; 34 | WriteLabel 
= 3; 35 | BCEN = 0; 36 | #10; 37 | ReadAddr1 = 2; 38 | ReadAddr2 = 4; 39 | RegWr = 0; 40 | BCEN = 1; 41 | BClabel = 3; 42 | BCdata = 10; 43 | end 44 | 45 | RegFile regfile( 46 | .clk, 47 | .nRST, 48 | .ReadAddr1, 49 | .ReadAddr2, 50 | .RegWr, 51 | .WriteAddr, 52 | .WriteLabel, 53 | .DataOut1, 54 | .DataOut2, 55 | .LabelOut1, 56 | .LabelOut2, 57 | .BCEN, 58 | .BClabel, 59 | .BCdata 60 | ); 61 | endmodule -------------------------------------------------------------------------------- /test/tomasulo_tb.v: -------------------------------------------------------------------------------- 1 | `include "../source/head.v" 2 | `timescale 1ns/1ps 3 | module tomasulo; 4 | reg clk = 0; 5 | reg nRST = 1; 6 | initial begin 7 | #1; 8 | nRST = 0; 9 | #1; 10 | nRST = 1; 11 | end 12 | always begin 13 | #5; 14 | clk = ~clk; 15 | end 16 | endmodule -------------------------------------------------------------------------------- /test/top_tb.v: -------------------------------------------------------------------------------- 1 | `timescale 1ns/1ps 2 | `include "../source/head.v" 3 | module top_tb; 4 | reg clk = 1; 5 | reg nRST = 1; 6 | initial begin 7 | #1; 8 | nRST = 0; 9 | #2; 10 | nRST = 1; 11 | #60; 12 | 13 | end 14 | always begin 15 | #5; 16 | clk = ~clk; 17 | end 18 | top top_( 19 | .clk(clk), 20 | .nRST(nRST) 21 | ); 22 | endmodule --------------------------------------------------------------------------------