├── .gitignore ├── .sv.style ├── LICENSE ├── Makefile ├── README.md ├── build.sc ├── docs ├── imgs │ ├── SoC.png │ ├── nagi.jpeg │ ├── nagicore-5-stages.drawio.png │ └── nagicore-dual.dual.drawio.png └── nagicore.drawio ├── nscscc ├── async.v ├── ram_wrapper.v ├── thinpad_top.v └── uart_wrapper.sv └── src └── main ├── resources └── sv │ ├── DPIC_PERF_BRU.sv │ ├── DPIC_PERF_BUFF.sv │ ├── DPIC_PERF_CACHE.sv │ ├── DPIC_PERF_PIPE.sv │ ├── DPIC_RAM_1CYC.sv │ ├── DPIC_RAM_2CYC.sv │ ├── DPIC_TRACE_MEM.sv │ ├── DPIC_TYPES_DEFINE.sv │ ├── DPIC_UPDATE_GPR.sv │ ├── DPIC_UPDATE_GPR2.sv │ ├── DPIC_UPDATE_PC.sv │ ├── DPIC_UPDATE_PC2.sv │ └── axi_cdc │ ├── axi_cdc.v │ ├── axi_cdc_rd.v │ └── axi_cdc_wr.v └── scala └── nagicore ├── Main.scala ├── bus ├── AXI4.scala └── RAM.scala ├── loongarch ├── ISA.scala ├── nscscc2024 │ ├── Config.scala │ ├── Core.scala │ ├── CtrlFlags.scala │ ├── Decoder.scala │ └── stages │ │ ├── EX.scala │ │ ├── ID.scala │ │ ├── IF.scala │ │ ├── MEM.scala │ │ └── PREIF.scala └── nscscc2024Dual │ ├── Config.scala │ ├── Core.scala │ ├── CtrlFlags.scala │ ├── Decoder.scala │ └── stages │ ├── EX.scala │ ├── ID.scala │ ├── IF.scala │ ├── IS.scala │ ├── MEM.scala │ └── PREIF.scala ├── unit ├── ALU.scala ├── BPU.scala ├── BRU.scala ├── DIVU.scala ├── DPIC.scala ├── GPR.scala ├── InstrsBuff.scala ├── MIAU.scala ├── MULU.scala ├── RingBuff.scala ├── cache │ ├── Cache.scala │ ├── CacheMini.scala │ ├── CachePiped.scala │ ├── CacheType.scala │ ├── CacheWT.scala │ └── UnCache.scala └── ip │ └── Xiangshan │ ├── ArrayMulDataModule.scala │ └── CSA.scala └── utils ├── Flags.scala └── utils.scala /.gitignore: -------------------------------------------------------------------------------- 1 | /.bloop 2 | /.metals 3 | /.scala-build 4 | /.vscode 5 | /out 6 | /diagram 7 | /.cache 8 | /build 9 | compile_commands.json 10 | *.backup 11 | /.idea 12 | *.iml 13 | # *.drawio 14 | *.dtmp 15 | *.bkp 16 | /test_run_dir 17 | /nscscc/nagicore 18 | 19 | 20 | # Xilinx Vivado 21 | *.jou 22 | 
*.log -------------------------------------------------------------------------------- /.sv.style: -------------------------------------------------------------------------------- 1 | # https://chipsalliance.github.io/verible/lint.html 2 | -package-filename 3 | -enum-name-style 4 | -struct-union-name-style 5 | -line-length=150 6 | -no-tabs=false 7 | -explicit-function-lifetime=false 8 | -parameter-name-style 9 | -generate-label=false 10 | -always-comb=false 11 | -explicit-parameter-storage-type=false 12 | -no-trailing-spaces=false -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BUILD_DIR = build 2 | SRC = $(shell find src/main/scala -name "*.scala") 3 | SRC += $(shell find src/main/resources/sv -name "*.sv" -or -name "*.v") 4 | 5 | TARGET = $(BUILD_DIR)/Core.v 6 | 7 | $(TARGET): $(SRC) 8 | -rm -rf $(BUILD_DIR) 9 | mill nagicore.run hello 10 | 11 | generate: $(TARGET) 12 | 13 | generate-nscscc: $(SRC) 14 | -rm -rf $(BUILD_DIR) 15 | mill nagicore.run NSCSCC 16 | rm -rf ./nscscc/nagicore 17 | mkdir -p ./nscscc/nagicore 18 | cp build/*.sv nscscc/nagicore 19 | 20 | test: generate 21 | xmake b diff 22 | xmake r diff 23 | 24 | wave: 25 | xmake r wave 26 | 27 | config: 28 | xmake f --menu 29 | 30 | clean: 31 | -rm -rf $(BUILD_DIR) 32 | 33 | intellij-init: 34 | mill mill.idea.GenIdea/idea 35 | 36 | .PHONY: intellij-init clean config wave generate generate-nscscc test # command-only targets: none of them creates a file of its own name -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 凪(Nagi) 顺序单/双发射LA32R处理器 2 | 3 |

4 | 5 |

6 | 7 | ## Intro 8 | 9 | 凪(**Nagi**,意为“风平浪静”)包含两个支持部分LoongArch32R指令集的处理器: 10 | - 名为**NagiCore**的顺序单发射五级流水线标量处理器 11 | - 名为**NagiDual**的顺序双发射六级流水线超标量处理器 12 | 13 | Nagi系列处理器都具有高度可配置的Cache(缓存)、BTB(分支预测)、多周期乘除法器等部件,并支持AXI4总线访问(包括突发传输),能够正确通过在仿真环境和FPGA上的所有等级测试和性能测试。 14 | 15 | Nagi系列处理器是使用Chisel进行开发的,与敏捷开发环境[NagiDev](https://github.com/MrAMS/Nagi)紧密联系,可基于Verilator进行完整仿真,同时也可生成龙芯杯所需的FPGA上板工程。 16 | 17 | Nagi系列处理器是为2024年[龙芯杯](http://www.nscscc.com/)个人赛设计的,在决赛现场设计了名为MIA的协处理器进行硬件加速,获得决赛第1名的成绩(`0.000s`),最终成绩为LoongArch指令集赛道全国第4名。 18 | 19 | ## 性能指标 20 | 21 | NagiCore性能一览 22 | 23 | | 测试程序 | 耗时(s) | 频率(MHz) | IPC | 分支预测准确率 | ICache命中率 | 24 | | ----------- | ------- | --------- | ---------- | -------------- | ------------ | 25 | | MATRIX | 0.117 | 198 | 0.40116772 | 0.994812547 | 0.999997993 | 26 | | CRYPTONIGHT | 0.214 | 198 | 0.5569556 | 0.999994914 | 0.99999922 | 27 | 28 | NagiDual性能一览 29 | 30 | | 测试程序 | 耗时(s) | 频率(MHz) | IPC | 分支预测准确率 | ICache命中率 | 31 | | ----------- | ------- | --------- | ---------- | -------------- | ------------ | 32 | | MATRIX | 0.132 | 162 | 0.43596125 | 0.994812547 | 0.999997562 | 33 | | CRYPTONIGHT | 0.230 | 162 | 0.63767296 | 0.999994914 | 0.999999003 | 34 | 35 | *注:IPC等性能指标是从计时器开始时记录的(即串口输出`0x06`后开始)* 36 | 37 | *注:仍有相当大的超频空间* 38 | 39 | ## CPU架构 40 | 41 | 42 |

 43 | ![SoC](./docs/imgs/SoC.png) SoC架构图 44 |

45 | 46 | Nagi系列处理器的流水线采用各模块解耦的分布式控制。由于个人赛的性能测试程序(CRYPTONIGHT)对DCache极不友好,而且在高频率下访存代价非常大(采用四周期访存,因为SoC上的SRAM最高工作频率仅约50MHz),权衡之下,故均去除了DCache。 47 | 48 | NagiCore处理器采用五级流水线,包括预取指(PREIF)、取指(IF)、译码(ID)、执行(EX)、访存(MEM)五个阶段。 49 | 50 | ![NagiCore](./docs/imgs/nagicore-5-stages.drawio.png) 51 | 52 | NagiDual 处理器采用六级流水线,包括预取指(PREIF)、取指(IF)、译码(ID)、发射(IS)、执行(EX)、访存(MEM) 六个阶段。采用简单的非对称双发射结构以追求频率。 53 | 54 | ![NagiDual](./docs/imgs/nagicore-dual.dual.drawio.png) 55 | 56 | 更详尽的设计介绍请参见大赛设计报告`design.pdf`。 57 | 58 | -------------------------------------------------------------------------------- /build.sc: -------------------------------------------------------------------------------- 1 | // import Mill dependency 2 | import mill._ 3 | import mill.define.Sources 4 | import mill.modules.Util 5 | import mill.scalalib.TestModule.ScalaTest 6 | import scalalib._ 7 | // support BSP 8 | import mill.bsp._ 9 | 10 | import os.Path 11 | 12 | trait base extends SbtModule { m => 13 | override def millSourcePath = os.pwd 14 | override def scalaVersion = "2.13.12" 15 | override def scalacOptions = Seq( 16 | "-language:reflectiveCalls", 17 | "-deprecation", 18 | "-feature", 19 | "-Xcheckinit", 20 | ) 21 | override def ivyDeps = Agg( 22 | ivy"org.chipsalliance::chisel:5.1.0", 23 | ) 24 | override def scalacPluginIvyDeps = Agg( 25 | ivy"org.chipsalliance:::chisel-plugin:5.1.0", 26 | ) 27 | // object test extends SbtModuleTests with TestModule.ScalaTest { 28 | // override def ivyDeps = m.ivyDeps() ++ Agg( 29 | // ivy"org.scalatest::scalatest::3.2.16" 30 | // ) 31 | // } 32 | } 33 | 34 | object nagicore extends base 35 | -------------------------------------------------------------------------------- /docs/imgs/SoC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/SoC.png -------------------------------------------------------------------------------- 
/docs/imgs/nagi.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/nagi.jpeg -------------------------------------------------------------------------------- /docs/imgs/nagicore-5-stages.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/nagicore-5-stages.drawio.png -------------------------------------------------------------------------------- /docs/imgs/nagicore-dual.dual.drawio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/nagicore-dual.dual.drawio.png -------------------------------------------------------------------------------- /nscscc/async.v: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////// 2 | // RS-232 RX and TX module 3 | // (c) fpga4fun.com & KNJN LLC - 2003 to 2016 4 | 5 | // The RS-232 settings are fixed 6 | // TX: 8-bit data, 2 stop, no-parity 7 | // RX: 8-bit data, 1 stop, no-parity (the receiver can accept more stop bits of course) 8 | 9 | //`define SIMULATION // in this mode, TX outputs one bit per clock cycle 10 | // and RX receives one bit per clock cycle (for fast simulations) 11 | 12 | //////////////////////////////////////////////////////// 13 | 14 | module async_transmitter( 15 | input wire clk, 16 | input wire TxD_start, 17 | input wire [7:0] TxD_data, 18 | output wire TxD, 19 | output wire TxD_busy 20 | ); 21 | 22 | // Assert TxD_start for (at least) one clock cycle to start transmission of TxD_data 23 | // TxD_data is latched so that it doesn't have to stay valid while it is being sent 24 | 25 | parameter ClkFrequency = 25000000; // 
25MHz 26 | parameter Baud = 115200; 27 | 28 | // generate 29 | // if(ClkFrequency> 1); 52 | 53 | case(TxD_state) 54 | 4'b0000: if(TxD_start) TxD_state <= 4'b0100; 55 | 4'b0100: if(BitTick) TxD_state <= 4'b1000; // start bit 56 | 4'b1000: if(BitTick) TxD_state <= 4'b1001; // bit 0 57 | 4'b1001: if(BitTick) TxD_state <= 4'b1010; // bit 1 58 | 4'b1010: if(BitTick) TxD_state <= 4'b1011; // bit 2 59 | 4'b1011: if(BitTick) TxD_state <= 4'b1100; // bit 3 60 | 4'b1100: if(BitTick) TxD_state <= 4'b1101; // bit 4 61 | 4'b1101: if(BitTick) TxD_state <= 4'b1110; // bit 5 62 | 4'b1110: if(BitTick) TxD_state <= 4'b1111; // bit 6 63 | 4'b1111: if(BitTick) TxD_state <= 4'b0010; // bit 7 64 | 4'b0010: if(BitTick) TxD_state <= 4'b0000; // stop1 65 | //4'b0011: if(BitTick) TxD_state <= 4'b0000; // stop2 66 | default: if(BitTick) TxD_state <= 4'b0000; 67 | endcase 68 | end 69 | 70 | assign TxD = (TxD_state<4) | (TxD_state[3] & TxD_shift[0]); // put together the start, data and stop bits 71 | endmodule 72 | 73 | 74 | //////////////////////////////////////////////////////// 75 | module async_receiver( 76 | input wire clk, 77 | input wire RxD, 78 | output reg RxD_data_ready, 79 | input wire RxD_clear, 80 | output reg [7:0] RxD_data // data received, valid only (for one clock cycle) when RxD_data_ready is asserted 81 | ); 82 | 83 | parameter ClkFrequency = 25000000; // 25MHz 84 | parameter Baud = 115200; 85 | 86 | parameter Oversampling = 8; // needs to be a power of 2 87 | // we oversample the RxD line at a fixed rate to capture each RxD data bit at the "right" time 88 | // 8 times oversampling by default, use 16 for higher quality reception 89 | 90 | // generate 91 | // if(ClkFrequency>log2) log2=log2+1; end endfunction 136 | localparam l2o = log2(Oversampling); 137 | reg [l2o-2:0] OversamplingCnt = 0; 138 | always @(posedge clk) if(OversamplingTick) OversamplingCnt <= (RxD_state==0) ? 
1'd0 : OversamplingCnt + 1'd1; 139 | wire sampleNow = OversamplingTick && (OversamplingCnt==Oversampling/2-1); 140 | `endif 141 | 142 | // now we can accumulate the RxD bits in a shift-register 143 | always @(posedge clk) 144 | case(RxD_state) 145 | 4'b0000: if(~RxD_bit) RxD_state <= `ifdef SIMULATION 4'b1000 `else 4'b0001 `endif; // start bit found? 146 | 4'b0001: if(sampleNow) RxD_state <= 4'b1000; // sync start bit to sampleNow 147 | 4'b1000: if(sampleNow) RxD_state <= 4'b1001; // bit 0 148 | 4'b1001: if(sampleNow) RxD_state <= 4'b1010; // bit 1 149 | 4'b1010: if(sampleNow) RxD_state <= 4'b1011; // bit 2 150 | 4'b1011: if(sampleNow) RxD_state <= 4'b1100; // bit 3 151 | 4'b1100: if(sampleNow) RxD_state <= 4'b1101; // bit 4 152 | 4'b1101: if(sampleNow) RxD_state <= 4'b1110; // bit 5 153 | 4'b1110: if(sampleNow) RxD_state <= 4'b1111; // bit 6 154 | 4'b1111: if(sampleNow) RxD_state <= 4'b0010; // bit 7 155 | 4'b0010: if(sampleNow) RxD_state <= 4'b0000; // stop bit 156 | default: RxD_state <= 4'b0000; 157 | endcase 158 | 159 | always @(posedge clk) 160 | if(sampleNow && RxD_state[3]) RxD_data <= {RxD_bit, RxD_data[7:1]}; 161 | 162 | //reg RxD_data_error = 0; 163 | always @(posedge clk) 164 | begin 165 | if(RxD_clear) 166 | RxD_data_ready <= 0; 167 | else 168 | RxD_data_ready <= RxD_data_ready | (sampleNow && RxD_state==4'b0010 && RxD_bit); // make sure a stop bit is received 169 | //RxD_data_error <= (sampleNow && RxD_state==4'b0010 && ~RxD_bit); // error if a stop bit is not received 170 | end 171 | 172 | `ifdef SIMULATION 173 | assign RxD_idle = 0; 174 | `else 175 | reg [l2o+1:0] GapCnt = 0; 176 | always @(posedge clk) if (RxD_state!=0) GapCnt<=0; else if(OversamplingTick & ~GapCnt[log2(Oversampling)+1]) GapCnt <= GapCnt + 1'h1; 177 | assign RxD_idle = GapCnt[l2o+1]; 178 | always @(posedge clk) RxD_endofpacket <= OversamplingTick & ~GapCnt[l2o+1] & &GapCnt[l2o:0]; 179 | `endif 180 | 181 | endmodule 182 | 183 | 184 | 
//////////////////////////////////////////////////////// 185 | // dummy module used to be able to raise an assertion in Verilog 186 | module ASSERTION_ERROR(); 187 | endmodule 188 | 189 | 190 | //////////////////////////////////////////////////////// 191 | module BaudTickGen( 192 | input wire clk, enable, 193 | output wire tick // generate a tick at the specified baud rate * oversampling 194 | ); 195 | parameter ClkFrequency = 25000000; 196 | parameter Baud = 115200; 197 | parameter Oversampling = 1; 198 | 199 | function integer log2(input integer v); begin log2=0; while(v>>log2) log2=log2+1; end endfunction 200 | localparam AccWidth = log2(ClkFrequency/Baud)+8; // +/- 2% max timing error over a byte 201 | reg [AccWidth:0] Acc = 0; 202 | localparam ShiftLimiter = log2(Baud*Oversampling >> (31-AccWidth)); // this makes sure Inc calculation doesn't overflow 203 | localparam Inc = ((Baud*Oversampling << (AccWidth-ShiftLimiter))+(ClkFrequency>>(ShiftLimiter+1)))/(ClkFrequency>>ShiftLimiter); 204 | always @(posedge clk) if(enable) Acc <= Acc[AccWidth-1:0] + Inc[AccWidth:0]; else Acc <= Inc[AccWidth:0]; 205 | assign tick = Acc[AccWidth]; 206 | endmodule 207 | 208 | 209 | //////////////////////////////////////////////////////// 210 | -------------------------------------------------------------------------------- /nscscc/ram_wrapper.v: -------------------------------------------------------------------------------- 1 | module ram_wrapper( 2 | inout wire[31:0] ram_data, //RAM数据 3 | output wire[19:0] ram_addr, //RAM地址 4 | output wire[3:0] ram_be_n, //RAM字节使能,低有效。如果不使用字节使能,请保持为0 5 | output wire ram_ce_n, //RAM片选,低有效 6 | output wire ram_oe_n, //RAM读使能,低有效 7 | output wire ram_we_n, //RAM写使能,低有效 8 | 9 | output [31:0] io_sram_dout, 10 | input [19:0] io_sram_addr, 11 | input [31:0] io_sram_din, 12 | input io_sram_en, 13 | io_sram_re, 14 | io_sram_we, 15 | input [3:0] io_sram_wmask 16 | ); 17 | 18 | assign ram_addr = io_sram_addr; 19 | wire we = io_sram_en&&io_sram_we; 20 | wire 
re = !we; // 这样比io_sram_en&&io_sram_re时序要好点 21 | assign ram_be_n = we?~io_sram_wmask:0; 22 | assign ram_ce_n = 0; 23 | assign ram_oe_n = !re; 24 | assign ram_we_n = !we; 25 | assign ram_data = we ? io_sram_din : 32'dz; 26 | assign io_sram_dout = we ? 0 : ram_data; 27 | 28 | 29 | endmodule -------------------------------------------------------------------------------- /nscscc/thinpad_top.v: -------------------------------------------------------------------------------- 1 | `default_nettype wire 2 | 3 | module thinpad_top( 4 | input wire clk_50M, //50MHz 时钟输入 5 | input wire clk_11M0592, //11.0592MHz 时钟输入 6 | 7 | input wire clock_btn, //BTN5手动时钟按钮开关,带消抖电路,按下时为1 8 | input wire reset_btn, //BTN6手动复位按钮开关,带消抖电路,按下时为1 9 | 10 | input wire[3:0] touch_btn, //BTN1~BTN4,按钮开关,按下时为1 11 | input wire[31:0] dip_sw, //32位拨码开关,拨到"ON"时为1 12 | output wire[15:0] leds, //16位LED,输出时1点亮 13 | output wire[7:0] dpy0, //数码管低位信号,包括小数点,输出1点亮 14 | output wire[7:0] dpy1, //数码管高位信号,包括小数点,输出1点亮 15 | 16 | //CPLD串口控制器信号 17 | output wire uart_rdn, //读串口信号,低有效 18 | output wire uart_wrn, //写串口信号,低有效 19 | input wire uart_dataready, //串口数据准备好 20 | input wire uart_tbre, //发送数据标志 21 | input wire uart_tsre, //数据发送完毕标志 22 | 23 | //BaseRAM信号 24 | inout wire[31:0] base_ram_data, //BaseRAM数据,低8位与CPLD串口控制器共享 25 | output wire[19:0] base_ram_addr, //BaseRAM地址 26 | output wire[3:0] base_ram_be_n, //BaseRAM字节使能,低有效。如果不使用字节使能,请保持为0 27 | output wire base_ram_ce_n, //BaseRAM片选,低有效 28 | output wire base_ram_oe_n, //BaseRAM读使能,低有效 29 | output wire base_ram_we_n, //BaseRAM写使能,低有效 30 | 31 | //ExtRAM信号 32 | inout wire[31:0] ext_ram_data, //ExtRAM数据 33 | output wire[19:0] ext_ram_addr, //ExtRAM地址 34 | output wire[3:0] ext_ram_be_n, //ExtRAM字节使能,低有效。如果不使用字节使能,请保持为0 35 | output wire ext_ram_ce_n, //ExtRAM片选,低有效 36 | output wire ext_ram_oe_n, //ExtRAM读使能,低有效 37 | output wire ext_ram_we_n, //ExtRAM写使能,低有效 38 | 39 | //直连串口信号 40 | output wire txd, //直连串口发送端 41 | input wire rxd, //直连串口接收端 42 | 43 | //Flash存储器信号,参考 JS28F640 芯片手册 44 | 
output wire [22:0]flash_a, //Flash地址,a0仅在8bit模式有效,16bit模式无意义 45 | inout wire [15:0]flash_d, //Flash数据 46 | output wire flash_rp_n, //Flash复位信号,低有效 47 | output wire flash_vpen, //Flash写保护信号,低电平时不能擦除、烧写 48 | output wire flash_ce_n, //Flash片选信号,低有效 49 | output wire flash_oe_n, //Flash读使能信号,低有效 50 | output wire flash_we_n, //Flash写使能信号,低有效 51 | output wire flash_byte_n, //Flash 8bit模式选择,低有效。在使用flash的16位模式时请设为1 52 | 53 | //图像输出信号 54 | output wire[2:0] video_red, //红色像素,3位 55 | output wire[2:0] video_green, //绿色像素,3位 56 | output wire[1:0] video_blue, //蓝色像素,2位 57 | output wire video_hsync, //行同步(水平同步)信号 58 | output wire video_vsync, //场同步(垂直同步)信号 59 | output wire video_clk, //像素时钟输出 60 | output wire video_de //行数据有效信号,用于区分消隐区 61 | ); 62 | 63 | //assign leds = dip_sw[15:0]; 64 | 65 | wire [31:0] io_isram_dout; 66 | wire [19:0] io_isram_addr; 67 | wire [31:0] io_isram_din; 68 | wire io_isram_en; 69 | wire io_isram_re; 70 | wire io_isram_we; 71 | wire [3:0] io_isram_wmask; 72 | 73 | ram_wrapper iwrapper( 74 | .ram_data (base_ram_data), 75 | .ram_addr (base_ram_addr), 76 | .ram_be_n (base_ram_be_n), 77 | .ram_ce_n (base_ram_ce_n), 78 | .ram_oe_n (base_ram_oe_n), 79 | .ram_we_n (base_ram_we_n), 80 | 81 | .io_sram_dout (io_isram_dout), 82 | .io_sram_addr (io_isram_addr), 83 | .io_sram_din (io_isram_din), 84 | .io_sram_en (io_isram_en), 85 | .io_sram_re (io_isram_re), 86 | .io_sram_we (io_isram_we), 87 | .io_sram_wmask (io_isram_wmask) 88 | ); 89 | 90 | wire [31:0] io_dsram_dout; 91 | wire [19:0] io_dsram_addr; 92 | wire [31:0] io_dsram_din; 93 | wire io_dsram_en; 94 | wire io_dsram_we; 95 | wire io_dsram_re; 96 | wire [3:0] io_dsram_wmask; 97 | 98 | ram_wrapper dwrapper( 99 | .ram_data (ext_ram_data), 100 | .ram_addr (ext_ram_addr), 101 | .ram_be_n (ext_ram_be_n), 102 | .ram_ce_n (ext_ram_ce_n), 103 | .ram_oe_n (ext_ram_oe_n), 104 | .ram_we_n (ext_ram_we_n), 105 | 106 | .io_sram_dout (io_dsram_dout), 107 | .io_sram_addr (io_dsram_addr), 108 | .io_sram_din (io_dsram_din), 109 | 
.io_sram_en (io_dsram_en), 110 | .io_sram_re (io_dsram_re), 111 | .io_sram_we (io_dsram_we), 112 | .io_sram_wmask (io_dsram_wmask) 113 | 114 | ); 115 | 116 | wire io_uart_ar_ready; 117 | wire [7:0] io_uart_r_id; 118 | wire [1:0] io_uart_r_resp; 119 | wire [31:0] io_uart_r_data; 120 | wire io_uart_r_last; 121 | wire io_uart_r_valid; 122 | wire io_uart_aw_ready; 123 | wire io_uart_w_ready; 124 | wire [7:0] io_uart_b_id; 125 | wire [1:0] io_uart_b_resp; 126 | wire io_uart_b_valid; 127 | 128 | 129 | wire [7:0] io_uart_ar_id; 130 | wire [31:0] io_uart_ar_addr; 131 | wire [7:0] io_uart_ar_len; 132 | wire [2:0] io_uart_ar_size; 133 | wire [1:0] io_uart_ar_burst; 134 | wire io_uart_ar_valid; 135 | wire io_uart_r_ready; 136 | wire [7:0] io_uart_aw_id; 137 | wire [31:0] io_uart_aw_addr; 138 | wire [7:0] io_uart_aw_len; 139 | wire [2:0] io_uart_aw_size; 140 | wire [1:0] io_uart_aw_burst; 141 | wire io_uart_aw_valid; 142 | wire [31:0] io_uart_w_data; 143 | wire [3:0] io_uart_w_strb; 144 | wire io_uart_w_last, 145 | io_uart_w_valid, 146 | io_uart_b_ready; 147 | 148 | 149 | wire clk_cpu; 150 | wire clk_locked; 151 | reg rst_cpu; 152 | 153 | clk_wiz_0 clk_wiz_0_inst( 154 | .reset(reset_btn), 155 | .clk_50M(clk_50M), 156 | .clk_cpu(clk_cpu), 157 | .locked(clk_locked) 158 | ); 159 | 160 | always @(posedge clk_cpu or negedge clk_locked) begin 161 | if (~clk_locked) rst_cpu <= 1'b1; 162 | else rst_cpu <= 1'b0; 163 | end 164 | 165 | CoreNSCSCC core( 166 | .clock(clk_cpu), 167 | .reset(rst_cpu), 168 | .io_isram_dout(io_isram_dout), 169 | .io_dsram_dout(io_dsram_dout), 170 | .io_isram_addr(io_isram_addr), 171 | .io_isram_din(io_isram_din), 172 | .io_isram_en(io_isram_en), 173 | .io_isram_re(io_isram_re), 174 | .io_isram_we(io_isram_we), 175 | .io_isram_wmask(io_isram_wmask), 176 | .io_dsram_addr(io_dsram_addr), 177 | .io_dsram_din(io_dsram_din), 178 | .io_dsram_en(io_dsram_en), 179 | .io_dsram_re(io_dsram_re), 180 | .io_dsram_we(io_dsram_we), 181 | .io_dsram_wmask(io_dsram_wmask), 182 | 
183 | .io_uart_ar_ready(io_uart_ar_ready), 184 | .io_uart_r_bits_id(io_uart_r_id), 185 | .io_uart_r_bits_resp(io_uart_r_resp), 186 | .io_uart_r_bits_data(io_uart_r_data), 187 | .io_uart_r_bits_last(io_uart_r_last), 188 | .io_uart_r_valid(io_uart_r_valid), 189 | .io_uart_aw_ready(io_uart_aw_ready), 190 | .io_uart_w_ready(io_uart_w_ready), 191 | .io_uart_b_bits_id(io_uart_b_id), 192 | .io_uart_b_bits_resp(io_uart_b_resp), 193 | .io_uart_b_valid(io_uart_b_valid), 194 | 195 | .io_uart_ar_bits_id(io_uart_ar_id), 196 | .io_uart_ar_bits_addr(io_uart_ar_addr), 197 | .io_uart_ar_bits_len(io_uart_ar_len), 198 | .io_uart_ar_bits_size(io_uart_ar_size), 199 | .io_uart_ar_bits_burst(io_uart_ar_burst), 200 | .io_uart_ar_valid(io_uart_ar_valid), 201 | .io_uart_r_ready(io_uart_r_ready), 202 | .io_uart_aw_bits_id(io_uart_aw_id), 203 | .io_uart_aw_bits_addr(io_uart_aw_addr), 204 | .io_uart_aw_bits_len(io_uart_aw_len), 205 | .io_uart_aw_bits_size(io_uart_aw_size), 206 | .io_uart_aw_bits_burst(io_uart_aw_burst), 207 | .io_uart_aw_valid(io_uart_aw_valid), 208 | .io_uart_w_bits_data(io_uart_w_data), 209 | .io_uart_w_bits_strb(io_uart_w_strb), 210 | .io_uart_w_bits_last(io_uart_w_last), 211 | .io_uart_w_valid(io_uart_w_valid), 212 | .io_uart_b_ready(io_uart_b_ready) 213 | ); 214 | 215 | uart_wrapper#( 216 | .clk_freq(162000000), 217 | .uart_baud(9600) 218 | ) uart( 219 | .clk(clk_cpu), 220 | .rst(reset_btn), 221 | 222 | .txd(txd), 223 | .rxd(rxd), 224 | 225 | .io_uart_ar_ready(io_uart_ar_ready), 226 | .io_uart_r_id(io_uart_r_id), 227 | .io_uart_r_resp(io_uart_r_resp), 228 | .io_uart_r_data(io_uart_r_data), 229 | .io_uart_r_last(io_uart_r_last), 230 | .io_uart_r_valid(io_uart_r_valid), 231 | .io_uart_aw_ready(io_uart_aw_ready), 232 | .io_uart_w_ready(io_uart_w_ready), 233 | .io_uart_b_id(io_uart_b_id), 234 | .io_uart_b_resp(io_uart_b_resp), 235 | .io_uart_b_valid(io_uart_b_valid), 236 | 237 | .io_uart_ar_id(io_uart_ar_id), 238 | .io_uart_ar_addr(io_uart_ar_addr), 239 | 
.io_uart_ar_len(io_uart_ar_len), 240 | .io_uart_ar_size(io_uart_ar_size), 241 | .io_uart_ar_burst(io_uart_ar_burst), 242 | .io_uart_ar_valid(io_uart_ar_valid), 243 | .io_uart_r_ready(io_uart_r_ready), 244 | .io_uart_aw_id(io_uart_aw_id), 245 | .io_uart_aw_addr(io_uart_aw_addr), 246 | .io_uart_aw_len(io_uart_aw_len), 247 | .io_uart_aw_size(io_uart_aw_size), 248 | .io_uart_aw_burst(io_uart_aw_burst), 249 | .io_uart_aw_valid(io_uart_aw_valid), 250 | .io_uart_w_data(io_uart_w_data), 251 | .io_uart_w_strb(io_uart_w_strb), 252 | .io_uart_w_last(io_uart_w_last), 253 | .io_uart_w_valid(io_uart_w_valid), 254 | .io_uart_b_ready(io_uart_b_ready) 255 | ); 256 | 257 | assign leds = dip_sw[15:0]; 258 | 259 | endmodule 260 | -------------------------------------------------------------------------------- /nscscc/uart_wrapper.sv: -------------------------------------------------------------------------------- 1 | module uart_wrapper # ( 2 | parameter clk_freq = 50000000, 3 | parameter uart_baud = 9600 4 | )( 5 | input wire clk, 6 | input wire rst, 7 | output io_uart_ar_ready, 8 | output [7:0] io_uart_r_id, 9 | output [1:0] io_uart_r_resp, 10 | output [31:0] io_uart_r_data, 11 | output io_uart_r_last, 12 | output io_uart_r_valid, 13 | output io_uart_aw_ready, 14 | output io_uart_w_ready, 15 | output [7:0] io_uart_b_id, 16 | output [1:0] io_uart_b_resp, 17 | output io_uart_b_valid, 18 | 19 | input [7:0] io_uart_ar_id, 20 | input [31:0] io_uart_ar_addr, 21 | input [7:0] io_uart_ar_len, 22 | input [2:0] io_uart_ar_size, 23 | input [1:0] io_uart_ar_burst, 24 | input io_uart_ar_valid, 25 | input io_uart_r_ready, 26 | input [7:0] io_uart_aw_id, 27 | input [31:0] io_uart_aw_addr, 28 | input [7:0] io_uart_aw_len, 29 | input [2:0] io_uart_aw_size, 30 | input [1:0] io_uart_aw_burst, 31 | input io_uart_aw_valid, 32 | input [31:0] io_uart_w_data, 33 | input [3:0] io_uart_w_strb, 34 | input io_uart_w_last, 35 | input io_uart_w_valid, 36 | input io_uart_b_ready, 37 | 38 | 39 | output wire txd, 
//直连串口发送端 40 | input wire rxd //直连串口接收端 41 | ); 42 | 43 | wire [7:0] ext_uart_rx; 44 | reg [7:0] ext_uart_tx; 45 | wire ext_uart_ready, ext_uart_clear, ext_uart_busy; 46 | reg ext_uart_start; 47 | 48 | 49 | reg [7:0] rid; 50 | reg [31:0] raddr; 51 | reg stater; 52 | 53 | wire ar_fire = io_uart_ar_valid && io_uart_ar_ready; 54 | wire r_fire = io_uart_r_valid && io_uart_r_ready; 55 | wire [7:0] uart_state = {6'b0, ext_uart_ready, !ext_uart_busy}; 56 | // 0xBFD003F8 -> rw data 57 | // 0xBFD003FC -> state 58 | wire read_state = raddr[2]; 59 | wire [7:0] rdata = read_state ? uart_state : ext_uart_rx; 60 | 61 | assign io_uart_ar_ready = !stater; 62 | assign io_uart_r_valid = (read_state ? 1 : ext_uart_ready) && stater; 63 | assign io_uart_r_data = {4{rdata}}; 64 | assign io_uart_r_id = rid; 65 | assign io_uart_r_last = io_uart_r_valid; 66 | assign io_uart_r_resp = 0; 67 | 68 | always @(posedge clk) begin 69 | if (rst) begin 70 | stater <= 0; 71 | end else begin 72 | if(!stater&&ar_fire) begin 73 | rid <= io_uart_ar_id; 74 | raddr <= io_uart_ar_addr; 75 | stater <= 1; 76 | end 77 | if(stater&&r_fire) begin 78 | stater <= 0; 79 | end 80 | end 81 | end 82 | 83 | 84 | async_receiver #(.ClkFrequency(clk_freq),.Baud(uart_baud)) //接收模块,9600无检验位 85 | ext_uart_r( 86 | .clk(clk), //外部时钟信号 87 | .RxD(rxd), //外部串行信号输入 88 | .RxD_data_ready(ext_uart_ready), //数据接收到标志 89 | .RxD_clear(ext_uart_clear), //清除接收标志 90 | .RxD_data(ext_uart_rx) //接收到的一字节数据 91 | ); 92 | 93 | assign ext_uart_clear = r_fire && !read_state; 94 | 95 | wire aw_fire = io_uart_aw_valid && io_uart_aw_ready; 96 | wire w_fire = io_uart_w_valid && io_uart_w_ready; 97 | wire b_fire = io_uart_b_valid && io_uart_b_ready; 98 | 99 | reg [7:0] wid; 100 | reg wb; 101 | 102 | // parameter WS_IDLE = 0; 103 | // parameter WS_W = 1; 104 | // parameter WS_B = 2; 105 | 106 | assign io_uart_aw_ready = 1; 107 | assign io_uart_w_ready = !ext_uart_busy; 108 | assign io_uart_b_id = wid; 109 | assign io_uart_b_valid = wb; 110 | assign 
io_uart_b_resp = 0; 111 | 112 | 113 | always @(posedge clk) begin 114 | if (rst) begin 115 | wb <= 0; 116 | end else begin 117 | if(aw_fire) begin 118 | wid <= io_uart_aw_id; 119 | end 120 | if(!wb&&w_fire) begin 121 | wb <= 1; 122 | end 123 | if(wb&&b_fire) begin 124 | wb <= 0; 125 | end 126 | end 127 | end 128 | 129 | always @(posedge clk) begin //将缓冲区ext_uart_buffer发送出去 130 | if(rst) begin 131 | ext_uart_tx <= 0; 132 | ext_uart_start <= 0; 133 | end else begin 134 | if(!ext_uart_busy&&w_fire)begin 135 | ext_uart_tx <= io_uart_w_data[7:0]; 136 | ext_uart_start <= 1; 137 | end else if(ext_uart_busy) begin 138 | ext_uart_start <= 0; 139 | end 140 | end 141 | end 142 | 143 | async_transmitter #(.ClkFrequency(clk_freq),.Baud(uart_baud)) //发送模块,9600无检验位 144 | ext_uart_t( 145 | .clk(clk), //外部时钟信号 146 | .TxD(txd), //串行信号输出 147 | .TxD_busy(ext_uart_busy), //发送器忙状态指示 148 | .TxD_start(ext_uart_start), //开始发送信号 149 | .TxD_data(ext_uart_tx) //待发送的数据 150 | ); 151 | 152 | endmodule -------------------------------------------------------------------------------- /src/main/resources/sv/DPIC_PERF_BRU.sv: -------------------------------------------------------------------------------- 1 | `include "DPIC_TYPES_DEFINE.sv" 2 | import "DPI-C" function void dpic_perf_bru(input `uint8_t fail); 3 | module DPIC_PERF_BRU #( 4 | parameter DATA_WIDTH = 32 5 | ) ( 6 | input wire clk, 7 | input wire rst, 8 | input wire valid, 9 | input wire [7:0] fail 10 | ); 11 | 12 | always @(posedge clk) begin 13 | if(!rst && valid) begin 14 | dpic_perf_bru(fail); 15 | end 16 | end 17 | 18 | endmodule -------------------------------------------------------------------------------- /src/main/resources/sv/DPIC_PERF_BUFF.sv: -------------------------------------------------------------------------------- 1 | `include "DPIC_TYPES_DEFINE.sv" 2 | import "DPI-C" function void dpic_update_instrs_buff(input `uint8_t id, input `uint8_t head, input `uint8_t tail, input `uint8_t full, input `uint8_t reload); 3 | 
// Simulation probe: every rising clock edge while reset is low, reports the
// five 8-bit status values to the C side via dpic_update_instrs_buff().
// DATA_WIDTH is declared but not used inside this module.
module DPIC_PERF_BUFF #(
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire [7:0] id,
    input wire [7:0] head,
    input wire [7:0] tail,
    input wire [7:0] full,
    input wire [7:0] reload
);

    always @(posedge clk) begin
        if(!rst) begin
            dpic_update_instrs_buff(id, head, tail, full, reload);
        end
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_PERF_CACHE.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_perf_cache(input `uint8_t id, input `uint8_t access_type);

// Simulation probe: on every rising clock edge where reset is low and `valid`
// is high, reports (id, access_type) to the C side via dpic_perf_cache().
// DATA_WIDTH is declared but not used inside this module.
module DPIC_PERF_CACHE #(
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire valid,
    input wire [7:0] id,
    input wire [7:0] access_type

);

    always @(posedge clk) begin
        if(!rst && valid) begin
            dpic_perf_cache(id, access_type);
        end
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_PERF_PIPE.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_perf_pipe(input `uint8_t id, input `uint8_t valid, input `uint8_t stall);

// Simulation probe: every rising clock edge while reset is low, reports the
// stage id plus the zero-extended `invalid` and `stall` bits via
// dpic_perf_pipe(). DATA_WIDTH is declared but not used inside this module.
// NOTE(review): the DPI prototype names its second argument `valid`, while
// this module feeds it the `invalid` port - confirm which polarity the C side
// expects.
module DPIC_PERF_PIPE #(
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire [7:0] id,
    input wire invalid,
    input wire stall
);

    always @(posedge clk) begin
        if(!rst) begin
            dpic_perf_pipe(id, {7'b0, invalid}, {7'b0, stall});
        end
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_RAM_1CYC.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
// import "DPI-C" function void dpic_bus_read(input `uint32_t addr, input `uint8_t size, output `uint32_t rdata);
// import "DPI-C" function void dpic_bus_write(input `uint32_t addr, input `uint8_t wmask, input `uint32_t wdata);

// Combinational (same-cycle) RAM model backed by the DPI-C bus functions.
// While `en` is high and reset is low, a write (`we`) calls dpic_bus_write()
// and otherwise a read (`re`) calls dpic_bus_read(); `rdata` is 0 in every
// other case. Writes take priority over reads.
// NOTE(review): the DPI imports are commented out in this file; identical
// declarations appear in DPIC_RAM_2CYC.sv, so this module presumably relies on
// that file being part of the same compilation unit - verify.
// NOTE(review): the DPI calls sit in an always @(*) block, so how many times
// they run per cycle is up to the simulator's evaluation of the block.
module DPIC_RAM_1CYC #(
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire en,
    input wire [ADDR_WIDTH-1:0] addr,
    input wire re,
    input wire we,
    input wire [DATA_WIDTH/8-1:0] wmask,
    input wire [1:0] size,
    input wire [DATA_WIDTH-1:0] wdata,
    output reg [DATA_WIDTH-1:0] rdata
);

    always @(*) begin
        rdata = 0;
        if (en&&!rst) begin
            if(we) begin
                dpic_bus_write({{32-ADDR_WIDTH{1'b0}}, addr}, {{8-DATA_WIDTH/8{1'b0}}, wmask}, wdata);
            end else if(re) begin
                dpic_bus_read({{32-ADDR_WIDTH{1'b0}}, addr}, {6'b0, size}, rdata);
            end
        end
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_RAM_2CYC.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_bus_read(input `uint32_t addr, input `uint8_t size, output `uint32_t rdata);
import "DPI-C" function void dpic_bus_write(input `uint32_t addr, input `uint8_t wmask, input `uint32_t wdata);

// Registered RAM model backed by the DPI-C bus functions.
// On a rising clock edge with `en` high: a write (`we`) calls
// dpic_bus_write(); otherwise a read (`re`) calls dpic_bus_read() and the
// result is registered into `rdata`, so it is visible after that edge.
// Synchronous reset clears `rdata`. Writes take priority over reads.
module DPIC_RAM_2CYC #(
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire en,
    input wire [ADDR_WIDTH-1:0] addr,
    input wire re,
    input wire we,
    input wire [DATA_WIDTH/8-1:0] wmask,
    input wire [1:0] size,
    input wire [DATA_WIDTH-1:0] wdata,
    output reg [DATA_WIDTH-1:0] rdata
);
    // Scratch variable that receives the DPI `output` argument. It has to be
    // a variable: an output argument is written procedurally by the call, and
    // a net (the former `wire`) is not a legal target for that.
    reg [DATA_WIDTH-1:0] rdata_dpi;

    always @(posedge clk) begin
        if (rst) begin
            rdata <= 0;
        end else begin
            if (en) begin
                if(we) begin
                    dpic_bus_write({{32-ADDR_WIDTH{1'b0}}, addr}, {{8-DATA_WIDTH/8{1'b0}}, wmask}, wdata);
                end else if(re) begin
                    // The call fills rdata_dpi immediately; the non-blocking
                    // assignment then registers it.
                    dpic_bus_read({{32-ADDR_WIDTH{1'b0}}, addr}, {6'b0, size}, rdata_dpi);
                    rdata <= rdata_dpi;
                end
            end
        end
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_TRACE_MEM.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_trace_mem(input `uint32_t addr, input `uint8_t size, input `uint32_t data, input `uint8_t wmask);

// Simulation probe: on every rising clock edge where reset is low and `valid`
// is high, reports one memory access (address, size, data, byte mask) to the
// C side via dpic_trace_mem(). All fields are zero-extended to the DPI widths.
module DPIC_TRACE_MEM #(
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire valid,
    input wire [ADDR_WIDTH-1:0] addr,
    input wire [DATA_WIDTH/8-1:0] wmask,
    input wire [1:0] size,
    // Sized by DATA_WIDTH (was ADDR_WIDTH) so that it agrees with the
    // {32-DATA_WIDTH{1'b0}} zero-extension below; identical for the defaults.
    input wire [DATA_WIDTH-1:0] data
);

    always @(posedge clk) begin
        if(!rst && valid) begin
            dpic_trace_mem({{32-ADDR_WIDTH{1'b0}}, addr}, {6'b0, size}, {{32-DATA_WIDTH{1'b0}}, data}, {{8-DATA_WIDTH/8{1'b0}}, wmask});
        end
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_TYPES_DEFINE.sv:
--------------------------------------------------------------------------------
// typedef byte uint8_t;
// typedef shortint uint16_t;
// typedef int uint32_t;
// typedef longint uint64_t;

// C-style fixed-width names for the unsigned SystemVerilog integer types used
// in the DPI-C import prototypes.
`define uint8_t byte unsigned
`define uint16_t shortint unsigned
`define uint32_t int unsigned
`define uint64_t longint unsigned

--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_GPR.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void
dpic_update_gpr(input `uint8_t id, input `uint32_t value);

// Simulation probe for a single register-file write port.
// On a rising clock edge, a write to any register other than index 0 is
// reported through dpic_update_gpr() with the index zero-extended to 8 bits.
// Nothing is reported while reset is high.
module DPIC_UPDATE_GPR #(
    parameter GPR_NUM = 32,
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire [$clog2(GPR_NUM)-1:0] id,
    input wire wen,
    input wire [DATA_WIDTH-1:0] wdata
);

    always @(posedge clk)
        if (wen && !rst && (id != 0))
            dpic_update_gpr({{8-$clog2(GPR_NUM){1'b0}}, id}, wdata);

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_GPR2.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_update_gpr(input `uint8_t id, input `uint32_t value);

// Simulation probe for two register-file write ports.
// Each port is reported independently through dpic_update_gpr(); when both
// fire on the same edge, port 1 is reported before port 2. Writes to index 0
// and anything during reset are not reported.
module DPIC_UPDATE_GPR2 #(
    parameter GPR_NUM = 32,
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire [$clog2(GPR_NUM)-1:0] id1,
    input wire wen1,
    input wire [DATA_WIDTH-1:0] wdata1,
    input wire [$clog2(GPR_NUM)-1:0] id2,
    input wire wen2,
    input wire [DATA_WIDTH-1:0] wdata2
);

    always @(posedge clk) begin
        // port 1
        if (wen1 && !rst && (id1 != 0))
            dpic_update_gpr({{8-$clog2(GPR_NUM){1'b0}}, id1}, wdata1);
        // port 2
        if (wen2 && !rst && (id2 != 0))
            dpic_update_gpr({{8-$clog2(GPR_NUM){1'b0}}, id2}, wdata2);
    end

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_PC.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_update_pc(input `uint32_t value);

// Simulation probe: on a rising clock edge with `wen` high and reset low,
// reports `pc` through dpic_update_pc().
module DPIC_UPDATE_PC #(
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire wen,
    input wire [DATA_WIDTH-1:0] pc
);

    always @(posedge clk)
        if (wen && !rst)
            dpic_update_pc(pc);

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_PC2.sv:
--------------------------------------------------------------------------------
`include "DPIC_TYPES_DEFINE.sv"
import "DPI-C" function void dpic_update_pc2(input `uint32_t pc1, input `uint8_t valid1, input `uint32_t pc2, input `uint8_t valid2);

// Simulation probe for two PC slots: every rising clock edge while reset is
// low, reports both PCs together with their zero-extended enable bits through
// dpic_update_pc2(). The call is made even when neither `wen` bit is set.
module DPIC_UPDATE_PC2 #(
    parameter DATA_WIDTH = 32
) (
    input wire clk,
    input wire rst,
    input wire wen1,
    input wire [DATA_WIDTH-1:0] pc1,
    input wire wen2,
    input wire [DATA_WIDTH-1:0] pc2
);

    always @(posedge clk)
        if (!rst)
            dpic_update_pc2(pc1, {7'b0, wen1}, pc2, {7'b0, wen2});

endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/axi_cdc/axi_cdc.v:
--------------------------------------------------------------------------------
/*

Copyright (c) 2019 Alex Forencich

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

// Language: Verilog 2001

`resetall
`timescale 1ns / 1ps
`default_nettype none

/*
 * AXI4 clock domain crossing module
 *
 * Structural wrapper only: the write channels (AW/W/B) are handled by
 * axi_cdc_wr and the read channels (AR/R) by axi_cdc_rd. The s_* ports are
 * clocked by s_clk, the m_* ports by m_clk.
 */
module axi_cdc #
(
    // Width of data bus in bits
    parameter DATA_WIDTH = 32,
    // Width of address bus in bits
    parameter ADDR_WIDTH = 32,
    // Width of wstrb (width of data bus in words)
    parameter STRB_WIDTH = (DATA_WIDTH/8),
    // Width of ID
    parameter ID_WIDTH = 4
)
(
    /*
     * AXI4 slave interface
     */
    input  wire                   s_clk,
    input  wire                   s_rst,
    input  wire [ADDR_WIDTH-1:0]  s_axi_awaddr,
    input  wire [ID_WIDTH-1:0]    s_axi_awid,
    input  wire [8-1:0]           s_axi_awlen,
    input  wire [3-1:0]           s_axi_awsize,
    input  wire [2-1:0]           s_axi_awburst,
    input  wire [2:0]             s_axi_awprot,
    input  wire                   s_axi_awvalid,
    output wire                   s_axi_awready,
    input  wire [DATA_WIDTH-1:0]  s_axi_wdata,
    input  wire [STRB_WIDTH-1:0]  s_axi_wstrb,
    input  wire                   s_axi_wlast,
    input  wire                   s_axi_wvalid,
    output wire                   s_axi_wready,
    output wire [ID_WIDTH-1:0]    s_axi_bid,
    output wire [1:0]             s_axi_bresp,
    output wire                   s_axi_bvalid,
    input  wire                   s_axi_bready,
    input  wire [ADDR_WIDTH-1:0]  s_axi_araddr,
    input  wire [ID_WIDTH-1:0]    s_axi_arid,
    input  wire [8-1:0]           s_axi_arlen,
    input  wire [3-1:0]           s_axi_arsize,
    input  wire [2-1:0]           s_axi_arburst,
    input  wire [2:0]             s_axi_arprot,
    input  wire                   s_axi_arvalid,
    output wire                   s_axi_arready,
    output wire [DATA_WIDTH-1:0]  s_axi_rdata,
    output wire [ID_WIDTH-1:0]    s_axi_rid,
    output wire                   s_axi_rlast,
    output wire [1:0]             s_axi_rresp,
    output wire                   s_axi_rvalid,
    input  wire                   s_axi_rready,

    /*
     * AXI4 master interface
     */
    input  wire                   m_clk,
    input  wire                   m_rst,
    output wire [ADDR_WIDTH-1:0]  m_axi_awaddr,
    output wire [ID_WIDTH-1:0]    m_axi_awid,
    output wire [8-1:0]           m_axi_awlen,
    output wire [3-1:0]           m_axi_awsize,
    output wire [2-1:0]           m_axi_awburst,
    output wire [2:0]             m_axi_awprot,
    output wire                   m_axi_awvalid,
    input  wire                   m_axi_awready,
    output wire [DATA_WIDTH-1:0]  m_axi_wdata,
    output wire [STRB_WIDTH-1:0]  m_axi_wstrb,
    output wire                   m_axi_wlast,
    output wire                   m_axi_wvalid,
    input  wire                   m_axi_wready,
    input  wire [ID_WIDTH-1:0]    m_axi_bid,
    input  wire [1:0]             m_axi_bresp,
    input  wire                   m_axi_bvalid,
    output wire                   m_axi_bready,
    output wire [ADDR_WIDTH-1:0]  m_axi_araddr,
    output wire [ID_WIDTH-1:0]    m_axi_arid,
    output wire [8-1:0]           m_axi_arlen,
    output wire [3-1:0]           m_axi_arsize,
    output wire [2-1:0]           m_axi_arburst,
    output wire [2:0]             m_axi_arprot,
    output wire                   m_axi_arvalid,
    input  wire                   m_axi_arready,
    input  wire [DATA_WIDTH-1:0]  m_axi_rdata,
    input  wire [ID_WIDTH-1:0]    m_axi_rid,
    input  wire                   m_axi_rlast,
    input  wire [1:0]             m_axi_rresp,
    input  wire                   m_axi_rvalid,
    output wire                   m_axi_rready
);

// Write path (AW / W / B).
// ID_WIDTH is forwarded explicitly: without it the sub-module falls back to
// its own default of 4 and the *id ports no longer match this wrapper's width.
axi_cdc_wr #(
    .DATA_WIDTH(DATA_WIDTH),
    .ADDR_WIDTH(ADDR_WIDTH),
    .STRB_WIDTH(STRB_WIDTH),
    .ID_WIDTH(ID_WIDTH)
)
axi_cdc_wr_inst (
    /*
     * AXI4 slave interface
     */
    .s_clk(s_clk),
    .s_rst(s_rst),
    .s_axi_awaddr(s_axi_awaddr),
    .s_axi_awid(s_axi_awid),
    .s_axi_awlen(s_axi_awlen),
    .s_axi_awsize(s_axi_awsize),
    .s_axi_awburst(s_axi_awburst),
    .s_axi_awprot(s_axi_awprot),
    .s_axi_awvalid(s_axi_awvalid),
    .s_axi_awready(s_axi_awready),
    .s_axi_wdata(s_axi_wdata),
    .s_axi_wstrb(s_axi_wstrb),
    .s_axi_wlast(s_axi_wlast),
    .s_axi_wvalid(s_axi_wvalid),
    .s_axi_wready(s_axi_wready),
    .s_axi_bid(s_axi_bid),
    .s_axi_bresp(s_axi_bresp),
    .s_axi_bvalid(s_axi_bvalid),
    .s_axi_bready(s_axi_bready),

    /*
     * AXI4 master interface
     */
    .m_clk(m_clk),
    .m_rst(m_rst),
    .m_axi_awaddr(m_axi_awaddr),
    .m_axi_awid(m_axi_awid),
    .m_axi_awlen(m_axi_awlen),
    .m_axi_awsize(m_axi_awsize),
    .m_axi_awburst(m_axi_awburst),
    .m_axi_awprot(m_axi_awprot),
    .m_axi_awvalid(m_axi_awvalid),
    .m_axi_awready(m_axi_awready),
    .m_axi_wdata(m_axi_wdata),
    .m_axi_wstrb(m_axi_wstrb),
    .m_axi_wlast(m_axi_wlast),
    .m_axi_wvalid(m_axi_wvalid),
    .m_axi_wready(m_axi_wready),
    .m_axi_bid(m_axi_bid),
    .m_axi_bresp(m_axi_bresp),
    .m_axi_bvalid(m_axi_bvalid),
    .m_axi_bready(m_axi_bready)
);

// Read path (AR / R). ID_WIDTH is forwarded for the same reason as above.
axi_cdc_rd #(
    .DATA_WIDTH(DATA_WIDTH),
    .ADDR_WIDTH(ADDR_WIDTH),
    .STRB_WIDTH(STRB_WIDTH),
    .ID_WIDTH(ID_WIDTH)
)
axi_cdc_rd_inst (
    /*
     * AXI4 slave interface
     */
    .s_clk(s_clk),
    .s_rst(s_rst),
    .s_axi_araddr(s_axi_araddr),
    .s_axi_arid(s_axi_arid),
    .s_axi_arlen(s_axi_arlen),
    .s_axi_arsize(s_axi_arsize),
    .s_axi_arburst(s_axi_arburst),
    .s_axi_arprot(s_axi_arprot),
    .s_axi_arvalid(s_axi_arvalid),
    .s_axi_arready(s_axi_arready),
    .s_axi_rdata(s_axi_rdata),
    .s_axi_rid(s_axi_rid),
    .s_axi_rlast(s_axi_rlast),
    .s_axi_rresp(s_axi_rresp),
    .s_axi_rvalid(s_axi_rvalid),
    .s_axi_rready(s_axi_rready),

    /*
     * AXI4 master interface
     */
    .m_clk(m_clk),
    .m_rst(m_rst),
    .m_axi_araddr(m_axi_araddr),
    .m_axi_arid(m_axi_arid),
    .m_axi_arlen(m_axi_arlen),
    .m_axi_arsize(m_axi_arsize),
    .m_axi_arburst(m_axi_arburst),
    .m_axi_arprot(m_axi_arprot),
    .m_axi_arvalid(m_axi_arvalid),
    .m_axi_arready(m_axi_arready),
    .m_axi_rdata(m_axi_rdata),
    .m_axi_rid(m_axi_rid),
    .m_axi_rlast(m_axi_rlast),
    .m_axi_rresp(m_axi_rresp),
    .m_axi_rvalid(m_axi_rvalid),
    .m_axi_rready(m_axi_rready)
);

endmodule

`resetall
--------------------------------------------------------------------------------
/src/main/resources/sv/axi_cdc/axi_cdc_rd.v:
--------------------------------------------------------------------------------
/*

Copyright (c) 2019 Alex Forencich

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

// Language: Verilog 2001

`resetall
`timescale 1ns / 1ps
`default_nettype none

/*
 * AXI4 clock domain crossing module (read)
 *
 * Carries one read request at a time from the s_clk domain to the m_clk
 * domain and its response back, using a four-phase flag handshake:
 *   1. slave side latches AR and raises s_flag_reg;
 *   2. master side sees the synchronized flag, issues AR and captures R;
 *   3. master side raises m_flag_reg, slave side copies the response and
 *      drops s_flag_reg;
 *   4. master side drops m_flag_reg and both sides return to idle.
 * Each flag passes through a two-register synchronizer in the receiving
 * domain. The payload registers are read across domains only while the
 * matching flag indicates they are stable.
 *
 * NOTE(review): only a single R transfer is captured per request (rready is
 * deasserted once one response is latched) while arlen is forwarded
 * unchanged - confirm that callers only issue single-beat reads.
 */
module axi_cdc_rd #
(
    // Width of data bus in bits
    parameter DATA_WIDTH = 32,
    // Width of address bus in bits
    parameter ADDR_WIDTH = 32,
    // Width of wstrb (width of data bus in words)
    parameter STRB_WIDTH = (DATA_WIDTH/8),
    // Width of ID
    parameter ID_WIDTH = 4
)
(
    /*
     * AXI slave interface (read channels)
     */
    input  wire                   s_clk,
    input  wire                   s_rst,
    input  wire [ADDR_WIDTH-1:0]  s_axi_araddr,

    input  wire [ID_WIDTH-1:0]    s_axi_arid,
    input  wire [8-1:0]           s_axi_arlen,
    input  wire [3-1:0]           s_axi_arsize,
    input  wire [2-1:0]           s_axi_arburst,

    input  wire [2:0]             s_axi_arprot,
    input  wire                   s_axi_arvalid,
    output wire                   s_axi_arready,

    output wire [DATA_WIDTH-1:0]  s_axi_rdata,

    output wire [ID_WIDTH-1:0]    s_axi_rid,
    output wire                   s_axi_rlast,

    output wire [1:0]             s_axi_rresp,
    output wire                   s_axi_rvalid,
    input  wire                   s_axi_rready,

    /*
     * AXI master interface (read channels)
     */
    input  wire                   m_clk,
    input  wire                   m_rst,
    output wire [ADDR_WIDTH-1:0]  m_axi_araddr,

    output wire [ID_WIDTH-1:0]    m_axi_arid,
    output wire [8-1:0]           m_axi_arlen,
    output wire [3-1:0]           m_axi_arsize,
    output wire [2-1:0]           m_axi_arburst,

    output wire [2:0]             m_axi_arprot,
    output wire                   m_axi_arvalid,
    input  wire                   m_axi_arready,
    input  wire [DATA_WIDTH-1:0]  m_axi_rdata,

    input  wire [ID_WIDTH-1:0]    m_axi_rid,
    input  wire                   m_axi_rlast,

    input  wire [1:0]             m_axi_rresp,
    input  wire                   m_axi_rvalid,
    output wire                   m_axi_rready
);

// Slave-domain FSM state and request flag. The s_flag_sync_* registers are
// clocked by m_clk: they bring s_flag_reg into the master domain.
reg [1:0] s_state_reg = 2'd0;
reg s_flag_reg = 1'b0;
(* srl_style = "register" *)
reg s_flag_sync_reg_1 = 1'b0;
(* srl_style = "register" *)
reg s_flag_sync_reg_2 = 1'b0;

// Master-domain FSM state and response flag. The m_flag_sync_* registers are
// clocked by s_clk: they bring m_flag_reg into the slave domain.
reg [1:0] m_state_reg = 2'd0;
reg m_flag_reg = 1'b0;
(* srl_style = "register" *)
reg m_flag_sync_reg_1 = 1'b0;
(* srl_style = "register" *)
reg m_flag_sync_reg_2 = 1'b0;

// Slave-domain copies of the request (AR) and of the returned response (R).
reg [ADDR_WIDTH-1:0] s_axi_araddr_reg = {ADDR_WIDTH{1'b0}};

reg [ID_WIDTH-1:0] s_axi_arid_reg = {ID_WIDTH{1'b0}};
reg [8-1:0] s_axi_arlen_reg = {8{1'b0}};
reg [3-1:0] s_axi_arsize_reg = {3{1'b0}};
reg [2-1:0] s_axi_arburst_reg = {2{1'b0}};

reg [2:0] s_axi_arprot_reg = 3'd0;
reg s_axi_arvalid_reg = 1'b0;
reg [DATA_WIDTH-1:0] s_axi_rdata_reg = {DATA_WIDTH{1'b0}};

reg [ID_WIDTH-1:0] s_axi_rid_reg = {ID_WIDTH{1'b0}};
reg s_axi_rlast_reg = 1'b0;

reg [1:0] s_axi_rresp_reg = 2'b00;
reg s_axi_rvalid_reg = 1'b0;

// Master-domain copies of the request (AR) and of the captured response (R).
reg [ADDR_WIDTH-1:0] m_axi_araddr_reg = {ADDR_WIDTH{1'b0}};

reg [ID_WIDTH-1:0] m_axi_arid_reg = {ID_WIDTH{1'b0}};
reg [8-1:0] m_axi_arlen_reg = {8{1'b0}};
reg [3-1:0] m_axi_arsize_reg = {3{1'b0}};
reg [2-1:0] m_axi_arburst_reg = {2{1'b0}};

reg [2:0] m_axi_arprot_reg = 3'd0;
reg m_axi_arvalid_reg = 1'b0;
reg [DATA_WIDTH-1:0] m_axi_rdata_reg = {DATA_WIDTH{1'b0}};

reg [ID_WIDTH-1:0] m_axi_rid_reg = {ID_WIDTH{1'b0}};
reg m_axi_rlast_reg = 1'b0;

reg [1:0] m_axi_rresp_reg = 2'b00;
// Starts (and resets) at 1 so that m_axi_rready is low until a request has
// been issued; master state 0 clears it to arm the response capture.
reg m_axi_rvalid_reg = 1'b1;

// A new AR is accepted only when no request is latched and no response is
// waiting to be taken by the upstream master.
assign s_axi_arready = !s_axi_arvalid_reg && !s_axi_rvalid_reg;
assign s_axi_rdata = s_axi_rdata_reg;

assign s_axi_rid = s_axi_rid_reg;
assign s_axi_rlast = s_axi_rlast_reg;

assign s_axi_rresp = s_axi_rresp_reg;
assign s_axi_rvalid = s_axi_rvalid_reg;

assign m_axi_araddr = m_axi_araddr_reg;

assign m_axi_arid = m_axi_arid_reg;
assign m_axi_arlen = m_axi_arlen_reg;
assign m_axi_arsize = m_axi_arsize_reg;
assign m_axi_arburst = m_axi_arburst_reg;

assign m_axi_arprot = m_axi_arprot_reg;
assign m_axi_arvalid = m_axi_arvalid_reg;
assign m_axi_rready = !m_axi_rvalid_reg;

// slave side
// Later non-blocking assignments in this block override earlier ones to the
// same register, so the FSM overrides the defaults and reset overrides both.
always @(posedge s_clk) begin
    // default: hold the response until the upstream master accepts it
    s_axi_rvalid_reg <= s_axi_rvalid_reg && !s_axi_rready;

    // while idle, sample the AR channel every cycle
    if (!s_axi_arvalid_reg && !s_axi_rvalid_reg) begin
        s_axi_araddr_reg <= s_axi_araddr;

        s_axi_arid_reg <= s_axi_arid;
        s_axi_arlen_reg <= s_axi_arlen;
        s_axi_arsize_reg <= s_axi_arsize;
        s_axi_arburst_reg <= s_axi_arburst;

        s_axi_arprot_reg <= s_axi_arprot;
        s_axi_arvalid_reg <= s_axi_arvalid;
    end

    case (s_state_reg)
        2'd0: begin
            // request latched: signal the master domain
            if (s_axi_arvalid_reg) begin
                s_state_reg <= 2'd1;
                s_flag_reg <= 1'b1;
            end
        end
        2'd1: begin
            // response ready on the master side: copy it and present it
            if (m_flag_sync_reg_2) begin
                s_state_reg <= 2'd2;
                s_flag_reg <= 1'b0;
                s_axi_rdata_reg <= m_axi_rdata_reg;

                s_axi_rid_reg <= m_axi_rid_reg;
                s_axi_rlast_reg <= m_axi_rlast_reg;

                s_axi_rresp_reg <= m_axi_rresp_reg;
                s_axi_rvalid_reg <= 1'b1;
            end
        end
        2'd2: begin
            // master side has dropped its flag: release the request slot
            if (!m_flag_sync_reg_2) begin
                s_state_reg <= 2'd0;
                s_axi_arvalid_reg <= 1'b0;
            end
        end
    endcase

    if (s_rst) begin
        s_state_reg <= 2'd0;
        s_flag_reg <= 1'b0;
        s_axi_arvalid_reg <= 1'b0;
        s_axi_rvalid_reg <= 1'b0;
    end
end

// synchronization
// m_flag_reg -> s_clk domain
always @(posedge s_clk) begin
    m_flag_sync_reg_1 <= m_flag_reg;
    m_flag_sync_reg_2 <= m_flag_sync_reg_1;
end

// s_flag_reg -> m_clk domain
always @(posedge m_clk) begin
    s_flag_sync_reg_1 <= s_flag_reg;
    s_flag_sync_reg_2 <= s_flag_sync_reg_1;
end

// master side
// Same override ordering as the slave side: defaults, then FSM, then reset.
always @(posedge m_clk) begin
    // default: keep AR asserted until the downstream slave accepts it
    m_axi_arvalid_reg <= m_axi_arvalid_reg && !m_axi_arready;

    // while armed, sample the R channel every cycle until a valid beat lands
    if (!m_axi_rvalid_reg) begin
        m_axi_rdata_reg <= m_axi_rdata;

        m_axi_rid_reg <= m_axi_rid;
        m_axi_rlast_reg <= m_axi_rlast;

        m_axi_rresp_reg <= m_axi_rresp;
        m_axi_rvalid_reg <= m_axi_rvalid;
    end

    case (m_state_reg)
        2'd0: begin
            // request flagged by the slave side: copy it, issue AR, arm R
            if (s_flag_sync_reg_2) begin
                m_state_reg <= 2'd1;
                m_axi_araddr_reg <= s_axi_araddr_reg;

                m_axi_arid_reg <= s_axi_arid_reg;
                m_axi_arlen_reg <= s_axi_arlen_reg;
                m_axi_arsize_reg <= s_axi_arsize_reg;
                m_axi_arburst_reg <= s_axi_arburst_reg;

                m_axi_arprot_reg <= s_axi_arprot_reg;
                m_axi_arvalid_reg <= 1'b1;
                m_axi_rvalid_reg <= 1'b0;
            end
        end
        2'd1: begin
            // response captured: tell the slave side
            if (m_axi_rvalid_reg) begin
                m_flag_reg <= 1'b1;
                m_state_reg <= 2'd2;
            end
        end
        2'd2: begin
            // slave side has taken the response: return to idle
            if (!s_flag_sync_reg_2) begin
                m_state_reg <= 2'd0;
                m_flag_reg <= 1'b0;
            end
        end
    endcase

    if (m_rst) begin
        m_state_reg <= 2'd0;
        m_flag_reg <= 1'b0;
        m_axi_arvalid_reg <= 1'b0;
        m_axi_rvalid_reg <= 1'b1;
    end
end

endmodule

`resetall
--------------------------------------------------------------------------------
/src/main/resources/sv/axi_cdc/axi_cdc_wr.v:
--------------------------------------------------------------------------------
/*

Copyright (c) 2019 Alex Forencich

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

// Language: Verilog 2001

`resetall
`timescale 1ns / 1ps
`default_nettype none

/*
 * AXI4 clock domain crossing module (write)
 *
 * Carries one write at a time from the s_clk domain to the m_clk domain and
 * its B response back, using a four-phase flag handshake:
 *   1. slave side latches AW and W and, once both are held, raises s_flag_reg;
 *   2. master side sees the synchronized flag, issues AW and W and captures B;
 *   3. master side raises m_flag_reg, slave side copies the response and
 *      drops s_flag_reg;
 *   4. master side drops m_flag_reg and both sides return to idle.
 * Each flag passes through a two-register synchronizer in the receiving
 * domain.
 *
 * NOTE(review): only a single W transfer is latched per request while awlen
 * is forwarded unchanged - confirm that callers only issue single-beat writes.
 */
module axi_cdc_wr #
(
    // Width of data bus in bits
    parameter DATA_WIDTH = 32,
    // Width of address bus in bits
    parameter ADDR_WIDTH = 32,
    // Width of wstrb (width of data bus in words)
    parameter STRB_WIDTH = (DATA_WIDTH/8),
    // Width of ID
    parameter ID_WIDTH = 4
)
(
    /*
     * AXI slave interface
     */
    input  wire                   s_clk,
    input  wire                   s_rst,
    input  wire [ADDR_WIDTH-1:0]  s_axi_awaddr,

    input  wire [ID_WIDTH-1:0]    s_axi_awid,
    input  wire [8-1:0]           s_axi_awlen,
    input  wire [3-1:0]           s_axi_awsize,
    input  wire [2-1:0]           s_axi_awburst,

    input  wire [2:0]             s_axi_awprot,
    input  wire                   s_axi_awvalid,
    output wire                   s_axi_awready,

    input  wire [DATA_WIDTH-1:0]  s_axi_wdata,
    input  wire [STRB_WIDTH-1:0]  s_axi_wstrb,

    input  wire                   s_axi_wlast,

    input  wire                   s_axi_wvalid,
    output wire                   s_axi_wready,

    output wire [ID_WIDTH-1:0]    s_axi_bid,

    output wire [1:0]             s_axi_bresp,
    output wire                   s_axi_bvalid,
    input  wire                   s_axi_bready,

    /*
     * AXI master interface
     */
    input  wire                   m_clk,
    input  wire                   m_rst,
    output wire [ADDR_WIDTH-1:0]  m_axi_awaddr,

    output wire [ID_WIDTH-1:0]    m_axi_awid,
    output wire [8-1:0]           m_axi_awlen,
    output wire [3-1:0]           m_axi_awsize,
    output wire [2-1:0]           m_axi_awburst,

    output wire [2:0]             m_axi_awprot,
    output wire                   m_axi_awvalid,
    input  wire                   m_axi_awready,
    output wire [DATA_WIDTH-1:0]  m_axi_wdata,
    output wire [STRB_WIDTH-1:0]  m_axi_wstrb,

    output wire                   m_axi_wlast,

    output wire                   m_axi_wvalid,
    input  wire                   m_axi_wready,

    input  wire [ID_WIDTH-1:0]    m_axi_bid,

    input  wire [1:0]             m_axi_bresp,
    input  wire                   m_axi_bvalid,
    output wire                   m_axi_bready
);

// Slave-domain FSM state and request flag. The s_flag_sync_* registers are
// clocked by m_clk: they bring s_flag_reg into the master domain.
reg [1:0] s_state_reg = 2'd0;
reg s_flag_reg = 1'b0;
(* srl_style = "register" *)
reg s_flag_sync_reg_1 = 1'b0;
(* srl_style = "register" *)
reg s_flag_sync_reg_2 = 1'b0;

// Master-domain FSM state and response flag. The m_flag_sync_* registers are
// clocked by s_clk: they bring m_flag_reg into the slave domain.
reg [1:0] m_state_reg = 2'd0;
reg m_flag_reg = 1'b0;
(* srl_style = "register" *)
reg m_flag_sync_reg_1 = 1'b0;
(* srl_style = "register" *)
reg m_flag_sync_reg_2 = 1'b0;

// Slave-domain copies of the request (AW, W) and of the returned response (B).
reg [ADDR_WIDTH-1:0] s_axi_awaddr_reg = {ADDR_WIDTH{1'b0}};

reg [ID_WIDTH-1:0] s_axi_awid_reg = {ID_WIDTH{1'b0}};
reg [8-1:0] s_axi_awlen_reg = {8{1'b0}};
reg [3-1:0] s_axi_awsize_reg = {3{1'b0}};
reg [2-1:0] s_axi_awburst_reg = {2{1'b0}};

reg [2:0] s_axi_awprot_reg = 3'd0;
reg s_axi_awvalid_reg = 1'b0;
reg [DATA_WIDTH-1:0] s_axi_wdata_reg = {DATA_WIDTH{1'b0}};
reg [STRB_WIDTH-1:0] s_axi_wstrb_reg = {STRB_WIDTH{1'b0}};

reg s_axi_wlast_reg = 0;

reg s_axi_wvalid_reg = 1'b0;

reg [ID_WIDTH-1:0] s_axi_bid_reg = {ID_WIDTH{1'b0}};

reg [1:0] s_axi_bresp_reg = 2'b00;
reg s_axi_bvalid_reg = 1'b0;

// Master-domain copies of the request (AW, W) and of the captured response (B).
reg [ADDR_WIDTH-1:0] m_axi_awaddr_reg = {ADDR_WIDTH{1'b0}};

reg [ID_WIDTH-1:0] m_axi_awid_reg = {ID_WIDTH{1'b0}};
reg [8-1:0] m_axi_awlen_reg = {8{1'b0}};
reg [3-1:0] m_axi_awsize_reg = {3{1'b0}};
reg [2-1:0] m_axi_awburst_reg = {2{1'b0}};

reg [2:0] m_axi_awprot_reg = 3'd0;
reg m_axi_awvalid_reg = 1'b0;
reg [DATA_WIDTH-1:0] m_axi_wdata_reg = {DATA_WIDTH{1'b0}};
reg [STRB_WIDTH-1:0] m_axi_wstrb_reg = {STRB_WIDTH{1'b0}};

reg m_axi_wlast_reg = 0;

reg m_axi_wvalid_reg = 1'b0;

reg [ID_WIDTH-1:0] m_axi_bid_reg = {ID_WIDTH{1'b0}};

reg [1:0] m_axi_bresp_reg = 2'b00;
// Starts (and resets) at 1 so that m_axi_bready is low until a request has
// been issued; master state 0 clears it to arm the response capture.
reg m_axi_bvalid_reg = 1'b1;

// AW and W are accepted independently, each only while its own slot is empty
// and no B response is waiting to be taken by the upstream master.
assign s_axi_awready = !s_axi_awvalid_reg && !s_axi_bvalid_reg;
assign s_axi_wready = !s_axi_wvalid_reg && !s_axi_bvalid_reg;

assign s_axi_bid = s_axi_bid_reg;

assign s_axi_bresp = s_axi_bresp_reg;
assign s_axi_bvalid = s_axi_bvalid_reg;

assign m_axi_awaddr = m_axi_awaddr_reg;

assign m_axi_awid = m_axi_awid_reg;
assign m_axi_awlen = m_axi_awlen_reg;
assign m_axi_awsize = m_axi_awsize_reg;
assign m_axi_awburst = m_axi_awburst_reg;

assign m_axi_awprot = m_axi_awprot_reg;
assign m_axi_awvalid = m_axi_awvalid_reg;
assign m_axi_wdata = m_axi_wdata_reg;
assign m_axi_wstrb = m_axi_wstrb_reg;

assign m_axi_wlast = m_axi_wlast_reg;

assign m_axi_wvalid = m_axi_wvalid_reg;
assign m_axi_bready = !m_axi_bvalid_reg;

// slave side
// Later non-blocking assignments in this block override earlier ones to the
// same register, so the FSM overrides the defaults and reset overrides both.
always @(posedge s_clk) begin
    // default: hold the response until the upstream master accepts it
    s_axi_bvalid_reg <= s_axi_bvalid_reg && !s_axi_bready;

    // while the AW slot is empty, sample the AW channel every cycle
    if (!s_axi_awvalid_reg && !s_axi_bvalid_reg) begin
        s_axi_awaddr_reg <= s_axi_awaddr;

        s_axi_awid_reg <= s_axi_awid;
        s_axi_awlen_reg <= s_axi_awlen;
        s_axi_awsize_reg <= s_axi_awsize;
        s_axi_awburst_reg <= s_axi_awburst;

        s_axi_awprot_reg <= s_axi_awprot;
        s_axi_awvalid_reg <= s_axi_awvalid;
    end

    // while the W slot is empty, sample the W channel every cycle
    if (!s_axi_wvalid_reg && !s_axi_bvalid_reg) begin
        s_axi_wdata_reg <= s_axi_wdata;
        s_axi_wstrb_reg <= s_axi_wstrb;

        s_axi_wlast_reg <= s_axi_wlast;

        s_axi_wvalid_reg <= s_axi_wvalid;
    end

    case (s_state_reg)
        2'd0: begin
            // both address and data latched: signal the master domain
            if (s_axi_awvalid_reg && s_axi_wvalid_reg) begin
                s_state_reg <= 2'd1;
                s_flag_reg <= 1'b1;
            end
        end
        2'd1: begin
            // response ready on the master side: copy it and present it
            if (m_flag_sync_reg_2) begin
                s_state_reg <= 2'd2;
                s_flag_reg <= 1'b0;

                s_axi_bid_reg <= m_axi_bid_reg;

                s_axi_bresp_reg <= m_axi_bresp_reg;
                s_axi_bvalid_reg <= 1'b1;
            end
        end
        2'd2: begin
            // master side has dropped its flag: release both request slots
            if (!m_flag_sync_reg_2) begin
                s_state_reg <= 2'd0;
                s_axi_awvalid_reg <= 1'b0;
                s_axi_wvalid_reg <= 1'b0;
            end
        end
    endcase

    if (s_rst) begin
        s_state_reg <= 2'd0;
        s_flag_reg <= 1'b0;
        s_axi_awvalid_reg <= 1'b0;
        s_axi_wvalid_reg <= 1'b0;
        s_axi_bvalid_reg <= 1'b0;
    end
end

// synchronization
// m_flag_reg -> s_clk domain
always @(posedge s_clk) begin
    m_flag_sync_reg_1 <= m_flag_reg;
    m_flag_sync_reg_2 <= m_flag_sync_reg_1;
end

// s_flag_reg -> m_clk domain
always @(posedge m_clk) begin
    s_flag_sync_reg_1 <= s_flag_reg;
    s_flag_sync_reg_2 <= s_flag_sync_reg_1;
end

// master side
// Same override ordering as the slave side: defaults, then FSM, then reset.
always @(posedge m_clk) begin
    // default: keep AW / W asserted until the downstream slave accepts them
    m_axi_awvalid_reg <= m_axi_awvalid_reg && !m_axi_awready;
    m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wready;

    // while armed, sample the B channel every cycle until a valid response lands
    if (!m_axi_bvalid_reg) begin
        m_axi_bid_reg <= m_axi_bid;

        m_axi_bresp_reg <= m_axi_bresp;
        m_axi_bvalid_reg <= m_axi_bvalid;
    end

    case (m_state_reg)
        2'd0: begin
            // request flagged by the slave side: copy it, issue AW + W, arm B
            if (s_flag_sync_reg_2) begin
                m_state_reg <= 2'd1;
                m_axi_awaddr_reg <= s_axi_awaddr_reg;

                m_axi_awid_reg <= s_axi_awid_reg;
                m_axi_awlen_reg <= s_axi_awlen_reg;
                m_axi_awsize_reg <= s_axi_awsize_reg;
                m_axi_awburst_reg <= s_axi_awburst_reg;

                m_axi_awprot_reg <= s_axi_awprot_reg;
                m_axi_awvalid_reg <= 1'b1;
                m_axi_wdata_reg <= s_axi_wdata_reg;
                m_axi_wstrb_reg <= s_axi_wstrb_reg;

                m_axi_wlast_reg <= s_axi_wlast_reg;

                m_axi_wvalid_reg <= 1'b1;

                // NOTE(review): this copies the slave-side *response* ID into
                // the master-side response register; it is overwritten by the
                // B capture above once m_axi_bvalid_reg has been cleared, so
                // it looks redundant - confirm before removing.
                m_axi_bid_reg <= s_axi_bid_reg;

                m_axi_bvalid_reg <= 1'b0;
            end
        end
        2'd1: begin
            // response captured: tell the slave side
            if (m_axi_bvalid_reg) begin
                m_flag_reg <= 1'b1;
                m_state_reg <= 2'd2;
            end
        end
        2'd2: begin
            // slave side has taken the response: return to idle
            if (!s_flag_sync_reg_2) begin
                m_state_reg <= 2'd0;
                m_flag_reg <= 1'b0;
            end
        end
    endcase

    if (m_rst) begin
        m_state_reg <= 2'd0;
        m_flag_reg <= 1'b0;
        m_axi_awvalid_reg <= 1'b0;
        m_axi_wvalid_reg <= 1'b0;
        m_axi_bvalid_reg <= 1'b1;
    end
end

endmodule

`resetall
--------------------------------------------------------------------------------
/src/main/scala/nagicore/Main.scala:
--------------------------------------------------------------------------------
package nagicore

import chisel3._
import chisel3.util._
import _root_.circt.stage._

/** Command-line entry point that elaborates a core and emits Verilog into `build_dir`.
  *
  * The first command-line argument selects the target; when it is missing the
  * target is the empty string instead of an out-of-bounds read on `args`.
  */
object Main extends App {
    // Guarded read of the optional first argument (was `args(0)`, which throws
    // ArrayIndexOutOfBoundsException when no argument is given).
    val target = args.headOption.getOrElse("")
    val build_dir = "./build"
    println(target)
    /** Elaborates the module produced by `core` and writes split Verilog files to `build_dir`. */
    def exportVerilog(core: () => chisel3.RawModule): Unit = {
        println("Export Verilog Started")
        val chiselStageOption = Seq(
            chisel3.stage.ChiselGeneratorAnnotation(() => core()),
            CIRCTTargetAnnotation(CIRCTTarget.Verilog)
        )
        val firtoolOptions = Seq(
            // FirtoolOption("--lowering-options=disallowLocalVariables,locationInfoStyle=wrapInAtSquareBracket,noAlwaysComb"),
            FirtoolOption("--lowering-options=disallowLocalVariables,locationInfoStyle=wrapInAtSquareBracket,noAlwaysComb"),
            // FirtoolOption("--lowering-options=disallowLocalVariables,disallowPackedArrays,locationInfoStyle=wrapInAtSquareBracket,noAlwaysComb"),

            FirtoolOption("--split-verilog"),
            FirtoolOption("-o=" + build_dir),
            FirtoolOption("--disable-all-randomization"),
            FirtoolOption("--preserve-aggregate=none"),
        )
        val executeOptions = chiselStageOption ++
firtoolOptions 28 | val executeArgs = Array("-td", build_dir) 29 | (new ChiselStage).execute(executeArgs, executeOptions) 30 | } 31 | target match { 32 | case "NSCSCC" => { 33 | GlobalConfg.SIM = false 34 | exportVerilog(() => new nagicore.loongarch.nscscc2024.CoreNSCSCC) 35 | } 36 | // case "TEST" => { 37 | // exportVerilog(() => new Module{ 38 | // val io = IO(new Bundle { 39 | // val clk = Input(Clock()) 40 | // }) 41 | // val a = "h123".U 42 | // val xbar = Module(new nagicore.unit.ip.axi_corssbar.AXI4XBar(32, 32, List((0, nagicore.unit.ip.axi_corssbar.Axi4RW.RW)), List(("0x80000000", "0x807FFFFF")))) 43 | // xbar.io.masters <> DontCare 44 | // xbar.io.slaves <> DontCare 45 | // xbar.io.slaves(0).ar.addr := 2.U(32.W) 46 | // }) 47 | // } 48 | case _ => { 49 | exportVerilog(() => new nagicore.loongarch.nscscc2024.Core) 50 | } 51 | } 52 | } 53 | 54 | object GlobalConfg{ 55 | var SIM = true 56 | } -------------------------------------------------------------------------------- /src/main/scala/nagicore/bus/RAM.scala: -------------------------------------------------------------------------------- 1 | package nagicore.bus 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.unit.cache.CacheMemType.{RAM_2cyc, Value} 6 | import nagicore.GlobalConfg 7 | 8 | object RamType extends Enumeration { 9 | type RamType = Value 10 | val RAM_2CYC, RAM_1CYC, BRAM_1CYC, DPIC_2CYC, DPIC_1CYC = Value 11 | } 12 | 13 | class RamIO(width: Int, depth: Long) extends Bundle{ 14 | val addr = Input(UInt(log2Up(depth).W)) 15 | val din = Input(UInt(width.W)) 16 | val dout = Output(UInt(width.W)) 17 | val en = Input(Bool()) 18 | val we = Input(Bool()) 19 | val re = Input(Bool()) 20 | val wmask = Input(UInt((width/8).W)) 21 | } 22 | 23 | /** 24 | * RAM 25 | * @note 26 | * @param addrBits 27 | * @param dataBits 28 | * @param imp 29 | */ 30 | class Ram(width: Int, depth: Long, imp: RamType.RamType=RamType.RAM_2CYC) extends Module{ 31 | val io = IO(new RamIO(width, depth)) 32 | 
val addrBits = log2Up(depth) 33 | imp match { 34 | case RamType.DPIC_2CYC => { 35 | import nagicore.unit.DPIC_RAM_2CYC 36 | val sram = Module(new DPIC_RAM_2CYC(addrBits, width)) 37 | sram.io.clk := clock 38 | sram.io.rst := reset 39 | sram.io.addr := io.addr 40 | sram.io.re := io.re 41 | sram.io.we := io.we 42 | sram.io.wdata := io.din 43 | sram.io.wmask := io.wmask 44 | sram.io.size := log2Up(width/8).U 45 | sram.io.en := io.en 46 | io.dout := sram.io.rdata 47 | } 48 | case RamType.DPIC_1CYC => { 49 | import nagicore.unit.DPIC_RAM_1CYC 50 | val sram = Module(new DPIC_RAM_1CYC(addrBits, width)) 51 | sram.io.clk := clock 52 | sram.io.rst := reset 53 | sram.io.addr := io.addr 54 | sram.io.wdata := io.din 55 | sram.io.re := io.re 56 | sram.io.we := io.we 57 | sram.io.wmask := io.wmask 58 | sram.io.size := log2Up(width/8).U 59 | sram.io.en := io.en 60 | io.dout := sram.io.rdata 61 | } 62 | case RamType.RAM_1CYC | RamType.BRAM_1CYC => { 63 | if(imp==RamType.BRAM_1CYC && !GlobalConfg.SIM){ 64 | Predef.println(s"Xilinx BlockRAM IP blk_mem_${width}_${depth} needed") 65 | class BlockRAMIP extends BlackBox{ 66 | override val desiredName = s"blk_mem_${width}_${depth}" 67 | val io = IO(new Bundle { 68 | val addra = Input(UInt(addrBits.W)) 69 | val clka = Input(Clock()) 70 | val dina = Input(UInt(width.W)) 71 | val douta = Output(UInt(width.W)) 72 | val ena = Input(Bool()) 73 | val wea = Input(Bool()) 74 | }) 75 | } 76 | val bram = Module(new BlockRAMIP) 77 | bram.io.clka := clock 78 | bram.io.addra := io.addr 79 | bram.io.dina := io.din 80 | bram.io.wea := io.we 81 | bram.io.ena := io.en 82 | io.dout := bram.io.douta 83 | }else{ 84 | if(width%8==0){ 85 | val bytes = width/8 86 | val mem = Mem(depth, Vec(bytes, UInt(8.W))) 87 | when(io.en&&io.we){ 88 | val wdata = VecInit.tabulate(bytes){ 89 | i => io.din(8*((bytes-1-i)+1)-1, 8*(bytes-1-i)) 90 | } 91 | assert(Cat(io.wmask.asBools)===io.wmask) 92 | mem.write(io.addr, wdata, io.wmask.asBools) 93 | } 94 | // WRITE_FIRST Mode 95 | 
io.dout := Cat(mem.read(io.addr)) 96 | }else{ 97 | val mem = Mem(depth, UInt(width.W)) 98 | when(io.en&&io.we){ 99 | mem.write(io.addr, io.din) 100 | } 101 | io.dout := mem.read(io.addr) 102 | } 103 | } 104 | } 105 | case _ => { 106 | /** 107 | * 两个周期的同步RAM 108 | * 当EN拉低时,不会写入任何数据,读数据将会保持在上一个状态;读后写时,将会继续读上一次读地址的数据 109 | * When inactive, no data is written to the RAM and the output bus remains in its previous state. 110 | * [NO_CHANGE Mode](https://docs.amd.com/r/en-US/am007-versal-memory/NO_CHANGE-Mode-DEFAULT) 111 | */ 112 | val mem = Mem(depth, UInt(width.W)) 113 | // val enable_read = io.en && !io.we 114 | // val rdata = mem.read(io.addr, enable_read) 115 | // io.dout := Mux(enable_read, rdata, RegEnable(rdata, enable_read)) 116 | // val rdata = mem.read(io.addr, enable_read) 117 | // io.dout = 118 | // io.dout := rdata 119 | // when(io.en&&io.we){ 120 | // mem.write(io.addr, io.din) 121 | // } 122 | val rdata = mem.read(RegEnable(io.addr, io.en && !io.we)) 123 | io.dout := rdata 124 | when(io.en&&io.we){ 125 | mem.write(io.addr, io.din) 126 | } 127 | assert(io.wmask.andR) 128 | /* 129 | val regs = Reg(Vec(1< if_part.io.preif2if 19 | if_part.io.if2id <> id_part.io.if2id 20 | id_part.io.id2ex <> ex_part.io.id2ex 21 | ex_part.io.ex2preif <> preif_part.io.ex2preif 22 | ex_part.io.ex2id <> id_part.io.ex2id 23 | ex_part.io.ex2mem <> mem_part.io.ex2mem 24 | mem_part.io.mem2id <> id_part.io.mem2id 25 | mem_part.io.stall_all := false.B 26 | 27 | val isram_ctrl = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, 1.toLong< dsram_ctrl.io.axi 34 | 35 | val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) 36 | xbar_imem.io.in(0) <> if_part.io.isram 37 | xbar_imem.io.out <> isram_ctrl.io.axi 38 | 39 | val xbar_dmem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) 40 | xbar_dmem.io.in(1) <> mia.io.mem 41 | xbar_dmem.io.out <> dsram_ctrl.io.axi 42 | 43 | val xbar_mem_stage = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List( 44 | (0x80000000L, 0x400000L, 
false), 45 | (0x80400000L, 0x400000L, false), 46 | (0xbfd00000L, 0x400000L, false), 47 | (0x90000000L, 0x400000L, false), 48 | ))) 49 | 50 | xbar_mem_stage.io.in <> mem_part.io.dmem 51 | xbar_mem_stage.io.out(0) <> xbar_imem.io.in(1) 52 | xbar_mem_stage.io.out(1) <> xbar_dmem.io.in(0) 53 | xbar_mem_stage.io.out(2) <> uart_axi4.io.axi 54 | xbar_mem_stage.io.out(3) <> mia.io.cmd 55 | 56 | val isram = Module(new Ram(XLEN, 1.toLong< isram.io 60 | dsram_ctrl.io.sram <> dsram.io 61 | uart_axi4.io.sram <> uart.io 62 | } 63 | 64 | class CoreNSCSCC extends Module with Config{ 65 | val RAM_DEPTH = 0x400000/4 66 | val io = IO(new Bundle{ 67 | val isram = Flipped(new RamIO(32, RAM_DEPTH)) 68 | val dsram = Flipped(new RamIO(32, RAM_DEPTH)) 69 | val uart = new AXI4IO(XLEN, XLEN) 70 | }) 71 | 72 | val preif_part = Module(new stages.PREIF) 73 | val if_part = Module(new stages.IF) 74 | val id_part = Module(new stages.ID) 75 | val ex_part = Module(new stages.EX) 76 | val mem_part = Module(new stages.MEM) 77 | 78 | preif_part.io.preif2if <> if_part.io.preif2if 79 | if_part.io.if2id <> id_part.io.if2id 80 | id_part.io.id2ex <> ex_part.io.id2ex 81 | ex_part.io.ex2preif <> preif_part.io.ex2preif 82 | ex_part.io.ex2id <> id_part.io.ex2id 83 | ex_part.io.ex2mem <> mem_part.io.ex2mem 84 | mem_part.io.mem2id <> id_part.io.mem2id 85 | mem_part.io.stall_all := false.B 86 | 87 | val isram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2)) 88 | val dsram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2)) 89 | val mia = Module(new MIAU(XLEN, XLEN, AXI4IDBITS)) 90 | 91 | if_part.io.isram <> isram_axi4_wrapper.io.axi 92 | 93 | val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) 94 | xbar_imem.io.in(0) <> if_part.io.isram 95 | xbar_imem.io.out <> isram_axi4_wrapper.io.axi 96 | 97 | val xbar_dmem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) 98 | xbar_dmem.io.in(1) <> mia.io.mem 99 | xbar_dmem.io.out <> 
dsram_axi4_wrapper.io.axi 100 | 101 | val xbar_mem_stage = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List( 102 | (0x80000000L, 0x400000L, false), 103 | (0x80400000L, 0x400000L, false), 104 | (0xbfd00000L, 0x400000L, false), 105 | (0x90000000L, 0x400000L, false), 106 | ))) 107 | 108 | xbar_mem_stage.io.in <> mem_part.io.dmem 109 | xbar_mem_stage.io.out(0) <> xbar_imem.io.in(1) 110 | xbar_mem_stage.io.out(1) <> xbar_dmem.io.in(0) 111 | xbar_mem_stage.io.out(2) <> io.uart 112 | xbar_mem_stage.io.out(3) <> mia.io.cmd 113 | 114 | isram_axi4_wrapper.io.sram <> io.isram 115 | dsram_axi4_wrapper.io.sram <> io.dsram 116 | } 117 | -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024/CtrlFlags.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | 7 | object CtrlFlags{ 8 | // trait FlagsEnum { 9 | // def value: String 10 | // } 11 | // object aluASel{ 12 | // sealed trait T extends FlagsEnum 13 | // case object ra extends T{ 14 | // def value = "01" 15 | // } 16 | // case object pc extends T{ 17 | // def value = "10" 18 | // } 19 | // } 20 | object aluASel{ 21 | val ra = "01" 22 | val pc = "10" 23 | def apply() = UInt(2.W) 24 | } 25 | object aluBSel{ 26 | val rb = "001" 27 | val imm = "010" 28 | val num4 = "100" 29 | def apply() = UInt(3.W) 30 | } 31 | object brpcAddSel{ 32 | val pc = "01" 33 | val ra_val = "10" 34 | def apply() = UInt(2.W) 35 | } 36 | object ldType{ 37 | val x = "000001" 38 | val b = "000010" 39 | val h = "000100" 40 | val w = "001000" 41 | val bu = "010000" 42 | val hu = "100000" 43 | def apply() = UInt(6.W) 44 | } 45 | object stType{ 46 | val x = "0001" 47 | val b = "0010" 48 | val h = "0100" 49 | val w = "1000" 50 | def apply() = UInt(4.W) 51 | } 52 | } -------------------------------------------------------------------------------- 
/src/main/scala/nagicore/loongarch/nscscc2024/stages/EX.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024.stages 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.utils.Flags 6 | import nagicore.unit.ALU 7 | import nagicore.unit.BRU_SINGLE 8 | import nagicore.GlobalConfg 9 | import nagicore.unit.BTBUpdateIO 10 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags} 11 | import nagicore.unit.BR_TYPE 12 | import nagicore.unit.BP_TYPE 13 | import nagicore.unit.MULU_IMP 14 | import nagicore.unit.DIVU_IMP 15 | 16 | class ex2preifIO extends Bundle with Config{ 17 | val bpu_update = new BTBUpdateIO(BTB_ENTRYS, XLEN) 18 | val bpu_fail = Bool() 19 | val br_real_pc = UInt(XLEN.W) 20 | } 21 | 22 | class ex2idIO extends Bundle with Config{ 23 | // effective signal 24 | val bypass_rc = Output(UInt(GPR_LEN.W)) 25 | val bypass_val = Output(UInt(XLEN.W)) 26 | val bypass_en = Output(Bool()) 27 | } 28 | 29 | class ex2memBits extends Bundle with Config{ 30 | val instr = UInt(XLEN.W) 31 | val alu_out = UInt(XLEN.W) 32 | val rb_val = UInt(XLEN.W) 33 | val rc = UInt(GPR_LEN.W) 34 | val ld_type = CtrlFlags.ldType() 35 | val st_type = CtrlFlags.stType() 36 | val pc = UInt(XLEN.W) 37 | 38 | val valid = Bool() 39 | } 40 | 41 | class ex2memIO extends Bundle{ 42 | val bits = Output(new ex2memBits) 43 | val stall = Input(Bool()) 44 | } 45 | 46 | class EX extends Module with Config{ 47 | val io = IO(new Bundle{ 48 | val ex2preif = new ex2preifIO 49 | val id2ex = Flipped(new id2exIO) 50 | val ex2mem = new ex2memIO 51 | val ex2id = new ex2idIO 52 | }) 53 | // stall signal from next stage 54 | val stall_nxt = io.ex2mem.stall 55 | 56 | val alu = Module(new ALU(XLEN, MULU_IMP.synthesizer_DSP, DIVU_IMP.none)) 57 | val busy = alu.io.busy 58 | 59 | // accept instrs from pre stage 60 | val accp_pre = Wire(Bool()) 61 | // pipeline registers 62 | val preg = RegEnable(io.id2ex.bits, accp_pre) 63 | 64 | 
/* kill following *valid instrs*, max 3 instrs */ 65 | val kill_nxt = RegInit(0.U(3.W)) 66 | // stall pre stages in force 67 | val stall_pre_counter = RegInit(0.U(2.W)) 68 | 69 | val valid_instr = kill_nxt === 0.U && preg.valid && !busy && stall_pre_counter === 0.U 70 | val is_ld : Bool = valid_instr && !Flags.OHis(preg.ld_type, CtrlFlags.ldType.x) 71 | accp_pre := !(stall_nxt || busy) 72 | 73 | // must stall when ld comes immediately unlike kill 74 | io.id2ex.stall := stall_pre_counter(1) =/= 0.U || is_ld || busy || stall_nxt 75 | 76 | val bru = Module(new BRU_SINGLE(XLEN)) 77 | bru.io.a := preg.ra_val 78 | bru.io.b := preg.rb_val 79 | bru.io.br_type := preg.br_type 80 | 81 | val br_pc = preg.imm + Mux(Flags.OHis(preg.brpcAdd_sel, CtrlFlags.brpcAddSel.ra_val), preg.ra_val, preg.pc) 82 | 83 | // valid_instr && bru.io.br_take 84 | 85 | val br_pred_fail = Mux(preg.bpu_out.taken, !bru.io.br_take || preg.bpu_out.target =/= br_pc, 86 | bru.io.br_take) && valid_instr 87 | 88 | io.ex2preif.bpu_fail := br_pred_fail 89 | io.ex2preif.br_real_pc := Mux(bru.io.br_take, br_pc, preg.pc+4.U) 90 | 91 | io.ex2preif.bpu_update.bp_type := RegNext(Mux(Flags.OHis(preg.br_type, BR_TYPE.ALWAYS), 92 | Flags.U(BP_TYPE.jump), Flags.U(BP_TYPE.cond) 93 | )) 94 | io.ex2preif.bpu_update.hit := RegNext(preg.bpu_out.hit) 95 | io.ex2preif.bpu_update.index := RegNext(preg.bpu_out.index) 96 | io.ex2preif.bpu_update.pc := RegNext(preg.pc) 97 | io.ex2preif.bpu_update.target := RegNext(io.ex2preif.br_real_pc) 98 | io.ex2preif.bpu_update.taken := RegNext(bru.io.br_take) 99 | io.ex2preif.bpu_update.valid := RegNext(valid_instr && !Flags.OHis(preg.br_type, BR_TYPE.NEVER)) 100 | 101 | if(GlobalConfg.SIM){ 102 | import nagicore.unit.DPIC_PERF_BRU 103 | import nagicore.unit.BR_TYPE 104 | val dpic_perf_bru = Module(new DPIC_PERF_BRU) 105 | dpic_perf_bru.io.clk := clock 106 | dpic_perf_bru.io.rst := reset 107 | dpic_perf_bru.io.valid := !Flags.OHis(preg.br_type, BR_TYPE.NEVER) && valid_instr 108 | 
dpic_perf_bru.io.fail := br_pred_fail 109 | } 110 | 111 | io.ex2mem.bits.valid := valid_instr 112 | 113 | kill_nxt := Mux(!stall_nxt && !busy && (kill_nxt === 0.U || io.id2ex.bits.valid), 114 | /* 当分支预测失败时,应该无视接下来3条有效指令(PREIF,IF,ID) */ 115 | Mux(br_pred_fail, 3.U, 116 | // Mux(is_ld, 1.U, 117 | Mux(kill_nxt===0.U, 0.U, 118 | kill_nxt-1.U 119 | ) 120 | // ) 121 | ), kill_nxt 122 | ) 123 | stall_pre_counter := Mux(!stall_nxt, 124 | /* 当遇到加载指令时,应该请求上一级阻塞1个周期,并且无视接下来1个周期的指令(EX) */ 125 | Mux(is_ld, 1.U, 126 | Mux(stall_pre_counter===0.U, 0.U, 127 | stall_pre_counter-1.U 128 | ) 129 | ), stall_pre_counter) 130 | // stall_pre_counter := Mux(!stall_nxt, 131 | // /* 当遇到加载指令时,应该请求上一级阻塞1个周期,并且无视接下来2个周期的指令(EX, DMEM) */ 132 | // Mux(is_ld, 133 | // Mux(preg.ld_type === Flags.bp(CtrlFlags.ldType.w), 134 | // 1.U, 135 | // 2.U 136 | // ), 137 | // Mux(stall_pre_counter===0.U, 0.U, 138 | // stall_pre_counter-1.U 139 | // ) 140 | // ), stall_pre_counter) 141 | 142 | 143 | io.ex2mem.bits.instr := preg.instr 144 | 145 | val alu_a = Flags.onehotMux(preg.aluA_sel, Seq( 146 | CtrlFlags.aluASel.ra -> preg.ra_val, 147 | CtrlFlags.aluASel.pc -> preg.pc, 148 | )) 149 | val alu_b = Flags.onehotMux(preg.aluB_sel, Seq( 150 | CtrlFlags.aluBSel.rb -> preg.rb_val, 151 | CtrlFlags.aluBSel.imm -> preg.imm, 152 | CtrlFlags.aluBSel.num4 -> 4.U, 153 | )) 154 | 155 | // must assert for only one cycle 156 | // alu.io.valid := kill_nxt === 0.U && preg.valid && RegNext(accp_pre) 157 | alu.io.valid := kill_nxt === 0.U && stall_pre_counter === 0.U && preg.valid && RegNext(accp_pre) 158 | alu.io.a := alu_a 159 | alu.io.b := alu_b 160 | alu.io.op := preg.alu_op 161 | io.ex2mem.bits.alu_out := alu.io.out 162 | 163 | io.ex2mem.bits.rb_val := preg.rb_val 164 | 165 | io.ex2mem.bits.rc := preg.rc 166 | 167 | io.ex2mem.bits.ld_type := preg.ld_type 168 | 169 | io.ex2mem.bits.st_type := preg.st_type 170 | 171 | io.ex2mem.bits.pc := preg.pc 172 | 173 | io.ex2id.bypass_rc := preg.rc 174 | io.ex2id.bypass_val := 
alu.io.out 175 | io.ex2id.bypass_en := valid_instr 176 | 177 | if(GlobalConfg.SIM){ 178 | import nagicore.unit.DPIC_PERF_PIPE 179 | val perf_pipe_ex = Module(new DPIC_PERF_PIPE()) 180 | perf_pipe_ex.io.clk := clock 181 | perf_pipe_ex.io.rst := reset 182 | perf_pipe_ex.io.id := 1.U 183 | perf_pipe_ex.io.invalid := !io.ex2mem.bits.valid 184 | perf_pipe_ex.io.stall := io.id2ex.stall 185 | } 186 | } -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024/stages/ID.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024.stages 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.unit.GPR 6 | import nagicore.unit.ALU_OP 7 | import nagicore.unit.BR_TYPE 8 | import nagicore.unit.BTBPredOutIO 9 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags, Decoder} 10 | import nagicore.GlobalConfg 11 | 12 | 13 | class id2exBits extends Bundle with Config{ 14 | val instr = UInt(XLEN.W) 15 | val pc = UInt(XLEN.W) 16 | val ra_val = UInt(XLEN.W) 17 | val aluA_sel = CtrlFlags.aluASel() 18 | val rb_val = UInt(XLEN.W) 19 | val aluB_sel = CtrlFlags.aluBSel() 20 | val alu_op = ALU_OP() 21 | val rc = UInt(GPR_LEN.W) 22 | val imm = UInt(XLEN.W) 23 | val br_type = BR_TYPE() 24 | val brpcAdd_sel = CtrlFlags.brpcAddSel() 25 | val ld_type = CtrlFlags.ldType() 26 | val st_type = CtrlFlags.stType() 27 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) 28 | 29 | val valid = Bool() 30 | } 31 | 32 | class id2exIO extends Bundle{ 33 | val bits = Output(new id2exBits) 34 | val stall = Input(Bool()) 35 | } 36 | 37 | class ID extends Module with Config{ 38 | val io = IO(new Bundle{ 39 | val if2id = Flipped(new if2idIO) 40 | val id2ex = new id2exIO 41 | 42 | val ex2id = Flipped(new ex2idIO) 43 | val mem2id = Flipped(new mem2idIO) 44 | }) 45 | 46 | // pipeline registers 47 | val preg = RegEnable(io.if2id.bits, !io.id2ex.stall) 48 | 49 | 
io.id2ex.bits.valid := preg.valid 50 | io.if2id.stall := io.id2ex.stall 51 | 52 | val decoder = Module(new Decoder(XLEN, GPR_LEN)) 53 | decoder.io.instr := preg.instr 54 | 55 | io.id2ex.bits.instr := preg.instr 56 | io.id2ex.bits.pc := preg.pc 57 | 58 | val gpr = Module(new GPR(XLEN, GPR_NUM, 2, 1)) 59 | gpr.io.wen(0) := io.mem2id.gpr_wen 60 | gpr.io.waddr(0) := io.mem2id.gpr_wid 61 | gpr.io.wdata(0) := io.mem2id.gpr_wdata 62 | 63 | if(GlobalConfg.SIM){ 64 | import nagicore.unit.DPIC_UPDATE_GPR 65 | val dpic_update_gpr = Module(new DPIC_UPDATE_GPR(XLEN, GPR_NUM)) 66 | dpic_update_gpr.io.clk := clock 67 | dpic_update_gpr.io.rst := reset 68 | dpic_update_gpr.io.id := gpr.io.waddr(0) 69 | dpic_update_gpr.io.wen := gpr.io.wen(0) 70 | dpic_update_gpr.io.wdata := gpr.io.wdata(0) 71 | } 72 | 73 | def bypass_unit(rx: UInt, gpr_rdata: UInt):UInt = { 74 | Mux(rx === 0.U, 0.U, 75 | Mux(io.ex2id.bypass_rc === rx && io.ex2id.bypass_en, io.ex2id.bypass_val, 76 | Mux(io.mem2id.bypass_rc === rx && io.mem2id.bypass_en, io.mem2id.bypass_val, 77 | gpr_rdata 78 | ) 79 | ) 80 | ) 81 | } 82 | 83 | val ra = decoder.io.ra 84 | gpr.io.raddr(0) := ra 85 | // bypass 86 | io.id2ex.bits.ra_val := bypass_unit(ra, gpr.io.rdata(0)) 87 | io.id2ex.bits.aluA_sel := decoder.io.aluA_sel 88 | 89 | val rb = decoder.io.rb 90 | gpr.io.raddr(1) := rb 91 | // bypass 92 | io.id2ex.bits.rb_val := bypass_unit(rb, gpr.io.rdata(1)) 93 | 94 | io.id2ex.bits.aluB_sel := decoder.io.aluB_sel 95 | 96 | io.id2ex.bits.alu_op := decoder.io.alu_op 97 | 98 | io.id2ex.bits.rc := decoder.io.rc 99 | 100 | io.id2ex.bits.imm := decoder.io.imm 101 | 102 | io.id2ex.bits.br_type := decoder.io.br_type 103 | 104 | io.id2ex.bits.brpcAdd_sel := decoder.io.brpcAdd_sel 105 | 106 | io.id2ex.bits.ld_type := decoder.io.ld_type 107 | 108 | io.id2ex.bits.st_type := decoder.io.st_type 109 | 110 | io.id2ex.bits.bpu_out := preg.bpu_out 111 | 112 | } -------------------------------------------------------------------------------- 
/src/main/scala/nagicore/loongarch/nscscc2024/stages/IF.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024.stages 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.bus.AXI4IO 6 | //import nagicore.unit.{InstrsBuff, InstrsBuffCacheBundle} 7 | import nagicore.unit.cache.Cache 8 | import nagicore.GlobalConfg 9 | import nagicore.unit.cache.CacheReplaceType 10 | import nagicore.unit.BTBPredOutIO 11 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags} 12 | import nagicore.bus.RamType 13 | 14 | 15 | class if2idBits extends Bundle with Config{ 16 | val pc = UInt(XLEN.W) 17 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) 18 | val instr = UInt(XLEN.W) 19 | 20 | val valid = Bool() 21 | } 22 | 23 | class if2idIO extends Bundle{ 24 | val bits = Output(new if2idBits) 25 | val stall = Input(Bool()) 26 | } 27 | 28 | class IF extends Module with Config{ 29 | val io = IO(new Bundle { 30 | val preif2if = Flipped(new preif2ifIO) 31 | val if2id = new if2idIO 32 | val isram = new AXI4IO(XLEN, XLEN) 33 | }) 34 | // 2-stages 1cyc cache 35 | val icache = Module(new Cache(XLEN, XLEN, ICACHE_WAYS, ICACHE_LINES, ICACHE_WORDS, () => new preif2ifBits(), CacheReplaceType.LRU, 36 | dataRamType = RamType.RAM_1CYC, 37 | tagVRamType = RamType.RAM_1CYC, 38 | debug_id = 0)) 39 | icache.io.axi <> io.isram 40 | 41 | icache.io.master.front.bits.addr := io.preif2if.bits.pc 42 | icache.io.master.front.bits.size := 2.U 43 | icache.io.master.front.bits.uncache := false.B 44 | icache.io.master.front.bits.wmask := 0.U 45 | icache.io.master.front.bits.valid := io.preif2if.bits.valid 46 | icache.io.master.front.bits.wdata := DontCare 47 | icache.io.master.front.bits.pipedata := io.preif2if.bits 48 | icache.io.master.back.stall := io.if2id.stall 49 | 50 | 51 | io.if2id.bits.instr := icache.io.master.back.bits.rdata 52 | io.if2id.bits.valid := icache.io.master.back.bits.valid 53 | io.if2id.bits.pc := 
icache.io.master.back.bits.pipedata.pc 54 | io.if2id.bits.bpu_out := icache.io.master.back.bits.pipedata.bpu_out 55 | 56 | io.preif2if.stall := icache.io.master.front.stall 57 | 58 | if(GlobalConfg.SIM){ 59 | import nagicore.unit.DPIC_PERF_PIPE 60 | val perf_pipe_if = Module(new DPIC_PERF_PIPE()) 61 | perf_pipe_if.io.clk := clock 62 | perf_pipe_if.io.rst := reset 63 | perf_pipe_if.io.id := 0.U 64 | perf_pipe_if.io.invalid := !io.if2id.bits.valid 65 | perf_pipe_if.io.stall := io.preif2if.stall 66 | } 67 | } -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024/stages/MEM.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024.stages 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.bus.AXI4IO 6 | import nagicore.unit.cache.CacheMini 7 | import nagicore.utils.Flags 8 | import nagicore.GlobalConfg 9 | import nagicore.unit.cache.CacheReplaceType 10 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags} 11 | import nagicore.unit.cache.UnCache 12 | 13 | class mem2idIO extends Bundle with Config{ 14 | // effective signal 15 | val bypass_rc = Output(UInt(GPR_LEN.W)) 16 | val bypass_val = Output(UInt(XLEN.W)) 17 | val bypass_en = Output(Bool()) 18 | 19 | val gpr_wid = Output(UInt(GPR_LEN.W)) 20 | val gpr_wdata = Output(UInt(XLEN.W)) 21 | val gpr_wen = Output(Bool()) 22 | } 23 | 24 | class MEM extends Module with Config{ 25 | val io = IO(new Bundle { 26 | val ex2mem = Flipped(new ex2memIO()) 27 | val mem2id = new mem2idIO() 28 | val dmem = new AXI4IO(XLEN, XLEN) 29 | val stall_all = Input(Bool()) 30 | }) 31 | 32 | class dcachePipeT extends Bundle { 33 | val instr = UInt(XLEN.W) 34 | val alu_out = UInt(XLEN.W) 35 | val rc = UInt(GPR_LEN.W) 36 | val ld_type = CtrlFlags.ldType() 37 | val pc = UInt(XLEN.W) 38 | val no_ldst = Bool() 39 | 40 | val valid = Bool() 41 | } 42 | 43 | // val dcache = Module(new 
CacheMini(XLEN, XLEN, 8, 8, 1)) 44 | val dcache = Module(new UnCache(XLEN, XLEN, WBUFF_LEN, 1)) 45 | 46 | // pipeline registers 47 | val preg = RegEnable(io.ex2mem.bits, !dcache.io.out.busy && !io.stall_all) 48 | io.ex2mem.stall := dcache.io.out.busy || io.stall_all 49 | 50 | dcache.io.axi <> io.dmem 51 | 52 | val addr = preg.alu_out 53 | 54 | dcache.io.in.bits.addr := addr 55 | // dcache.io.in.bits.uncache := addr(31, 28) === "hb".U 56 | dcache.io.in.bits.we := !Flags.OHis(preg.st_type, CtrlFlags.stType.x) 57 | dcache.io.in.bits.wdata := Flags.onehotMux(preg.st_type, Seq( 58 | CtrlFlags.stType.x -> 0.U, 59 | CtrlFlags.stType.b -> Fill(XLEN/8, preg.rb_val(7, 0)), 60 | CtrlFlags.stType.h -> Fill(XLEN/16, preg.rb_val(15, 0)), 61 | CtrlFlags.stType.w -> preg.rb_val(31, 0), 62 | )) 63 | dcache.io.in.bits.size := Flags.onehotMux(preg.st_type, Seq( 64 | CtrlFlags.stType.x -> 0.U, 65 | CtrlFlags.stType.b -> 0.U, 66 | CtrlFlags.stType.h -> 1.U, 67 | CtrlFlags.stType.w -> 2.U, 68 | )) | Flags.onehotMux(preg.ld_type, Seq( 69 | CtrlFlags.ldType.x -> 0.U, 70 | CtrlFlags.ldType.b -> 0.U, 71 | CtrlFlags.ldType.bu -> 0.U, 72 | CtrlFlags.ldType.h -> 1.U, 73 | CtrlFlags.ldType.hu -> 1.U, 74 | CtrlFlags.ldType.w -> 2.U, 75 | )) 76 | dcache.io.in.bits.wmask := Flags.onehotMux(preg.st_type, Seq( 77 | CtrlFlags.stType.x -> 0.U, 78 | CtrlFlags.stType.b -> ("b1".U< ("b11".U<<(addr(1)##0.U(1.W))), 80 | CtrlFlags.stType.w -> "b1111".U, 81 | )) 82 | // 不走Cache的指令 83 | val nolr = Flags.OHis(preg.ld_type, CtrlFlags.ldType.x) && Flags.OHis(preg.st_type, CtrlFlags.stType.x) 84 | dcache.io.in.req := preg.valid && !nolr && RegNext(!dcache.io.out.busy) && !io.stall_all 85 | 86 | val rdata_raw = dcache.io.out.rdata 87 | val wordData = if(XLEN == 64) Mux(addr(2), rdata_raw(63, 32), rdata_raw(31, 0)) 88 | else rdata_raw(31, 0) 89 | val halfData = Mux(addr(1), wordData(31, 16), wordData(15, 0)) 90 | val byteData = Mux(addr(0), halfData(15, 8), halfData(7, 0)) 91 | 92 | val rdata_mem = 
Flags.onehotMux(preg.ld_type, Seq( 93 | CtrlFlags.ldType.x -> (0.U).zext, 94 | CtrlFlags.ldType.b -> byteData.asSInt, 95 | CtrlFlags.ldType.bu -> byteData.zext, 96 | CtrlFlags.ldType.h -> halfData.asSInt, 97 | CtrlFlags.ldType.hu -> halfData.zext, 98 | CtrlFlags.ldType.w -> wordData.zext, 99 | )).asUInt 100 | 101 | val mem_valid = preg.valid && !dcache.io.out.busy 102 | 103 | io.mem2id.bypass_rc := preg.rc 104 | io.mem2id.bypass_en := mem_valid 105 | val wb_data = Mux(Flags.OHis(preg.ld_type, CtrlFlags.ldType.x), preg.alu_out, rdata_mem) 106 | io.mem2id.bypass_val := wb_data 107 | 108 | // when(nolr){ 109 | // io.mem2id.bypass_rc := Mux(preg.valid, preg.rc, 0.U) 110 | // io.mem2id.bypass_val := preg.alu_out 111 | // }.otherwise{ 112 | // io.mem2id.bypass_rc := Mux(preg.valid && preg.ld_type === Flags.bp(CtrlFlags.ldType.w), preg.rc, 0.U) 113 | // io.mem2id.bypass_val := dcache.io.out.rdata 114 | // } 115 | 116 | io.mem2id.gpr_wid := preg.rc 117 | io.mem2id.gpr_wdata := wb_data 118 | io.mem2id.gpr_wen := mem_valid 119 | 120 | if(GlobalConfg.SIM){ 121 | import nagicore.unit.DPIC_TRACE_MEM 122 | val dpic_trace_mem_w = Module(new DPIC_TRACE_MEM(XLEN, XLEN)) 123 | dpic_trace_mem_w.io.clk := clock 124 | dpic_trace_mem_w.io.rst := reset 125 | dpic_trace_mem_w.io.valid := dcache.io.in.req && dcache.io.in.bits.wmask.orR 126 | dpic_trace_mem_w.io.addr := dcache.io.in.bits.addr 127 | dpic_trace_mem_w.io.size := dcache.io.in.bits.size 128 | dpic_trace_mem_w.io.data := dcache.io.in.bits.wdata 129 | dpic_trace_mem_w.io.wmask := dcache.io.in.bits.wmask 130 | 131 | import nagicore.unit.DPIC_PERF_PIPE 132 | val perf_pipe_dcache = Module(new DPIC_PERF_PIPE()) 133 | perf_pipe_dcache.io.clk := clock 134 | perf_pipe_dcache.io.rst := reset 135 | perf_pipe_dcache.io.id := 2.U 136 | perf_pipe_dcache.io.invalid := !mem_valid 137 | perf_pipe_dcache.io.stall := io.ex2mem.stall 138 | 139 | import nagicore.unit.DPIC_UPDATE_PC 140 | val dpic_update_pc = Module(new DPIC_UPDATE_PC(XLEN)) 141 | 
dpic_update_pc.io.clk := clock 142 | dpic_update_pc.io.rst := reset 143 | dpic_update_pc.io.pc := preg.pc 144 | dpic_update_pc.io.wen := mem_valid 145 | 146 | import nagicore.unit.DPIC_TRACE_MEM 147 | val dpic_trace_mem_r = Module(new DPIC_TRACE_MEM(XLEN, XLEN)) 148 | dpic_trace_mem_r.io.clk := clock 149 | dpic_trace_mem_r.io.rst := reset 150 | dpic_trace_mem_r.io.valid := mem_valid && preg.ld_type =/= Flags.bp(CtrlFlags.ldType.x) 151 | dpic_trace_mem_r.io.addr := preg.alu_out 152 | dpic_trace_mem_r.io.size := Flags.onehotMux(preg.ld_type, Seq( 153 | CtrlFlags.ldType.x -> 0.U, 154 | CtrlFlags.ldType.b -> 0.U, 155 | CtrlFlags.ldType.bu -> 0.U, 156 | CtrlFlags.ldType.h -> 1.U, 157 | CtrlFlags.ldType.hu -> 1.U, 158 | CtrlFlags.ldType.w -> 2.U, 159 | )) 160 | dpic_trace_mem_r.io.data := rdata_mem 161 | dpic_trace_mem_r.io.wmask := 0.U 162 | 163 | 164 | } 165 | } -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024/stages/PREIF.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024.stages 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.unit.BTB 6 | import nagicore.unit.BTBPredOutIO 7 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags} 8 | import nagicore.GlobalConfg 9 | 10 | 11 | class preif2ifBits extends Bundle with Config{ 12 | val pc = UInt(XLEN.W) 13 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) 14 | 15 | val valid = Bool() 16 | } 17 | 18 | class preif2ifIO extends Bundle{ 19 | val bits = Output(new preif2ifBits) 20 | val stall = Input(Bool()) 21 | } 22 | 23 | class PREIF extends Module with Config{ 24 | val io = IO(new Bundle { 25 | val preif2if = new preif2ifIO 26 | val ex2preif = Flipped(new ex2preifIO) 27 | }) 28 | 29 | val nxt_pc = Wire(UInt(XLEN.W)) 30 | if(GlobalConfg.SIM){ 31 | dontTouch(nxt_pc) 32 | } 33 | val pc = RegEnable(nxt_pc, PC_START, !io.preif2if.stall) 34 | val pc4 
= pc+4.U 35 | // 当流水线阻塞但分支预测又失败的时候,需要先暂存,等阻塞解除后再修改PC,不能直接覆盖,否则会少一个周期的气泡 36 | val bpu_fail_when_stall = RegInit(false.B) 37 | val bpu_fail_pc_when_stall = Reg(UInt(XLEN.W)) 38 | when(io.preif2if.stall && io.ex2preif.bpu_fail){ 39 | bpu_fail_when_stall := true.B 40 | bpu_fail_pc_when_stall := io.ex2preif.br_real_pc 41 | } 42 | when(!io.preif2if.stall){ 43 | bpu_fail_when_stall := false.B 44 | } 45 | 46 | // val bpu_fail = RegEnable(io.ex2preif.bpu_fail || bpu_fail_when_stall, true.B, !io.preif2if.stall) 47 | 48 | val bpu = Module(new BTB(BTB_ENTRYS, XLEN, XLEN/2)) 49 | bpu.io.pred.in.pc := pc 50 | bpu.io.update := io.ex2preif.bpu_update 51 | 52 | nxt_pc := Mux(bpu_fail_when_stall, bpu_fail_pc_when_stall, 53 | Mux(io.ex2preif.bpu_fail, io.ex2preif.br_real_pc, 54 | Mux(bpu.io.pred.out.taken, bpu.io.pred.out.target, 55 | pc4 56 | ) 57 | ) 58 | ) 59 | io.preif2if.bits.pc := pc 60 | io.preif2if.bits.bpu_out := bpu.io.pred.out 61 | io.preif2if.bits.valid := !reset.asBool 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024Dual/Config.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024Dual 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | trait Config{ 7 | def XLEN = 32 8 | def GPR_NUM = 32 9 | def GPR_LEN = log2Up(GPR_NUM) 10 | 11 | def PC_START = "h80000000".U(XLEN.W) 12 | 13 | def ICACHE_WAYS = 2 14 | def ICACHE_LINES = 128 15 | def ICACHE_WORDS = 4 16 | 17 | def WBUFF_LEN = 8 18 | 19 | def BTB_ENTRYS = 8 20 | 21 | def AXI4IDBITS = 4 22 | } -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024Dual/Core.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024Dual 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.bus.{AXI4SRAM, AXI4IO, 
/**
  * Dual-issue core top level for the NSCSCC board flow.
  *
  * Unlike `Core`, the two SRAMs and the UART are not instantiated here; they are
  * exposed as ports (`isram`, `dsram`, `uart`) so that an external wrapper can attach them.
  *
  * Bus topology built below:
  *   - IF  -> xbar_imem.in(0) --+
  *                              +--> xbar_imem.out -> instruction SRAM wrapper -> io.isram
  *   - MEM -> xbar_dmem.in ---> out(0) -> xbar_imem.in(1)   (data access to instruction SRAM)
  *                              out(1) -> data SRAM wrapper -> io.dsram
  *                              out(2) -> io.uart
  */
class CoreNSCSCC extends Module with Config{
    // 0x400000 bytes of SRAM addressed as 32-bit words.
    val RAM_DEPTH = 0x400000/4
    val io = IO(new Bundle{
        val isram = Flipped(new RamIO(32, RAM_DEPTH))
        val dsram = Flipped(new RamIO(32, RAM_DEPTH))
        val uart = new AXI4IO(XLEN, XLEN)
    })

    val preif_stage = Module(new stages.PREIF)
    val if_stage = Module(new stages.IF)
    val id_stage = Module(new stages.ID)
    val is_stage = Module(new stages.IS)
    val ex_stage = Module(new stages.EX)
    val mem_stage = Module(new stages.MEM)

    // Forward pipeline channels plus the EX/MEM feedback paths (redirect, bypass, write-back).
    preif_stage.io.preif2if <> if_stage.io.preif2if
    if_stage.io.if2id <> id_stage.io.if2id
    id_stage.io.id2is <> is_stage.io.id2is
    is_stage.io.is2ex <> ex_stage.io.is2ex
    ex_stage.io.ex2preif <> preif_stage.io.ex2preif
    ex_stage.io.ex2is <> is_stage.io.ex2is
    ex_stage.io.ex2mem <> mem_stage.io.ex2mem
    mem_stage.io.mem2is <> is_stage.io.mem2is
    mem_stage.io.stall_all := false.B

    val isram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2))
    val dsram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2))

    // The instruction SRAM is shared by IF and MEM, so IF must reach it through the N-to-1
    // crossbar only. A former direct `if_stage.io.isram <> isram_axi4_wrapper.io.axi`
    // connection was removed: both endpoints are re-driven by the two bulk connects below,
    // so under last-connect semantics it never had any effect.
    val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS))
    xbar_imem.io.in(0) <> if_stage.io.isram
    xbar_imem.io.out <> isram_axi4_wrapper.io.axi

    // Data-side address map: (base, size, flag) per output port.
    // NOTE(review): the meaning of the third tuple element is defined by AXI4XBar1toN, not shown here.
    val xbar_dmem = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List(
        (0x80000000L, 0x400000L, false),
        (0x80400000L, 0x400000L, false),
        (0xbfd00000L, 0x400000L, false),
    )))

    xbar_dmem.io.in <> mem_stage.io.dmem
    xbar_dmem.io.out(0) <> xbar_imem.io.in(1)
    xbar_dmem.io.out(1) <> dsram_axi4_wrapper.io.axi
    xbar_dmem.io.out(2) <> io.uart

    isram_axi4_wrapper.io.sram <> io.isram
    dsram_axi4_wrapper.io.sram <> io.dsram
}
extends FlagsEnum 13 | // case object ra extends T{ 14 | // def value = "01" 15 | // } 16 | // case object pc extends T{ 17 | // def value = "10" 18 | // } 19 | // } 20 | object aluASel{ 21 | val ra = "01" 22 | val pc = "10" 23 | def apply() = UInt(2.W) 24 | } 25 | object aluBSel{ 26 | val rb = "001" 27 | val imm = "010" 28 | val num4 = "100" 29 | def apply() = UInt(3.W) 30 | } 31 | object brpcAddSel{ 32 | val pc = "01" 33 | val ra_val = "10" 34 | def apply() = UInt(2.W) 35 | } 36 | object ldType{ 37 | val x = "000001" 38 | val b = "000010" 39 | val h = "000100" 40 | val w = "001000" 41 | val bu = "010000" 42 | val hu = "100000" 43 | def apply() = UInt(6.W) 44 | } 45 | object stType{ 46 | val x = "0001" 47 | val b = "0010" 48 | val h = "0100" 49 | val w = "1000" 50 | def apply() = UInt(4.W) 51 | } 52 | object InstrType{ 53 | val alu = "00" 54 | val alu2 = "01" 55 | val ls = "10" 56 | val br = "11" 57 | def apply() = UInt(alu.length.W) 58 | } 59 | } -------------------------------------------------------------------------------- /src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/EX.scala: -------------------------------------------------------------------------------- 1 | package nagicore.loongarch.nscscc2024Dual.stages 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import nagicore.utils.Flags 6 | import nagicore.unit.ALU 7 | import nagicore.unit.BRU_SINGLE 8 | import nagicore.GlobalConfg 9 | import nagicore.unit.BTBUpdateIO 10 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags} 11 | import nagicore.unit.BR_TYPE 12 | import nagicore.unit.BP_TYPE 13 | import nagicore.unit.MULU_IMP 14 | import nagicore.unit.DIVU_IMP 15 | 16 | class ex2preifIO extends Bundle with Config{ 17 | val bpu_update = new BTBUpdateIO(BTB_ENTRYS, XLEN) 18 | val bpu_fail = Bool() 19 | val br_real_pc = UInt(XLEN.W) 20 | } 21 | 22 | class ex2isIO extends Bundle with Config{ 23 | // effective signal 24 | val bypass_rc1 = Output(UInt(GPR_LEN.W)) 25 | val bypass_val1 = 
/** Payload sent from EX to MEM for the two issue slots (slot 1 may be a load/store, slot 2 carries only an ALU result). */
class ex2memBits extends Bundle with Config{
    val instr1      = UInt(XLEN.W)
    val instr2      = UInt(XLEN.W)

    val rc1         = UInt(GPR_LEN.W)
    val alu1_out    = UInt(XLEN.W)
    val rc2         = UInt(GPR_LEN.W)
    val alu2_out    = UInt(XLEN.W)

    val rb1_val     = UInt(XLEN.W)

    val ld_type     = CtrlFlags.ldType()
    val st_type     = CtrlFlags.stType()

    val pc1         = UInt(XLEN.W)
    val pc2         = UInt(XLEN.W)

    val valid1      = Bool()
    val valid2      = Bool()
}

/** EX -> MEM channel: `bits` flows downstream, `stall` is driven back by MEM. */
class ex2memIO extends Bundle{
    val bits = Output(new ex2memBits)
    val stall = Input(Bool())
}

/**
  * Execute stage of the dual-issue core.
  *
  * Slot 1 owns the branch unit, the multiplier-capable ALU and all load/store control;
  * slot 2 only has a plain ALU. The stage resolves branches, reports mispredictions to
  * PREIF/IS, provides EX-level bypass values to IS and forwards results to MEM.
  */
class EX extends Module with Config{
    val io = IO(new Bundle{
        val ex2preif = new ex2preifIO
        val is2ex = Flipped(new is2exIO)
        val ex2mem = new ex2memIO
        val ex2is = new ex2isIO
    })
    // stall signal from next stage
    val stall_nxt = io.ex2mem.stall

    // Only alu1 is built with a multiplier; neither ALU has a divider.
    val alu1 = Module(new ALU(XLEN, MULU_IMP.synthesizer_DSP, DIVU_IMP.none))
    val alu2 = Module(new ALU(XLEN, MULU_IMP.none, DIVU_IMP.none))
    val busy = alu1.io.busy

    // accept instrs from pre stage
    val ready_nxt = Wire(Bool())
    // pipeline registers
    val preg = RegEnable(io.is2ex.bits, ready_nxt)


    // After a branch misprediction, wait for the refilled (redirect-target) instruction.
    // While waiting, every instruction that is not marked pc_refill is treated as killed.
    // NOTE(review): if the refill instruction ever issues in slot 1 together with a slot-2
    // instruction, slot 2 has pc_refill2 == false while wait_refill is still set and is
    // therefore killed — confirm that IS can never pair them in that order.
    val wait_refill = RegInit(false.B)
    val br_killed1 = wait_refill && !preg.pc_refill1
    val br_killed2 = wait_refill && !preg.pc_refill2

    // stall pre stages in force
    val stall_pre_counter = RegInit(0.U(2.W))

    val valid_instr1 = !br_killed1 && preg.valid1 && !busy && stall_pre_counter === 0.U
    // Asserted only in the cycle where slot 1 actually moves on to MEM.
    val valid_instr1_once = valid_instr1 && !stall_nxt


    val is_ld : Bool = valid_instr1 && !Flags.OHis(preg.ld_type, CtrlFlags.ldType.x)
    ready_nxt := !(stall_nxt || busy)

    // must stall when ld comes immediately unlike kill
    // NOTE(review): only bit 1 of the counter is tested, and in this block the counter is
    // only ever loaded with 1, so the first term looks constant-false — confirm intent.
    io.is2ex.stall := stall_pre_counter(1) =/= 0.U || is_ld || busy || stall_nxt

    val bru = Module(new BRU_SINGLE(XLEN))
    bru.io.a := preg.ra1_val
    bru.io.b := preg.rb1_val
    bru.io.br_type := preg.br_type

    // Branch target: immediate added to either ra (register-indirect) or the slot-1 PC.
    val br_pc = preg.imm1 + Mux(Flags.OHis(preg.brpcAdd_sel, CtrlFlags.brpcAddSel.ra_val), preg.ra1_val, preg.pc1)

    // Misprediction: predicted taken but not taken / wrong target, or predicted not-taken but taken.
    val br_pred_fail = Mux(preg.bpu_out.taken, !bru.io.br_take || preg.bpu_out.target =/= br_pc,
        bru.io.br_take) && valid_instr1_once

    io.ex2preif.bpu_fail := br_pred_fail
    io.ex2preif.br_real_pc := Mux(bru.io.br_take, br_pc, preg.pc1+4.U)

    // Timing optimisation: the issue-buffer clear is delayed by one cycle.
    io.ex2is.clear_is := RegNext(br_pred_fail)

    // The BTB update is registered as well (one cycle after the branch is in EX).
    io.ex2preif.bpu_update.bp_type := RegNext(Mux(Flags.OHis(preg.br_type, BR_TYPE.ALWAYS),
        Flags.U(BP_TYPE.jump), Flags.U(BP_TYPE.cond)
    ))
    io.ex2preif.bpu_update.hit := RegNext(preg.bpu_out.hit)
    io.ex2preif.bpu_update.index := RegNext(preg.bpu_out.index)
    io.ex2preif.bpu_update.pc := RegNext(preg.pc1)
    io.ex2preif.bpu_update.target := RegNext(io.ex2preif.br_real_pc)
    io.ex2preif.bpu_update.taken := RegNext(bru.io.br_take)
    io.ex2preif.bpu_update.valid := RegNext(valid_instr1 && !Flags.OHis(preg.br_type, BR_TYPE.NEVER))

    // Slot 2 is additionally squashed when slot 1 turns out to be a mispredicted branch.
    val valid_instr2 = !br_killed2 && preg.valid2 && !br_pred_fail && !busy && stall_pre_counter === 0.U
    val valid_instr2_once = valid_instr2 && !stall_nxt

    if(GlobalConfg.SIM){
        import nagicore.unit.DPIC_PERF_BRU
        import nagicore.unit.BR_TYPE
        val dpic_perf_bru = Module(new DPIC_PERF_BRU)
        dpic_perf_bru.io.clk := clock
        dpic_perf_bru.io.rst := reset
        dpic_perf_bru.io.valid := !Flags.OHis(preg.br_type, BR_TYPE.NEVER) && valid_instr1
        dpic_perf_bru.io.fail := br_pred_fail
    }

    io.ex2mem.bits.valid1 := valid_instr1
    io.ex2mem.bits.valid2 := valid_instr2

    io.ex2mem.bits.pc1 := preg.pc1
    io.ex2mem.bits.pc2 := preg.pc2

    when(br_pred_fail){
        wait_refill := true.B
    }.elsewhen((preg.pc_refill1 && preg.valid1) || (preg.pc_refill2 && preg.valid2)){
        wait_refill := false.B
    }

    stall_pre_counter := Mux(!stall_nxt,
        /* On a load, request the previous stage to stall for 1 cycle and ignore
           whatever sits in EX during that following cycle. */
        Mux(is_ld, 1.U,
            Mux(stall_pre_counter===0.U, 0.U,
                stall_pre_counter-1.U
            )
        ), stall_pre_counter)
    // Earlier variant, kept for reference: ignore the next 2 cycles (EX, DMEM) for
    // non-word loads and 1 cycle for word loads.
    // stall_pre_counter := Mux(!stall_nxt,
    //     Mux(is_ld,
    //         Mux(preg.ld_type === Flags.bp(CtrlFlags.ldType.w),
    //             1.U,
    //             2.U
    //         ),
    //         Mux(stall_pre_counter===0.U, 0.U,
    //             stall_pre_counter-1.U
    //         )
    //     ), stall_pre_counter)


    io.ex2mem.bits.instr1 := preg.instr1
    io.ex2mem.bits.instr2 := preg.instr2

    // Operand selection for both ALUs (one-hot select flags).
    val alu1_a = Flags.onehotMux(preg.alu1A_sel, Seq(
        CtrlFlags.aluASel.ra   -> preg.ra1_val,
        CtrlFlags.aluASel.pc   -> preg.pc1,
    ))
    val alu1_b = Flags.onehotMux(preg.alu1B_sel, Seq(
        CtrlFlags.aluBSel.rb   -> preg.rb1_val,
        CtrlFlags.aluBSel.imm  -> preg.imm1,
        CtrlFlags.aluBSel.num4 -> 4.U,
    ))
    val alu2_a = Flags.onehotMux(preg.alu2A_sel, Seq(
        CtrlFlags.aluASel.ra   -> preg.ra2_val,
        CtrlFlags.aluASel.pc   -> preg.pc2,
    ))
    val alu2_b = Flags.onehotMux(preg.alu2B_sel, Seq(
        CtrlFlags.aluBSel.rb   -> preg.rb2_val,
        CtrlFlags.aluBSel.imm  -> preg.imm2,
        CtrlFlags.aluBSel.num4 -> 4.U,
    ))

    // must assert for only one cycle
    // (RegNext(ready_nxt) is high only in the first cycle after preg was loaded)
    alu1.io.valid := !br_killed1 && stall_pre_counter === 0.U && preg.valid1 && RegNext(ready_nxt)
    alu1.io.a := alu1_a
    alu1.io.b := alu1_b
    alu1.io.op := preg.alu1_op

    alu2.io.valid := preg.valid2
    alu2.io.a := alu2_a
    alu2.io.b := alu2_b
    alu2.io.op := preg.alu2_op

    io.ex2mem.bits.alu1_out := alu1.io.out
    io.ex2mem.bits.rb1_val := preg.rb1_val
    io.ex2mem.bits.rc1 := preg.rc1

    io.ex2mem.bits.alu2_out := alu2.io.out
    io.ex2mem.bits.rc2 := preg.rc2

    io.ex2mem.bits.ld_type := preg.ld_type
    io.ex2mem.bits.st_type := preg.st_type
    // (pc1 is already driven above together with pc2; the former second, identical
    //  assignment was removed.)

    // EX-level bypass towards IS.
    io.ex2is.bypass_rc1 := preg.rc1
    io.ex2is.bypass_val1 := alu1.io.out
    io.ex2is.bypass_en1 := valid_instr1

    io.ex2is.bypass_rc2 := preg.rc2
    io.ex2is.bypass_val2 := alu2.io.out
    io.ex2is.bypass_en2 := valid_instr2

    if(GlobalConfg.SIM){
        import nagicore.unit.DPIC_PERF_PIPE
        val perf_pipe_ex = Module(new DPIC_PERF_PIPE())
        perf_pipe_ex.io.clk := clock
        perf_pipe_ex.io.rst := reset
        perf_pipe_ex.io.id := 1.U
        perf_pipe_ex.io.invalid := !io.ex2mem.bits.valid1
        perf_pipe_ex.io.stall := io.is2ex.stall
    }
}
br_type = BR_TYPE() 24 | val brpcAdd_sel = CtrlFlags.brpcAddSel() 25 | val ld_type = CtrlFlags.ldType() 26 | val st_type = CtrlFlags.stType() 27 | val instr_type = CtrlFlags.InstrType() 28 | val pc_refill = Bool() 29 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) 30 | 31 | val valid = Bool() 32 | } 33 | 34 | class id2isIO extends Bundle{ 35 | val bits = Output(new id2isBits) 36 | val stall = Input(Bool()) 37 | } 38 | 39 | class ID extends Module with Config{ 40 | val io = IO(new Bundle{ 41 | val if2id = Flipped(new if2idIO) 42 | val id2is = new id2isIO 43 | }) 44 | 45 | // pipeline registers 46 | val preg = RegEnable(io.if2id.bits, !io.id2is.stall) 47 | 48 | io.id2is.bits.valid := preg.valid 49 | io.if2id.stall := io.id2is.stall 50 | 51 | val decoder = Module(new Decoder(XLEN, GPR_LEN)) 52 | decoder.io.instr := preg.instr 53 | 54 | io.id2is.bits.instr := preg.instr 55 | io.id2is.bits.pc := preg.pc 56 | 57 | // val gpr = Module(new GPR(XLEN, GPR_NUM, 2)) 58 | // gpr.io.wen := io.mem2id.gpr_wen 59 | // gpr.io.waddr := io.mem2id.gpr_wid 60 | // gpr.io.wdata := io.mem2id.gpr_wdata 61 | 62 | // if(GlobalConfg.SIM){ 63 | // import nagicore.unit.DPIC_UPDATE_GPR 64 | // val dpic_update_gpr = Module(new DPIC_UPDATE_GPR(XLEN, GPR_NUM)) 65 | // dpic_update_gpr.io.clk := clock 66 | // dpic_update_gpr.io.rst := reset 67 | // dpic_update_gpr.io.id := gpr.io.waddr 68 | // dpic_update_gpr.io.wen := gpr.io.wen 69 | // dpic_update_gpr.io.wdata := gpr.io.wdata 70 | // } 71 | 72 | // def bypass_unit(rx: UInt, gpr_rdata: UInt):UInt = { 73 | // Mux(rx === 0.U, 0.U, 74 | // Mux(io.ex2id.bypass_rc === rx && io.ex2id.bypass_en, io.ex2id.bypass_val, 75 | // Mux(io.mem2id.bypass_rc === rx && io.mem2id.bypass_en, io.mem2id.bypass_val, 76 | // gpr_rdata 77 | // ) 78 | // ) 79 | // ) 80 | // } 81 | 82 | // val ra = decoder.io.ra 83 | // gpr.io.raddr(0) := ra 84 | // // bypass 85 | io.id2is.bits.ra := decoder.io.ra 86 | io.id2is.bits.aluA_sel := decoder.io.aluA_sel 87 | 88 | // val rb = 
/** Payload sent from IF to ID: the fetched instruction together with the PREIF metadata that travelled with it. */
class if2idBits extends Bundle with Config{
    val pc          = UInt(XLEN.W)
    val pc_refill   = Bool()
    val bpu_out     = new BTBPredOutIO(BTB_ENTRYS, XLEN)
    val instr       = UInt(XLEN.W)

    val valid       = Bool()
}

/** IF -> ID channel: `bits` flows downstream, `stall` is driven back by ID. */
class if2idIO extends Bundle{
    val bits = Output(new if2idBits)
    val stall = Input(Bool())
}

/**
  * Instruction-fetch stage: a thin wrapper around the instruction cache.
  *
  * The PREIF payload is handed to the cache as `pipedata` and read back on the response
  * side, so the instruction leaves this stage together with its PC and prediction.
  */
class IF extends Module with Config{
    val io = IO(new Bundle {
        val preif2if = Flipped(new preif2ifIO)
        val if2id = new if2idIO
        val isram = new AXI4IO(XLEN, XLEN)
    })

    // 2-stages 1cyc cache
    val icache = Module(new Cache(XLEN, XLEN, ICACHE_WAYS, ICACHE_LINES, ICACHE_WORDS, () => new preif2ifBits(), CacheReplaceType.LRU,
        dataRamType = RamType.BRAM_1CYC,
        tagVRamType = RamType.BRAM_1CYC,
        debug_id = 0))
    icache.io.axi <> io.isram

    // Request side: a read-only, cacheable, word-sized access at the fetch PC.
    private val req = icache.io.master.front.bits
    req.valid    := io.preif2if.bits.valid
    req.addr     := io.preif2if.bits.pc
    req.size     := 2.U
    req.uncache  := false.B
    req.wmask    := 0.U
    req.wdata    := DontCare
    req.pipedata := io.preif2if.bits

    // Back-pressure: ID stalls the cache response, the cache stalls PREIF.
    icache.io.master.back.stall := io.if2id.stall
    io.preif2if.stall := icache.io.master.front.stall

    // Response side: instruction word plus the metadata carried through the cache.
    private val resp = icache.io.master.back.bits
    io.if2id.bits.valid     := resp.valid
    io.if2id.bits.instr     := resp.rdata
    io.if2id.bits.pc        := resp.pipedata.pc
    io.if2id.bits.pc_refill := resp.pipedata.pc_refill
    io.if2id.bits.bpu_out   := resp.pipedata.bpu_out

    if(GlobalConfg.SIM){
        import nagicore.unit.DPIC_PERF_PIPE
        val perf_pipe_if = Module(new DPIC_PERF_PIPE())
        perf_pipe_if.io.clk := clock
        perf_pipe_if.io.rst := reset
        perf_pipe_if.io.id := 0.U
        perf_pipe_if.io.invalid := !io.if2id.bits.valid
        perf_pipe_if.io.stall := io.preif2if.stall
    }
}
/**
  * Payload sent from IS to EX for both issue slots.
  * Branch, load/store and prediction fields exist once and are taken from slot 1.
  */
class is2exBits extends Bundle with Config{
    val instr1      = UInt(XLEN.W)
    val pc1         = UInt(XLEN.W)
    val ra1_val     = UInt(XLEN.W)
    val alu1A_sel   = CtrlFlags.aluASel()
    val rb1_val     = UInt(XLEN.W)
    val alu1B_sel   = CtrlFlags.aluBSel()
    val alu1_op     = ALU_OP()
    val rc1         = UInt(GPR_LEN.W)
    val imm1        = UInt(XLEN.W)
    val pc_refill1  = Bool()

    val br_type     = BR_TYPE()
    val brpcAdd_sel = CtrlFlags.brpcAddSel()
    val ld_type     = CtrlFlags.ldType()
    val st_type     = CtrlFlags.stType()
    val bpu_out     = new BTBPredOutIO(BTB_ENTRYS, XLEN)

    val valid1      = Bool()

    val instr2      = UInt(XLEN.W)
    val pc2         = UInt(XLEN.W)
    val ra2_val     = UInt(XLEN.W)
    val alu2A_sel   = CtrlFlags.aluASel()
    val rb2_val     = UInt(XLEN.W)
    val alu2B_sel   = CtrlFlags.aluBSel()
    val alu2_op     = ALU_OP()
    val rc2         = UInt(GPR_LEN.W)
    val imm2        = UInt(XLEN.W)
    val pc_refill2  = Bool()

    val valid2      = Bool()
}

/** IS -> EX channel: `bits` flows downstream, `stall` is driven back by EX. */
class is2exIO extends Bundle{
    val bits = Output(new is2exBits)
    val stall = Input(Bool())
}

/**
  * Issue stage of the dual-issue core.
  *
  * Decoded instructions are queued in a ring buffer. Each cycle the head entry is issued
  * in slot 1, and the second entry is co-issued in slot 2 when it is an ALU-type
  * instruction that does not read slot 1's destination register. The stage also owns the
  * register file (4 read / 2 write ports) and the operand bypass network.
  */
class IS extends Module with Config{
    val io = IO(new Bundle{
        val id2is = Flipped(new id2isIO)
        val is2ex = new is2exIO

        val ex2is = Flipped(new ex2isIO)
        val mem2is = Flipped(new mem2isIO)
    })

    val issue_buffer = Module(new RingBuff(()=>new id2isBits, 8, rchannel=2, debug_id=0))

    issue_buffer.io.wdata := io.id2is.bits
    issue_buffer.io.push := io.id2is.bits.valid
    issue_buffer.io.clear := io.ex2is.clear_is

    val is1 = issue_buffer.io.rdatas(0)
    val is2 = issue_buffer.io.rdatas(1)
    // RAW hazard between the two candidates: is2 reads what is1 writes (r0 never counts).
    val data_hazard = is1.rc =/= 0.U && (is1.rc === is2.ra || is1.rc === is2.rb)
    // Dual-issue only when is2 is an ALU-type instruction without a data hazard on is1.
    val issue_double =
        !data_hazard &&
        issue_buffer.io.rvalids(1) &&
        Flags.is(is2.instr_type, CtrlFlags.InstrType.alu)

    issue_buffer.io.popN := issue_double
    issue_buffer.io.pop := !(io.is2ex.stall || issue_buffer.io.empty)

    // ID is only back-pressured when the issue buffer is full.
    io.id2is.stall := issue_buffer.io.full

    val gpr = Module(new GPR(XLEN, GPR_NUM, 4, 2))
    // If both write ports target the same register, port 1 (slot 2) wins and port 0 is masked.
    gpr.io.wen(0) := io.mem2is.gpr_wen1 && !(io.mem2is.gpr_wen2 && io.mem2is.gpr_wid2 === io.mem2is.gpr_wid1)
    gpr.io.waddr(0) := io.mem2is.gpr_wid1
    gpr.io.wdata(0) := io.mem2is.gpr_wdata1

    gpr.io.wen(1) := io.mem2is.gpr_wen2
    gpr.io.waddr(1) := io.mem2is.gpr_wid2
    gpr.io.wdata(1) := io.mem2is.gpr_wdata2

    if(GlobalConfg.SIM){
        import nagicore.unit.DPIC_UPDATE_GPR2
        val dpic_update_gpr = Module(new DPIC_UPDATE_GPR2(XLEN, GPR_NUM))
        dpic_update_gpr.io.clk := clock
        dpic_update_gpr.io.rst := reset

        dpic_update_gpr.io.id1 := gpr.io.waddr(0)
        dpic_update_gpr.io.wen1 := gpr.io.wen(0)
        dpic_update_gpr.io.wdata1 := gpr.io.wdata(0)

        dpic_update_gpr.io.id2 := gpr.io.waddr(1)
        dpic_update_gpr.io.wen2 := gpr.io.wen(1)
        dpic_update_gpr.io.wdata2 := gpr.io.wdata(1)
    }

    /** Selects the operand for register `rx`: r0 reads as zero, then the first matching
      * bypass source in the order EX slot 2, EX slot 1, MEM slot 2, MEM slot 1, and
      * finally the register-file read data. */
    def bypass_unit(rx: UInt, gpr_rdata: UInt):UInt = {
        MuxCase(gpr_rdata, Seq(
            (rx === 0.U)                                            -> 0.U,
            (io.ex2is.bypass_rc2 === rx && io.ex2is.bypass_en2)     -> io.ex2is.bypass_val2,
            (io.ex2is.bypass_rc1 === rx && io.ex2is.bypass_en1)     -> io.ex2is.bypass_val1,
            (io.mem2is.bypass_rc2 === rx && io.mem2is.bypass_en2)   -> io.mem2is.bypass_val2,
            (io.mem2is.bypass_rc1 === rx && io.mem2is.bypass_en1)   -> io.mem2is.bypass_val1,
        ))
    }

    // Register-file read ports: 0/1 serve slot 1 (ra, rb), 2/3 serve slot 2 (ra, rb).
    gpr.io.raddr(0) := is1.ra
    gpr.io.raddr(1) := is1.rb
    gpr.io.raddr(2) := is2.ra
    gpr.io.raddr(3) := is2.rb

    // ---- slot 1 ----
    io.is2ex.bits.valid1      := issue_buffer.io.rvalids(0)
    io.is2ex.bits.instr1      := is1.instr
    io.is2ex.bits.pc1         := is1.pc
    io.is2ex.bits.pc_refill1  := is1.pc_refill
    io.is2ex.bits.ra1_val     := bypass_unit(is1.ra, gpr.io.rdata(0))
    io.is2ex.bits.rb1_val     := bypass_unit(is1.rb, gpr.io.rdata(1))
    io.is2ex.bits.alu1A_sel   := is1.aluA_sel
    io.is2ex.bits.alu1B_sel   := is1.aluB_sel
    io.is2ex.bits.alu1_op     := is1.alu_op
    io.is2ex.bits.rc1         := is1.rc
    io.is2ex.bits.imm1        := is1.imm

    // Branch / load-store / prediction information only exists for slot 1.
    io.is2ex.bits.br_type     := is1.br_type
    io.is2ex.bits.brpcAdd_sel := is1.brpcAdd_sel
    io.is2ex.bits.ld_type     := is1.ld_type
    io.is2ex.bits.st_type     := is1.st_type
    io.is2ex.bits.bpu_out     := is1.bpu_out

    // ---- slot 2 ----
    io.is2ex.bits.valid2      := issue_buffer.io.rvalids(1) && issue_double
    io.is2ex.bits.instr2      := is2.instr
    io.is2ex.bits.pc2         := is2.pc
    io.is2ex.bits.pc_refill2  := is2.pc_refill
    io.is2ex.bits.ra2_val     := bypass_unit(is2.ra, gpr.io.rdata(2))
    io.is2ex.bits.rb2_val     := bypass_unit(is2.rb, gpr.io.rdata(3))
    io.is2ex.bits.alu2A_sel   := is2.aluA_sel
    io.is2ex.bits.alu2B_sel   := is2.aluB_sel
    io.is2ex.bits.alu2_op     := is2.alu_op
    io.is2ex.bits.rc2         := is2.rc
    io.is2ex.bits.imm2        := is2.imm
}
bypass_en2 = Output(Bool()) 21 | 22 | val gpr_wid1 = Output(UInt(GPR_LEN.W)) 23 | val gpr_wdata1 = Output(UInt(XLEN.W)) 24 | val gpr_wen1 = Output(Bool()) 25 | 26 | val gpr_wid2 = Output(UInt(GPR_LEN.W)) 27 | val gpr_wdata2 = Output(UInt(XLEN.W)) 28 | val gpr_wen2 = Output(Bool()) 29 | } 30 | 31 | class MEM extends Module with Config{ 32 | val io = IO(new Bundle { 33 | val ex2mem = Flipped(new ex2memIO()) 34 | val mem2is = new mem2isIO() 35 | val dmem = new AXI4IO(XLEN, XLEN) 36 | val stall_all = Input(Bool()) 37 | }) 38 | 39 | class dcachePipeT extends Bundle { 40 | val instr = UInt(XLEN.W) 41 | val alu_out = UInt(XLEN.W) 42 | val rc = UInt(GPR_LEN.W) 43 | val ld_type = CtrlFlags.ldType() 44 | val pc = UInt(XLEN.W) 45 | val no_ldst = Bool() 46 | 47 | val valid = Bool() 48 | } 49 | 50 | // val dcache = Module(new CacheMini(XLEN, XLEN, 8, 8, 1)) 51 | val dcache = Module(new UnCache(XLEN, XLEN, WBUFF_LEN, 1)) 52 | 53 | // pipeline registers 54 | val preg = RegEnable(io.ex2mem.bits, !dcache.io.out.busy && !io.stall_all) 55 | io.ex2mem.stall := dcache.io.out.busy || io.stall_all 56 | 57 | dcache.io.axi <> io.dmem 58 | 59 | val addr = preg.alu1_out 60 | 61 | dcache.io.in.bits.addr := addr 62 | // dcache.io.in.bits.uncache := addr(31, 28) === "hb".U 63 | dcache.io.in.bits.we := !Flags.OHis(preg.st_type, CtrlFlags.stType.x) 64 | dcache.io.in.bits.wdata := Flags.onehotMux(preg.st_type, Seq( 65 | CtrlFlags.stType.x -> 0.U, 66 | CtrlFlags.stType.b -> Fill(XLEN/8, preg.rb1_val(7, 0)), 67 | CtrlFlags.stType.h -> Fill(XLEN/16, preg.rb1_val(15, 0)), 68 | CtrlFlags.stType.w -> preg.rb1_val(31, 0), 69 | )) 70 | dcache.io.in.bits.size := Flags.onehotMux(preg.st_type, Seq( 71 | CtrlFlags.stType.x -> 0.U, 72 | CtrlFlags.stType.b -> 0.U, 73 | CtrlFlags.stType.h -> 1.U, 74 | CtrlFlags.stType.w -> 2.U, 75 | )) | Flags.onehotMux(preg.ld_type, Seq( 76 | CtrlFlags.ldType.x -> 0.U, 77 | CtrlFlags.ldType.b -> 0.U, 78 | CtrlFlags.ldType.bu -> 0.U, 79 | CtrlFlags.ldType.h -> 1.U, 80 | 
CtrlFlags.ldType.hu -> 1.U, 81 | CtrlFlags.ldType.w -> 2.U, 82 | )) 83 | dcache.io.in.bits.wmask := Flags.onehotMux(preg.st_type, Seq( 84 | CtrlFlags.stType.x -> 0.U, 85 | CtrlFlags.stType.b -> ("b1".U< ("b11".U<<(addr(1)##0.U(1.W))), 87 | CtrlFlags.stType.w -> "b1111".U, 88 | )) 89 | // 不走Cache的指令 90 | val nolr = Flags.OHis(preg.ld_type, CtrlFlags.ldType.x) && Flags.OHis(preg.st_type, CtrlFlags.stType.x) 91 | dcache.io.in.req := preg.valid1 && !nolr && RegNext(!dcache.io.out.busy) && !io.stall_all 92 | 93 | val rdata_raw = dcache.io.out.rdata 94 | val wordData = if(XLEN == 64) Mux(addr(2), rdata_raw(63, 32), rdata_raw(31, 0)) 95 | else rdata_raw(31, 0) 96 | val halfData = Mux(addr(1), wordData(31, 16), wordData(15, 0)) 97 | val byteData = Mux(addr(0), halfData(15, 8), halfData(7, 0)) 98 | 99 | val rdata_mem = Flags.onehotMux(preg.ld_type, Seq( 100 | CtrlFlags.ldType.x -> (0.U).zext, 101 | CtrlFlags.ldType.b -> byteData.asSInt, 102 | CtrlFlags.ldType.bu -> byteData.zext, 103 | CtrlFlags.ldType.h -> halfData.asSInt, 104 | CtrlFlags.ldType.hu -> halfData.zext, 105 | CtrlFlags.ldType.w -> wordData.zext, 106 | )).asUInt 107 | 108 | val valid1 = preg.valid1 && !dcache.io.out.busy 109 | val valid2 = preg.valid2 && !dcache.io.out.busy 110 | 111 | io.mem2is.bypass_rc1 := preg.rc1 112 | io.mem2is.bypass_en1 := valid1 113 | val wb_data = Mux(Flags.OHis(preg.ld_type, CtrlFlags.ldType.x), preg.alu1_out, rdata_mem) 114 | io.mem2is.bypass_val1 := wb_data 115 | 116 | io.mem2is.bypass_rc2 := preg.rc2 117 | io.mem2is.bypass_en2 := preg.valid2 118 | io.mem2is.bypass_val2 := preg.alu2_out 119 | 120 | // when(nolr){ 121 | // io.mem2id.bypass_rc := Mux(preg.valid, preg.rc, 0.U) 122 | // io.mem2id.bypass_val := preg.alu_out 123 | // }.otherwise{ 124 | // io.mem2id.bypass_rc := Mux(preg.valid && preg.ld_type === Flags.bp(CtrlFlags.ldType.w), preg.rc, 0.U) 125 | // io.mem2id.bypass_val := dcache.io.out.rdata 126 | // } 127 | 128 | io.mem2is.gpr_wid1 := preg.rc1 129 | io.mem2is.gpr_wdata1 
:= wb_data 130 | io.mem2is.gpr_wen1 := valid1 131 | 132 | io.mem2is.gpr_wid2 := preg.rc2 133 | io.mem2is.gpr_wdata2 := preg.alu2_out 134 | io.mem2is.gpr_wen2 := valid2 135 | 136 | if(GlobalConfg.SIM){ 137 | import nagicore.unit.DPIC_TRACE_MEM 138 | val dpic_trace_mem_w = Module(new DPIC_TRACE_MEM(XLEN, XLEN)) 139 | dpic_trace_mem_w.io.clk := clock 140 | dpic_trace_mem_w.io.rst := reset 141 | dpic_trace_mem_w.io.valid := dcache.io.in.req && dcache.io.in.bits.wmask.orR 142 | dpic_trace_mem_w.io.addr := dcache.io.in.bits.addr 143 | dpic_trace_mem_w.io.size := dcache.io.in.bits.size 144 | dpic_trace_mem_w.io.data := dcache.io.in.bits.wdata 145 | dpic_trace_mem_w.io.wmask := dcache.io.in.bits.wmask 146 | 147 | import nagicore.unit.DPIC_PERF_PIPE 148 | val perf_pipe_dcache = Module(new DPIC_PERF_PIPE()) 149 | perf_pipe_dcache.io.clk := clock 150 | perf_pipe_dcache.io.rst := reset 151 | perf_pipe_dcache.io.id := 2.U 152 | perf_pipe_dcache.io.invalid := !valid1 153 | perf_pipe_dcache.io.stall := io.ex2mem.stall 154 | 155 | import nagicore.unit.DPIC_UPDATE_PC2 156 | val dpic_update_pc = Module(new DPIC_UPDATE_PC2(XLEN)) 157 | dpic_update_pc.io.clk := clock 158 | dpic_update_pc.io.rst := reset 159 | dpic_update_pc.io.pc1 := preg.pc1 160 | dpic_update_pc.io.pc2 := preg.pc2 161 | dpic_update_pc.io.wen1:= valid1 162 | dpic_update_pc.io.wen2:= valid2 163 | 164 | import nagicore.unit.DPIC_TRACE_MEM 165 | val dpic_trace_mem_r = Module(new DPIC_TRACE_MEM(XLEN, XLEN)) 166 | dpic_trace_mem_r.io.clk := clock 167 | dpic_trace_mem_r.io.rst := reset 168 | dpic_trace_mem_r.io.valid := valid1 && preg.ld_type =/= Flags.bp(CtrlFlags.ldType.x) 169 | dpic_trace_mem_r.io.addr := addr 170 | dpic_trace_mem_r.io.size := Flags.onehotMux(preg.ld_type, Seq( 171 | CtrlFlags.ldType.x -> 0.U, 172 | CtrlFlags.ldType.b -> 0.U, 173 | CtrlFlags.ldType.bu -> 0.U, 174 | CtrlFlags.ldType.h -> 1.U, 175 | CtrlFlags.ldType.hu -> 1.U, 176 | CtrlFlags.ldType.w -> 2.U, 177 | )) 178 | dpic_trace_mem_r.io.data := 
/** Payload sent from PREIF to IF in the dual-issue core; `pc_refill` marks the first PC fetched after a misprediction redirect. */
class preif2ifBits extends Bundle with Config{
    val pc          = UInt(XLEN.W)
    val pc_refill   = Bool()
    val bpu_out     = new BTBPredOutIO(BTB_ENTRYS, XLEN)

    val valid       = Bool()
}

/** PREIF -> IF channel: `bits` flows downstream, `stall` is driven back by IF. */
class preif2ifIO extends Bundle{
    val bits = Output(new preif2ifBits)
    val stall = Input(Bool())
}

/**
  * Pre-fetch stage of the dual-issue core: owns the PC register and the BTB.
  *
  * A misprediction reported by EX overrides a downstream stall, so the redirect is taken
  * immediately. Next-PC priority: EX redirect, then BTB target (if predicted taken),
  * then pc + 4.
  */
class PREIF extends Module with Config{
    val io = IO(new Bundle {
        val preif2if = new preif2ifIO
        val ex2preif = Flipped(new ex2preifIO)
    })

    val nxt_pc = Wire(UInt(XLEN.W))
    // Keep the next-PC net visible in simulation builds.
    if(GlobalConfg.SIM) dontTouch(nxt_pc)

    // The PC (and the refill marker below) update when IF can accept a new request
    // or when EX forces a redirect.
    private val advance = !io.preif2if.stall || io.ex2preif.bpu_fail

    val pc = RegEnable(nxt_pc, PC_START, advance)
    val pc4 = pc + 4.U

    val bpu = Module(new BTB(BTB_ENTRYS, XLEN, XLEN/2))
    bpu.io.pred.in.pc := pc
    bpu.io.update := io.ex2preif.bpu_update

    // MuxCase picks the first true condition, i.e. the same priority as a nested Mux chain.
    nxt_pc := MuxCase(pc4, Seq(
        io.ex2preif.bpu_fail    -> io.ex2preif.br_real_pc,
        bpu.io.pred.out.taken   -> bpu.io.pred.out.target,
    ))

    io.preif2if.bits.pc := pc
    io.preif2if.bits.bpu_out := bpu.io.pred.out
    // Captured alongside the PC: true exactly for the PC that was loaded by a redirect.
    io.preif2if.bits.pc_refill := RegEnable(io.ex2preif.bpu_fail, advance)
    // Every cycle out of reset carries a valid fetch request.
    io.preif2if.bits.valid := !reset.asBool
}
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._

import nagicore.utils.Flags
import nagicore.GlobalConfg

/**
  * ALU operation encodings, written as 5-character binary strings.
  *
  * The encodings are not arbitrary: `ALU` forwards `op(1, 0)` to the
  * multiplier as its operation code (see `MULU_OP`, which takes the two
  * right-most characters of MUL/MULH/MULHU) and forwards `op(0)` to the
  * divider as its `signed` flag (1 for DIV/MOD, 0 for DIVU/MODU).
  *
  * `apply()` returns the hardware type for an operation code.
  */
object ALU_OP{
    // Earlier one-hot / Enumeration based encodings, kept for reference:
    // val X       = Value(0.U)
    // val ADD     = Value(1.U)
    // val SUB     = Value(2.U)
    // val AND     = Value(4.U)
    // val OR      = Value(8.U)
    // val XOR     = Value(16.U)
    // val LT      = Value(32.U)
    // val LTU     = Value(64.U)
    // val SL      = Value(128.U)
    // val SR      = Value(256.U)
    // val SRA     = Value(512.U)
    // val COPY_A  = Value(1024.U)
    // val COPY_B  = Value(2048.U)
    // val NOR     = Value(4096.U)
    // val MUL     = Value(8192.U)
    // val X, ADD, SUB, AND, OR, XOR, LT, LTU, SL, SR, SRA, COPY_A, COPY_B, NOR,
    // MUL, MULH, MULHU, DIV, DIVU, MOD, MODU = Value
    val X       = "00000"
    val ADD     = "00001"
    val SUB     = "00010"
    val AND     = "00011"
    val OR      = "00100"
    val XOR     = "00101"
    val LT      = "00110"
    val LTU     = "00111"
    val SL      = "01000"
    val SR      = "01001"
    val SRA     = "01010"
    val COPY_A  = "01011"
    val COPY_B  = "01100"
    val NOR     = "01101"
    val MUL     = "01110"
    val MULH    = "01111"
    val MULHU   = "10000"
    val DIV     = "10001"
    val DIVU    = "10010"
    val MOD     = "10011"
    val MODU    = "10100"
    def apply() = UInt(X.length().W)
}

/**
  * ALU port.
  *
  * `sum` always carries `a + b` regardless of `op`; `out` carries the result
  * selected by `op`. `valid` qualifies multiply/divide requests and `busy` is
  * raised while such a request is pending or in flight.
  */
class ALUIO(dataBits: Int) extends Bundle{
    val a       = Input(UInt(dataBits.W))
    val b       = Input(UInt(dataBits.W))
    val op      = Input(ALU_OP())
    val sum     = Output(UInt(dataBits.W))
    val out     = Output(UInt(dataBits.W))
    val valid   = Input(Bool())
    val busy    = Output(Bool())
}

/**
  * Integer ALU with an attached multiplier (`MULU`) and divider (`DIVU`).
  *
  * @param dataBits operand width
  * @param mulu_imp multiplier implementation passed through to `MULU`
  * @param divu_imp divider implementation passed through to `DIVU`
  */
class ALU(dataBits: Int, mulu_imp: MULU_IMP.MULU_IMP, divu_imp: DIVU_IMP.DIVU_IMP) extends Module {
    val io = IO(new ALUIO(dataBits))

    // NOTE(review): the shift amount is fixed to 5 bits, i.e. sized for 32-bit operands.
    val shamt = io.b(4, 0).asUInt
    val sum = io.a + io.b;
    // a - b computed one bit wider than the operands; the extra top bit is 1
    // exactly when a < b as unsigned numbers.
    val mins = (0.U ## io.a) + (1.U ## ~io.b) + 1.U;
    // Signed compare: when the signs differ, a is smaller iff a is negative;
    // otherwise the unsigned comparison gives the right answer.
    val isLT = Mux(io.a(dataBits-1)^io.b(dataBits-1), io.a(dataBits-1), mins(dataBits))
    val isLTU = mins(dataBits)
    val isEQ = mins(dataBits-1, 0) === 0.U
    val or = io.a | io.b

    io.sum := sum
    import ALU_OP._

    // val mulu_imp = if(GlobalConfg.SIM) MULU_IMP.synthesizer_1cyc else MULU_IMP.xsArrayMul

    val mulu = Module(new MULU(dataBits, mulu_imp))
    mulu.io.a := io.a
    mulu.io.b := io.b
    // The two low bits of the ALU opcode are the multiplier opcode (see MULU_OP).
    mulu.io.op := io.op(1, 0)
    mulu.io.vaild := io.valid && Flags.CasesMux(io.op, Seq(
        MUL     -> true.B,
        MULH    -> true.B,
        MULHU   -> true.B,
    ), false.B)

    val divu = Module(new DIVU(dataBits, divu_imp))
    divu.io.a := io.a
    divu.io.b := io.b
    // Bit 0 of the ALU opcode distinguishes DIV/MOD (1) from DIVU/MODU (0).
    divu.io.signed := io.op(0)
    divu.io.valid := io.valid && Flags.CasesMux(io.op, Seq(
        DIV     -> true.B,
        DIVU    -> true.B,
        MOD     -> true.B,
        MODU    -> true.B,
    ), false.B)

    // must assert when mul or div type comes immediately or can not stall instrs from pre stage
    io.busy := mulu.io.busy || divu.io.busy || mulu.io.vaild || divu.io.valid


    io.out := Flags.CasesMux(io.op, Seq(
        ADD     -> sum,
        SUB     -> mins(dataBits-1, 0),
        SL      -> (io.a << shamt),
        SR      -> (io.a >> shamt),
        SRA     -> (io.a.asSInt >> shamt.asUInt).asUInt,
        AND     -> (io.a & io.b),
        OR      -> or,
        XOR     -> (io.a ^ io.b),
        LT      -> isLT,
        LTU     -> isLTU,
        COPY_A  -> io.a,
        COPY_B  -> io.b,
        NOR     -> (~or),

        MUL     -> mulu.io.out,
        MULH    -> mulu.io.out,
        MULHU   -> mulu.io.out,
        DIV     -> divu.io.quo,
        DIVU    -> divu.io.quo,
        MOD     -> divu.io.rem,
        MODU    -> divu.io.rem,

        // MUL     -> (io.a.asSInt * io.b.asSInt)(31, 0).asUInt,
        // MULH    -> (io.a.asSInt * io.b.asSInt)(63, 32).asUInt,
        // MULHU   -> (io.a * io.b)(63, 32),
        // DIV     -> (io.a.asSInt / io.b.asSInt)(31, 0).asUInt,
        // DIVU    -> (io.a / io.b)(31, 0),
        // MOD     -> (io.a.asSInt % io.b.asSInt)(31, 0).asUInt,
        // MODU    -> (io.a % io.b)(31, 0),
    ), 0.U)
}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/BPU.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._
import nagicore.utils.Flags
import nagicore.utils.isPowerOf2

/** Branch kinds reported to the predictor on update (2-bit flag patterns). */
object BP_TYPE{
    val dontcare    = "??"
    val jump        = "01" // unconditional jump
    val cond        = "10" // conditional jump
    def apply() = UInt(2.W)
}

/** Prediction request: the PC to look up. */
class BTBPredInIO(pcBits: Int) extends Bundle {
    val pc      = UInt(pcBits.W)
}

/** Prediction result for the looked-up PC. */
class BTBPredOutIO(entryNum: Int, pcBits: Int) extends Bundle {
    val taken   = Bool()
    val target  = UInt(pcBits.W)
    val hit     = Bool()
    val index   = UInt(log2Ceil(entryNum).W)
}

/**
  * Training information for one resolved branch. `hit` and `index` tell the
  * BTB which entry to update; on a miss a new entry is allocated instead.
  */
class BTBUpdateIO(entryNum: Int, pcBits: Int) extends Bundle {
    val bp_type = BP_TYPE()
    val taken   = Bool() // cond jump taken or not
    val pc      = UInt(pcBits.W)
    val target  = UInt(pcBits.W)
    val hit     = Bool()
    val index   = UInt(log2Ceil(entryNum).W) // hit btb index
    val valid   = Bool()
}

/**
  * Drop-in replacement for `BTB` that never predicts taken. All other
  * prediction outputs are DontCare and updates are ignored.
  */
class NoBTB(entryNum: Int, pcBits: Int, tagBits: Int, scInit: Int=0, instrBytes: Int=4) extends Module {
    require(pcBits >= tagBits && tagBits > 0 && instrBytes > 0)
    val io = IO(new Bundle {
        val pred = new Bundle {
            val in = Input(new BTBPredInIO(pcBits))
            val out = Output(new BTBPredOutIO(entryNum, pcBits))
        }
        val update = Input(new BTBUpdateIO(entryNum, pcBits))
    })
    io.pred.out := DontCare
    io.pred.out.taken := false.B
}

/**
  * Branch target buffer with a 2-bit saturating counter per entry.
  *
  * Every lookup compares the PC tag against all entries; new entries are
  * allocated round-robin through `entry_p`.
  *
  * @param entryNum   number of entries, must be a power of two
  * @param pcBits     PC width
  * @param tagBits    number of PC bits (above the instruction offset) used as tag
  * @param scInit     reset value of every saturating counter
  * @param instrBytes instruction size in bytes; its log2 low PC bits are skipped
  */
class BTB(entryNum: Int, pcBits: Int, tagBits: Int, scInit: Int=0, instrBytes: Int=4) extends Module {
    // The tag is sliced from pc(log2Ceil(instrBytes)+tagBits-1, log2Ceil(instrBytes)),
    // so the offset bits have to fit into the PC together with the tag.
    require(tagBits > 0 && instrBytes > 0 && isPowerOf2(entryNum))
    require(pcBits >= tagBits + log2Ceil(instrBytes),
        s"BTB tag (${tagBits} bits above ${log2Ceil(instrBytes)} offset bits) does not fit into a ${pcBits}-bit PC")
    val io = IO(new Bundle {
        val pred = new Bundle {
            val in = Input(new BTBPredInIO(pcBits))
            val out = Output(new BTBPredOutIO(entryNum, pcBits))
        }
        val update = Input(new BTBUpdateIO(entryNum, pcBits))
    })
    class BTBTableEntry extends Bundle {
        // Two-bit saturating counter
        // 00: strongly not taken, 01: not taken, 10: taken, 11: strongly taken
        val sc      = UInt(2.W)
        val tag     = UInt(tagBits.W)
        val target  = UInt(pcBits.W)
        val valid   = Bool()
    }
    def get_tag(pc: UInt): UInt = {
        pc(log2Ceil(instrBytes)+tagBits-1, log2Ceil(instrBytes))
    }
    val table = RegInit(VecInit(Seq.fill(entryNum)({
        val bundle = Wire(new BTBTableEntry())
        bundle.sc := scInit.U(2.W)
        bundle.tag := 0.U
        bundle.target := 0.U
        bundle.valid := false.B
        bundle
    })))

    // Next entry to (over)write when a branch misses in the table.
    val entry_p = RegInit(0.U(log2Ceil(entryNum).W))

    val pred_tag = get_tag(io.pred.in.pc)
    val pred_hits = VecInit.tabulate(entryNum){
        i => pred_tag === table(i).tag && table(i).valid
    }
    val pred_hit = pred_hits.reduceTree(_ || _)
    // NOTE(review): OHToUInt assumes at most one entry matches; nothing here
    // prevents two entries from holding the same tag (two misses of the same
    // branch each allocate an entry) - confirm this cannot happen in the pipeline.
    val pred_hit_index = OHToUInt(pred_hits)
    val pred_hit_entry = table(pred_hit_index)
    io.pred.out.taken := pred_hit && pred_hit_entry.sc(1)
    io.pred.out.target := pred_hit_entry.target
    io.pred.out.hit := pred_hit
    io.pred.out.index := pred_hit_index

    when(io.update.valid){
        when(Flags.OHis(io.update.bp_type, BP_TYPE.jump)){
            when(io.update.hit){
                table(io.update.index).sc := 3.U
                table(io.update.index).target := io.update.target
            }.otherwise{
                table(entry_p).sc := 3.U
                table(entry_p).target := io.update.target
                table(entry_p).tag := get_tag(io.update.pc)
                table(entry_p).valid := true.B
                entry_p := entry_p + 1.U
            }
        }.elsewhen(Flags.OHis(io.update.bp_type, BP_TYPE.cond)){
            when(io.update.hit){
                val sc = table(io.update.index).sc
                table(io.update.index).sc := Mux(io.update.taken,
                    // 11 -> 11, 00 -> 01, 01 -> 10, 10 -> 11
                    Mux(sc===3.U, 3.U, Cat(sc(1)|sc(0), ~sc(0))),
                    // 00 -> 00, 01 -> 00, 10 -> 01, 11 -> 10
                    Mux(sc===0.U, 0.U, Cat(sc(1)&sc(0), ~sc(0)))
                )
                table(io.update.index).target := io.update.target
            }.otherwise{
                table(entry_p).sc := 2.U // weakly taken
                table(entry_p).target := io.update.target
                table(entry_p).tag := get_tag(io.update.pc)
                table(entry_p).valid := true.B
                entry_p := entry_p + 1.U
            }
        }
    }

}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/BRU.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._

import nagicore.utils.Flags

/** One-hot branch condition encodings (8-character binary strings). */
object BR_TYPE{
    val NEVER   = "00000001"
    val EQ      = "00000010"
    val NE      = "00000100"
    val LT      = "00001000"
    val LTU     = "00010000"
    val GE      = "00100000"
    val GEU     = "01000000"
    val ALWAYS  = "10000000"
    def apply() = UInt(NEVER.length.W)
}

/** Port of the branch unit that reuses an ALU result. */
class BRU_WITH_ALU_IO(dataBits: Int) extends Bundle{
    val alu_out = Input(UInt(dataBits.W))
    val br_type = Input(BR_TYPE())
    val br_take = Output(Bool())
}

/**
  * Branch decision derived from an ALU result instead of the raw operands.
  *
  * EQ/NE test whether `alu_out` is zero; LT/LTU/GE/GEU test bit 0 of
  * `alu_out`. The ALU operation that must accompany each branch type is
  * chosen outside this module (presumably SUB/XOR for EQ/NE and LT/LTU for
  * the ordered compares - confirm against the decoder).
  */
class BRU_WITH_ALU(dataBits: Int) extends Module{
    val io = IO(new BRU_WITH_ALU_IO(dataBits))

    val eq = io.alu_out === 0.U

    import BR_TYPE._


    io.br_take := Flags.onehotMux(io.br_type, Seq(
        NEVER   -> false.B,
        EQ      -> eq,
        NE      -> !eq,
        LT      -> io.alu_out(0),
        LTU     -> io.alu_out(0),
        GE      -> !io.alu_out(0),
        GEU     -> !io.alu_out(0),
        ALWAYS  -> true.B,
    ))

    // io.br_take := MuxLookup(io.br_type, false.B)(Seq(
    //     EQ      -> eq,
    //     NE      -> !eq,
    //     LT      -> io.alu_out(0),
    //     LTU     -> io.alu_out(0),
    //     GE      -> !io.alu_out(0),
    //     GEU     -> !io.alu_out(0),
    // ))
}

/**
  * Stand-alone branch unit with its own comparator: decides `br_take` from
  * operands `a` and `b` according to `br_type`.
  */
class BRU_SINGLE(dataBits: Int) extends Module{
    val io = IO(new Bundle{
        val a       = Input(UInt(dataBits.W))
        val b       = Input(UInt(dataBits.W))
        val br_type = Input(BR_TYPE())
        val br_take = Output(Bool())
    })

    // a - b computed one bit wider than the operands; the top bit is the
    // unsigned "a < b" flag and the whole value is zero iff a == b.
    val mins = (0.U ## io.a) + (1.U ## ~io.b) + 1.U;
    val isLT = Mux(io.a(dataBits-1)^io.b(dataBits-1), io.a(dataBits-1), mins(dataBits))
    val isLTU = mins(dataBits)
    val eq = mins === 0.U

    import BR_TYPE._

    io.br_take := Flags.onehotMux(io.br_type, Seq(
        NEVER   -> false.B,
        EQ      -> eq,
        NE      -> !eq,
        LT      -> isLT,
        LTU     -> isLTU,
        GE      -> !isLT,
        GEU     -> !isLTU,
        ALWAYS  -> true.B,
    ))
}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/DIVU.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._
import nagicore.utils.Flags

/** Available divider implementations. */
object DIVU_IMP extends Enumeration {
    type DIVU_IMP = Value
    val none, radix2 = Value
}

/**
  * Integer divider producing quotient and remainder.
  *
  * `radix2`: the operands are latched (with signs stripped when `signed`)
  * in the cycle `valid` is sampled, then one quotient bit is produced per
  * cycle. Leading zeros of the dividend are skipped, so the iteration count
  * is `dataBits + 1 - (number of leading zeros)`. `busy` stays high from the
  * cycle after the request was latched until the iteration counter reaches
  * zero; `quo`/`rem` are valid once `busy` is low again.
  *
  * `none`: no divider is built; `busy` is tied low and the results are DontCare.
  *
  * Division by zero gets no special handling.
  *
  * @param dataBits operand width
  * @param imp_way  implementation selector
  */
class DIVU(dataBits: Int, imp_way: DIVU_IMP.DIVU_IMP = DIVU_IMP.radix2) extends Module{
    val io = IO(new Bundle{
        val a       = Input(UInt(dataBits.W))
        val b       = Input(UInt(dataBits.W))
        val signed  = Input(Bool())
        val quo     = Output(UInt(dataBits.W))
        val rem     = Output(UInt(dataBits.W))
        val valid   = Input(Bool())
        val busy    = Output(Bool())
    })

    imp_way match {
        case DIVU_IMP.radix2 => {
            /* ref: https://github.com/MaZirui2001/LA32R-pipeline-scala */

            /* stage1: solve sign */
            // Quotient is negative when the operand signs differ; the
            // remainder takes the sign of the dividend.
            val sign_s = Mux(io.signed, io.a(dataBits-1) ^ io.b(dataBits-1), false.B)
            val sign_r = Mux(io.signed, io.a(dataBits-1), false.B)
            val src1 = Mux(io.signed && io.a(dataBits-1), ~io.a + 1.U, io.a)
            val src2 = Mux(io.signed && io.b(dataBits-1), ~io.b + 1.U, io.b)

            // get highest 1 in src1
            // TODO use log2
            val high_rev = PriorityEncoder(Reverse(src1))

            // Iteration counter; must be able to hold dataBits + 1.
            val cnt = RegInit(0.U(log2Ceil(dataBits+2).W))
            val stage1_fire = cnt === 0.U

            val src1_reg1 = RegEnable(src1, stage1_fire)
            val src2_reg1 = RegEnable(src2, stage1_fire)
            val signed_reg1 = RegEnable(io.signed, stage1_fire)
            val sign_s_reg1 = RegEnable(sign_s, stage1_fire)
            val sign_r_reg1 = RegEnable(sign_r, stage1_fire)
            // Start flag: reset to false so that no division is started (and
            // busy is not raised) by an uninitialised register after reset.
            val en_reg1 = RegEnable(io.valid, false.B, stage1_fire)
            val high_rev_reg1 = RegEnable(high_rev, stage1_fire)

            /* stage2+: div */
            val stage2_init = en_reg1 && cnt === 0.U

            val src2_reg2 = RegEnable(src2_reg1, stage2_init)
            val signed_reg2 = RegEnable(signed_reg1, stage2_init)
            val sign_s_reg2 = RegEnable(sign_s_reg1, stage2_init)
            val sign_r_reg2 = RegEnable(sign_r_reg1, stage2_init)

            when(cnt =/= 0.U){
                cnt := cnt - 1.U
            }.elsewhen(en_reg1){
                cnt := (dataBits+1).U - high_rev_reg1
            }

            // {remainder, quotient} shift register, one bit wider than 2*dataBits.
            val quo_rem_reg = RegInit(0.U((dataBits*2+1).W))
            val quo = quo_rem_reg(dataBits-1, 0)
            val rem = quo_rem_reg(dataBits*2-1, dataBits)
            when(cnt =/= 0.U){
                val mins = rem - src2_reg2
                when(rem >= src2_reg2){
                    // Subtract the divisor and shift in a quotient bit of 1.
                    quo_rem_reg := mins(dataBits-1, 0) ## quo ## 1.U(1.W)
                }.otherwise{
                    // Shift in a quotient bit of 0.
                    quo_rem_reg := quo_rem_reg(dataBits*2-1, 0) ## 0.U(1.W)
                }
            }.elsewhen(en_reg1){
                // Pre-shift the dividend past its leading zeros.
                quo_rem_reg := (0.U((dataBits+1).W) ## src1_reg1) << high_rev_reg1
            }

            io.busy := cnt =/= 0.U || en_reg1

            io.quo := Mux(signed_reg2,
                Mux(sign_s_reg2, ~quo + 1.U, quo),
                quo
            )

            // After the final shift the remainder sits one bit higher than
            // the working `rem` slice used during the iterations.
            val rem_res = quo_rem_reg(dataBits*2, dataBits+1)
            io.rem := Mux(signed_reg2,
                Mux(sign_r_reg2, ~rem_res + 1.U, rem_res),
                rem_res
            )
        }
        case DIVU_IMP.none => {
            io.busy := false.B
            io.quo := DontCare
            io.rem := DontCare
        }
    }
}

/*
class DIVU(dataBits: Int) extends Module{
    val io = IO(new Bundle{
        val a       = Input(UInt(dataBits.W))
        val b       = Input(UInt(dataBits.W))
        val signed  = Input(Bool())
        val quo     = Output(UInt(dataBits.W))
        val rem     = Output(UInt(dataBits.W))
        val valid   = Input(Bool())
        val busy    = Output(Bool())
    })

    /* ref: https://github.com/MaZirui2001/LA32R-pipeline-scala */

    /* stage1: solve sign */
    val sign_s = Mux(io.signed, io.a(dataBits-1) ^ io.b(dataBits-1), false.B)
    val sign_r = Mux(io.signed, io.a(dataBits-1), false.B)
    val src1 = Mux(io.signed && io.a(dataBits-1), ~io.a + 1.U, io.a)
    val src2 = Mux(io.signed && io.b(dataBits-1), ~io.b + 1.U, io.b)

    // get highest 1 in src1
    val high_rev = PriorityEncoder(Reverse(src1))

    val src1_reg1 = ShiftRegister(src1, 1, !io.busy)
    val src2_reg1 = ShiftRegister(src2, 1, !io.busy)
    val signed_reg1 = ShiftRegister(io.signed, 1, !io.busy)
    val sign_s_reg1 = ShiftRegister(sign_s, 1, !io.busy)
    val sign_r_reg1 = ShiftRegister(sign_r, 1, !io.busy)
    val en_reg1 = ShiftRegister(io.valid, 1, !io.busy)
    val high_rev_reg1 = ShiftRegister(high_rev, 1, !io.busy)

    /* stage2+: div */
    val cnt = RegInit(0.U(6.W))
    val stage2_init = en_reg1 && cnt === 0.U

    val src2_reg2 = RegEnable(src2_reg1, stage2_init)
    val signed_reg2 = RegEnable(signed_reg1, stage2_init)
    val sign_s_reg2 = RegEnable(sign_s_reg1, stage2_init)
    val sign_r_reg2 = RegEnable(sign_r_reg1, stage2_init)

    when(cnt =/= 0.U){
        cnt := cnt - 1.U
    }.elsewhen(en_reg1){
        cnt := (dataBits+1).U - high_rev_reg1
    }

    val quo_rem_reg = RegInit(0.U((dataBits*2+1).W))
    val quo = quo_rem_reg(dataBits-1, 0)
    val rem = quo_rem_reg(dataBits*2-1, dataBits)
    when(cnt =/= 0.U){
        val mins = rem - src2_reg2
        when(rem >= src2_reg2){
            quo_rem_reg := mins ## quo ## 1.U(1.W)
        }.otherwise{
            quo_rem_reg := quo_rem_reg(dataBits*2-1, 0) ## 0.U(1.W)
        }
    }.elsewhen(en_reg1){
        quo_rem_reg := (0.U((dataBits+1).W) ## src1_reg1) << high_rev_reg1
    }

    !io.busy := cnt === 0.U

    io.quo := Mux(signed_reg2,
        Mux(sign_s_reg2, ~quo + 1.U, quo),
        quo
    )

    io.rem := Mux(signed_reg2,
        Mux(sign_r_reg2, ~quo_rem_reg(dataBits*2, dataBits+1) + 1.U, quo_rem_reg(dataBits*2, dataBits+1)),
        quo_rem_reg(dataBits*2, dataBits+1)
    )
}
*/
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/DPIC.scala:
--------------------------------------------------------------------------------
package nagicore.unit
import chisel3._
import chisel3.util._
import nagicore.bus.RamIO

// Every class below is a BlackBox wrapper around a SystemVerilog resource
// under /sv; the Chisel side only declares the ports and parameters. What
// each module does with them is defined in the referenced .sv file.

/** Wrapper for /sv/DPIC_RAM_1CYC.sv (parameters ADDR_WIDTH, DATA_WIDTH). */
class DPIC_RAM_1CYC(addr_width: Int, data_width: Int) extends BlackBox(Map("ADDR_WIDTH" -> addr_width, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle {
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val en      = Input(Bool())
        val addr    = Input(UInt(addr_width.W))
        val re      = Input(Bool())
        val we      = Input(Bool())
        val wmask   = Input(UInt((data_width/8).W))
        val size    = Input(UInt(2.W))
        val wdata   = Input(UInt(data_width.W))
        val rdata   = Output(UInt(data_width.W))
    })
    addResource("/sv/DPIC_RAM_1CYC.sv")
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
}

/** Wrapper for /sv/DPIC_RAM_2CYC.sv; same ports as `DPIC_RAM_1CYC`. */
class DPIC_RAM_2CYC(addr_width: Int, data_width: Int) extends BlackBox(Map("ADDR_WIDTH" -> addr_width, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle {
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val en      = Input(Bool())
        val addr    = Input(UInt(addr_width.W))
        val re      = Input(Bool())
        val we      = Input(Bool())
        val wmask   = Input(UInt((data_width/8).W))
        val size    = Input(UInt(2.W))
        val wdata   = Input(UInt(data_width.W))
        val rdata   = Output(UInt(data_width.W))
    })
    addResource("/sv/DPIC_RAM_2CYC.sv")
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
}

/** Wrapper for /sv/DPIC_UPDATE_GPR.sv: one register-write event per cycle. */
class DPIC_UPDATE_GPR(gpr_num: Int, data_width: Int) extends BlackBox(Map("GPR_NUM" -> gpr_num, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val id      = Input(UInt(log2Ceil(gpr_num).W))
        val wen     = Input(Bool())
        val wdata   = Input(UInt(data_width.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_UPDATE_GPR.sv")
}

/** Wrapper for /sv/DPIC_UPDATE_GPR2.sv: two register-write events per cycle. */
class DPIC_UPDATE_GPR2(gpr_num: Int, data_width: Int) extends BlackBox(Map("GPR_NUM" -> gpr_num, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val id1     = Input(UInt(log2Ceil(gpr_num).W))
        val wen1    = Input(Bool())
        val wdata1  = Input(UInt(data_width.W))
        val id2     = Input(UInt(log2Ceil(gpr_num).W))
        val wen2    = Input(Bool())
        val wdata2  = Input(UInt(data_width.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_UPDATE_GPR2.sv")
}

/** Wrapper for /sv/DPIC_UPDATE_PC.sv: one PC event per cycle. */
class DPIC_UPDATE_PC(data_width: Int) extends BlackBox(Map("DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val wen     = Input(Bool())
        val pc      = Input(UInt(data_width.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_UPDATE_PC.sv")
}

/** Wrapper for /sv/DPIC_UPDATE_PC2.sv: two PC events per cycle. */
class DPIC_UPDATE_PC2(data_width: Int) extends BlackBox(Map("DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val wen1    = Input(Bool())
        val wen2    = Input(Bool())
        val pc1     = Input(UInt(data_width.W))
        val pc2     = Input(UInt(data_width.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_UPDATE_PC2.sv")
}

/** Wrapper for /sv/DPIC_TRACE_MEM.sv: reports one memory access per cycle when `valid`. */
class DPIC_TRACE_MEM(addr_width: Int, data_width: Int) extends BlackBox(Map("ADDR_WIDTH" -> addr_width, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val valid   = Input(Bool())
        val addr    = Input(UInt(addr_width.W))
        val wmask   = Input(UInt((data_width/8).W))
        val size    = Input(UInt(2.W))
        val data    = Input(UInt(data_width.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_TRACE_MEM.sv")
}

/** Wrapper for /sv/DPIC_PERF_CACHE.sv (cache performance events). */
class DPIC_PERF_CACHE extends BlackBox with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val valid   = Input(Bool())
        val id      = Input(UInt(8.W))
        val access_type = Input(UInt(8.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_PERF_CACHE.sv")
}

/** Wrapper for /sv/DPIC_PERF_BRU.sv (branch performance events). */
class DPIC_PERF_BRU extends BlackBox with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val valid   = Input(Bool())
        val fail    = Input(UInt(8.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_PERF_BRU.sv")
}

/** Wrapper for /sv/DPIC_PERF_PIPE.sv (per-stage pipeline performance events). */
class DPIC_PERF_PIPE extends BlackBox with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val id      = Input(UInt(8.W))
        // Used to count the valid instructions passed to the next pipeline stage
        val invalid = Input(Bool())
        // Used to count the stalls imposed on the previous pipeline stage
        val stall   = Input(Bool())
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_PERF_PIPE.sv")
}

/** Wrapper for /sv/DPIC_PERF_BUFF.sv (buffer occupancy performance events). */
class DPIC_PERF_BUFF extends BlackBox with HasBlackBoxResource{
    val io = IO(new Bundle{
        val clk     = Input(Clock())
        val rst     = Input(Bool())
        val id      = Input(UInt(8.W))
        val head    = Input(UInt(8.W))
        val tail    = Input(UInt(8.W))
        val full    = Input(UInt(8.W))
        val reload  = Input(UInt(8.W))
    })
    addResource("/sv/DPIC_TYPES_DEFINE.sv")
    addResource("/sv/DPIC_PERF_BUFF.sv")
}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/GPR.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._

/**
  * Register file port with `rchannel` read ports and `wchannel` write ports.
  */
class GPRIO(dataBits: Int, addrBits: Int, rchannel: Int, wchannel: Int) extends Bundle {
    val raddr = Input(Vec(rchannel, UInt(addrBits.W)))
    val rdata = Output(Vec(rchannel, UInt(dataBits.W)))
    val wen = Input(Vec(wchannel, Bool()))
    val waddr = Input(Vec(wchannel, UInt(addrBits.W)))
    val wdata = Input(Vec(wchannel, UInt(dataBits.W)))
}

/**
  * General purpose register file.
  *
  * Register 0 always reads as zero and writes to it are dropped. Reads are
  * combinational and see the stored value (there is no write-to-read bypass
  * in this module). The registers have no reset value. When several write
  * ports target the same register in one cycle, the highest-numbered port wins.
  *
  * @param dataBits register width
  * @param regNum   number of registers
  * @param rchannel number of read ports
  * @param wchannel number of write ports
  */
class GPR(dataBits: Int, regNum: Int, rchannel: Int, wchannel: Int) extends Module {
    val io = IO(new GPRIO(dataBits, log2Up(regNum), rchannel, wchannel))
    val regs = Reg(Vec(regNum, UInt(dataBits.W)))
    // val regs = Reg(VecInit.fill(regNum)(0.U(dataBits.W)))
    // val regs = Mem(regNum, UInt(dataBits.W))
    for(i <- 0 until rchannel){
        io.rdata(i) := Mux(io.raddr(i) =/= 0.U, regs(io.raddr(i)), 0.U)
    }
    for(i <- 0 until wchannel){
        when(io.wen(i) && io.waddr(i) =/= 0.U){
            regs(io.waddr(i)) := io.wdata(i)
        }
    }
}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/InstrsBuff.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._
// NOTE(review): `bu` is not referenced anywhere in this file; this looks like
// an accidental IDE auto-import.
import nagicore.loongarch.nscscc2024.CtrlFlags.ldType.bu
import nagicore.GlobalConfg
import cache.CachePipedIO

/** Side-band data carried through the piped cache with every request. */
class InstrsBuffCacheBundle extends Bundle{
    // Marks the first request of a new prefetch stream.
    val new_trans = Bool()
}

class InstrsBuffIO(addrBits:Int, dataBits: Int, cacheBlockWords: Int) extends Bundle{
    val in = Input(new Bundle {
        // Flush the buffer and restart prefetching from `trans_addr`
        val new_trans   = Bool()
        // New prefetch start address
        val trans_addr  = UInt(addrBits.W)
        // Consume one entry from the buffer
        val fetch       = Bool()
    })
    val cache = Flipped(new CachePipedIO(addrBits, dataBits, cacheBlockWords, ()=>new InstrsBuffCacheBundle))
    val out = Output(new Bundle {
        val busy        = Output(Bool())
        val instr       = Output(UInt(dataBits.W))
    })
}

/**
 * Instruction prefetch buffer: streams consecutive cache blocks into a ring
 * buffer and presents the entry at the head on `out.instr`.
 *
 * @param addrBits address width
 * @param dataBits width of one buffered word
 * @param cacheBlockWords number of `dataBits`-wide words per cache block
 * @param blockLen number of cache blocks the buffer can hold; must be a
 *                 positive power of two
 */
class InstrsBuff(addrBits:Int, dataBits: Int, cacheBlockWords: Int, blockLen: Int) extends Module{
    // `x & (x-1) == 0` alone also accepts 0, which would build an empty buffer.
    require(blockLen > 0 && (blockLen&(blockLen-1))==0, s"blockLen must be a positive power of two, got ${blockLen}")
    val io = IO(new InstrsBuffIO(addrBits, dataBits, cacheBlockWords))
    val buff = RegInit(VecInit(Seq.fill(blockLen*cacheBlockWords)(0.U(dataBits.W))))
    val buff_head = RegInit(0.U(log2Up(blockLen*cacheBlockWords).W))
    val buff_tail = RegInit(0.U(log2Up(blockLen*cacheBlockWords).W))
    if(GlobalConfg.SIM){
        dontTouch(buff_head)
        dontTouch(buff_tail)
    }
    val buff_valid = RegInit(VecInit.fill(blockLen*cacheBlockWords)(false.B))
    val empty = !buff_valid(buff_head)
    // Full when the last slot of the block about to be written is still occupied.
    val full = buff_valid(buff_tail + (cacheBlockWords-1).U)

    val cache_addr = RegInit(0.U(addrBits.W))
    io.cache.front.bits.addr := cache_addr

    object State extends ChiselEnum {
        //  0       1               2               3
        val idle, wait_cache, wait_new_trans, continue_read = Value
    }

    val state = RegInit(State.idle)


    io.out.busy := io.in.new_trans || (state === State.wait_new_trans) || (state === State.wait_cache) || (state === State.continue_read && empty)
    io.out.instr := buff(buff_head)


    // Defaults; overridden below (Chisel last-connect) by cache_new_trans()
    // and by the idle state.
    io.cache.front.bits.pipedata.new_trans := false.B
    io.cache.front.bits.valid := true.B

    // Advance the prefetch address by one cache block per accepted request.
    when(!io.cache.front.stall && state =/= State.wait_cache){
        cache_addr := cache_addr + (cacheBlockWords*dataBits/8).U
    }

    // Word offset of the restart address inside its cache block; words
    // before it are not marked valid when that first block arrives.
    val new_trans_offset = RegInit(0.U(log2Up(cacheBlockWords).W))
    val word_len = log2Ceil(cacheBlockWords)
    val byte_len = log2Ceil(dataBits/8)

    // Issue the first (block-aligned) request of a new prefetch stream.
    def cache_new_trans()={
        val addr = io.in.trans_addr(addrBits-1, word_len+byte_len) ## 0.U((word_len+byte_len).W)
        cache_addr := addr
        io.cache.front.bits.addr := addr
        io.cache.front.bits.pipedata.new_trans := true.B
        io.cache.front.bits.valid := true.B
        state := State.wait_new_trans
        new_trans_offset := io.in.trans_addr(word_len+byte_len-1, byte_len)
    }

    switch(state){
        is(State.idle){
            io.cache.front.bits.valid := false.B
            when(io.in.new_trans){
                cache_new_trans()
            }
        }
        is(State.wait_cache){
            when(!io.cache.front.stall){
                cache_new_trans()
            }
        }
        is(State.wait_new_trans){
            // Responses of the previous stream are dropped until the one
            // tagged new_trans comes back.
            when(io.cache.back.bits.pipedata_s2.new_trans && io.cache.back.bits.valid){
                state := State.continue_read
            }
        }
        is(State.continue_read){
            when(io.in.new_trans){
                buff_head := 0.U
                buff_tail := 0.U
                buff_valid := VecInit.fill(blockLen*cacheBlockWords)(false.B)
                when(!io.cache.front.stall){
                    cache_new_trans()
                }otherwise{
                    // The cache is stalled: wait until it is free before restarting
                    state := State.wait_cache
                }
            }.otherwise{
                when(io.cache.back.bits.valid && !full){
                    for(i <- 0 until cacheBlockWords){
                        buff(buff_tail+i.U) := io.cache.back.bits.rline(i)
                        buff_valid(buff_tail+i.U) := (i.U >= new_trans_offset)
                    }
                    buff_tail := buff_tail + cacheBlockWords.U
                    buff_head := buff_head + new_trans_offset
                    new_trans_offset := 0.U
                }
                // Declared after the fill above, so on a simultaneous fill
                // and fetch this head update wins.
                when(io.in.fetch && !io.out.busy){
                    buff_head := buff_head + 1.U
                    buff_valid(buff_head) := false.B
                }
            }
        }
    }

    io.cache.front.bits.size := log2Up(dataBits/8).U
    io.cache.front.bits.uncache := false.B
    io.cache.front.bits.wdata := DontCare
    io.cache.front.bits.wmask := 0.U
    io.cache.back.stall := full

    if(GlobalConfg.SIM){
        val dpic_perf_instrs_buff = Module(new DPIC_PERF_BUFF)
        dpic_perf_instrs_buff.io.clk := clock
        dpic_perf_instrs_buff.io.rst := reset
        dpic_perf_instrs_buff.io.id := 0.U
        dpic_perf_instrs_buff.io.head := buff_head
        dpic_perf_instrs_buff.io.tail := buff_tail
        dpic_perf_instrs_buff.io.full := full
        dpic_perf_instrs_buff.io.reload := io.in.new_trans
    }
}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/MIAU.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._
import nagicore.bus._

/**
 * Max In Array Unit.
 * Finds the maximum value of an array in memory; designed for the final of
 * the Loongson Cup (NSCSCC) individual competition.
 *
 * After reset the unit reads words from 0x80400000 up to (not including)
 * 0x80700000 over `io.mem`, keeps the unsigned maximum in MXR, writes it to
 * 0x80700000 and finally sets FIR to 233. Any read on `io.cmd` returns the
 * current value of FIR, so software can poll for completion.
 *
 * @param addrBits AXI address width
 * @param dataBits AXI data width
 * @param idBits   AXI id width
 */
class MIAU(addrBits:Int, dataBits: Int, idBits: Int) extends Module{
    val io = IO(new Bundle{
        val cmd = Flipped(new AXI4IO(addrBits, dataBits, idBits))
        val mem = new AXI4IO(addrBits, dataBits, idBits)
    })

    // MXR: running maximum, CMPR: last word read, FIR: finish indicator.
    val MXR = RegInit(0.U(dataBits.W))
    val CMPR = RegInit(0.U(dataBits.W))
    val FIR = RegInit(0.U(dataBits.W))

    val raddr = Reg(UInt(addrBits.W))
    val rid = Reg(UInt(idBits.W))
    val rlen = Reg(UInt(8.W))

    // Read-channel state of the command port: idle, or returning beats.
    val rs_idle :: rs_r :: Nil = Enum(2)
    val rs = RegInit(rs_idle)

    when(io.cmd.ar.fire){
        raddr := io.cmd.ar.bits.addr
        rid := io.cmd.ar.bits.id
        rlen := io.cmd.ar.bits.len
        rs := rs_r
    }

    when(io.cmd.r.fire){
        raddr := raddr + (dataBits/8).U
        when(rlen === 0.U){
            rs := rs_idle
        }otherwise{
            rlen := rlen - 1.U
        }
    }
    io.cmd.ar.ready := rs === rs_idle
    io.cmd.r.valid := rs === rs_r
    io.cmd.r.bits.id := rid
    io.cmd.r.bits.last := rlen === 0.U
    io.cmd.r.bits.resp := 0.U
    // Every beat returns FIR, independent of the requested address.
    io.cmd.r.bits.data := FIR

    // Writes on the command port are not handled.
    io.cmd.aw <> DontCare
    io.cmd.w <> DontCare
    io.cmd.b <> DontCare

    val axi_w_agent = Module(new AXI4WriteAgent(addrBits, dataBits, 1))
    axi_w_agent.io.axi.aw <> io.mem.aw
    axi_w_agent.io.axi.w <> io.mem.w
    axi_w_agent.io.axi.b <> io.mem.b
    axi_w_agent.io.cmd.in <> DontCare
    axi_w_agent.io.cmd.in.req := false.B

    object State extends ChiselEnum {
        val idle    = Value(1.U)
        val read    = Value(2.U)
        val cmp     = Value(4.U)
        val write   = Value(8.U)
        val end     = Value(16.U)
    }

    val state = RegInit(State.idle)

    // NOTE(review): this register holds an address but is dataBits wide;
    // harmless while addrBits == dataBits.
    val mem_addr = RegInit("h80400000".U(dataBits.W))

    val axi_r_agent = Module(new AXI4ReadAgent(addrBits, dataBits, 1))
    axi_r_agent.io.axi.ar <> io.mem.ar
    axi_r_agent.io.axi.r <> io.mem.r
    axi_r_agent.io.cmd.in.addr := mem_addr
    axi_r_agent.io.cmd.in.len := 0.U
    // NOTE(review): elsewhere in this code base a `size` field is log2 of the
    // byte count (e.g. log2Up(dataBits/8) in InstrsBuff); log2Up(dataBits)
    // looks inconsistent with that - confirm against AXI4ReadAgent.
    axi_r_agent.io.cmd.in.size := log2Up(dataBits).U
    axi_r_agent.io.cmd.in.req := false.B

    when(state === State.idle){
        state := State.read
        axi_r_agent.io.cmd.in.req := true.B
    }

    when(state === State.read && axi_r_agent.io.cmd.out.ready){
        // printf(cf"state read at ${mem_addr}\n")
        state := State.cmp
        CMPR := axi_r_agent.io.cmd.out.rdata
        mem_addr := mem_addr + (dataBits/8).U
    }
    when(state === State.cmp){
        when(CMPR > MXR){
            MXR := CMPR
        }
        when(mem_addr === "h80700000".U){
            state := State.write
        }.otherwise{
            state := State.read
            axi_r_agent.io.cmd.in.req := true.B
        }
    }
    when(state === State.write){
        printf(cf"mia finish\n")
        axi_w_agent.io.cmd.in.req := true.B
        axi_w_agent.io.cmd.in.addr := "h80700000".U
        axi_w_agent.io.cmd.in.len := 0.U
        // NOTE(review): same question about the size encoding as on the read agent.
        axi_w_agent.io.cmd.in.size := log2Up(dataBits).U
        axi_w_agent.io.cmd.in.wdata(0) := MXR
        // NOTE(review): dataBits/4 one-bits is twice the number of byte lanes
        // (dataBits/8) - confirm the width of AXI4WriteAgent's wmask.
        axi_w_agent.io.cmd.in.wmask(0) := Fill(dataBits/4, "b1".U)
        state := State.end
    }
    when(state === State.end && axi_w_agent.io.cmd.out.ready){
        FIR := 233.U
    }

}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/MULU.scala:
--------------------------------------------------------------------------------
package nagicore.unit

import chisel3._
import chisel3.util._
import nagicore.utils._
import nagicore.GlobalConfg

/** Available multiplier implementations. */
object MULU_IMP extends Enumeration {
    type MULU_IMP = Value
    val none, synthesizer_1cyc, oneBitShift, xsArrayMul, MultiplierIP, synthesizer_DSP = Value
}

/** Multiplier opcodes: the two right-most characters of the matching ALU_OP encodings. */
object MULU_OP{
    val MUL = ALU_OP.MUL.takeRight(2)
    val MULH = ALU_OP.MULH.takeRight(2)
    val MULHU = ALU_OP.MULHU.takeRight(2)
}

/**
 * Multiplier.
 *
 * `op` selects the low word of the product (MUL), the high word of the
 * signed product (MULH) or the high word of the unsigned product (MULHU).
 *
 * When `GlobalConfg.SIM` is set the result is always computed with `*` in
 * a single cycle and only the `busy` timing depends on `imp_way`.
 * Otherwise `imp_way` selects the hardware:
 *   - xsArrayMul:      Xiangshan's ArrayMulDataModule
 *   - synthesizer_DSP: `*` with one input and one output register stage
 *   - MultiplierIP:    external Xilinx multiplier IP (BlackBox)
 *   - none:            no multiplier, output is DontCare
 *   - anything else (synthesizer_1cyc, oneBitShift): `*`, single cycle
 *
 * The multi-cycle implementations widen both operands by one bit
 * (zero-extended for MULHU, sign-extended otherwise) so that one signed
 * multiplier serves all three operations.
 *
 * @param dataBits operand width
 * @param imp_way  implementation selector
 * @note The original note said `valid` only needs to be high for one cycle
 *       and `busy` rises from the following cycle until the result is
 *       ready. In this code the xsArrayMul/MultiplierIP paths already raise
 *       `busy` in the same cycle as `valid`.
 * @note The port name `vaild` is a historical misspelling of "valid"; it is
 *       kept because other modules connect to it by that name.
 */
class MULU(dataBits: Int, imp_way: MULU_IMP.MULU_IMP = MULU_IMP.synthesizer_1cyc) extends Module{
    val io = IO(new Bundle{
        val a   = Input(UInt(dataBits.W))
        val b   = Input(UInt(dataBits.W))
        val op  = Input(UInt(2.W))
        val out = Output(UInt(dataBits.W))
        val vaild = Input(Bool())
        val busy = Output(Bool())
    })

    /** Low `dataBits` bits of a product. */
    private def lowWord(p: Bits) = p(dataBits-1, 0)

    /** Bits [2*dataBits-1 : dataBits] of a product. */
    private def highWord(p: Bits) = p(2*dataBits-1, dataBits)

    /** Widens an operand by one bit: 0 for MULHU, the operand's sign bit otherwise. */
    private def widenOperand(x: UInt) = Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), x(dataBits-1)) ## x

    /** Picks the architectural result out of the product of two widened operands. */
    private def selectResult(res: UInt) = Flags.CasesMux(io.op, Seq(
        MULU_OP.MUL     -> lowWord(res),
        MULU_OP.MULH    -> SignExt(highWord(res), dataBits),
        MULU_OP.MULHU   -> highWord(res),
    ), 0.U)

    /** Single-cycle result computed directly with the `*` operator. */
    private def directResult() = Flags.CasesMux(io.op, Seq(
        MULU_OP.MUL     -> lowWord(io.a.asSInt * io.b.asSInt).asUInt,
        MULU_OP.MULH    -> highWord(io.a.asSInt * io.b.asSInt).asUInt,
        MULU_OP.MULHU   -> highWord(io.a * io.b),
    ), 0.U)

    if(GlobalConfg.SIM){
        imp_way match {
            case MULU_IMP.xsArrayMul | MULU_IMP.MultiplierIP | MULU_IMP.synthesizer_DSP => {
                // Model a two-cycle occupancy for the multi-cycle implementations.
                io.busy := io.vaild || RegNext(io.vaild)
            }
            case _ => {
                io.busy := false.B
            }
        }
        io.out := directResult()
    }else{
        imp_way match {
            case MULU_IMP.xsArrayMul => {
                import nagicore.unit.ip.Xiangshan.ArrayMulDataModule
                val arrayMul = Module(new ArrayMulDataModule(dataBits+1))
                arrayMul.io.a := widenOperand(io.a)
                arrayMul.io.b := widenOperand(io.b)
                val valid_reg1 = RegNext(io.vaild)
                arrayMul.io.regEnables(0) := io.vaild
                arrayMul.io.regEnables(1) := valid_reg1
                // val res = arrayMul.io.result
                val res = RegNext(arrayMul.io.result)
                io.out := selectResult(res)
                io.busy := io.vaild || valid_reg1
            }
            case MULU_IMP.synthesizer_DSP => {
                def DSPInPipe[T <: Data](a: T) = RegNext(a)
                def DSPOutPipe[T <: Data](a: T) = RegNext(a)
                val a = widenOperand(io.a)
                val b = widenOperand(io.b)
                // The widened operands are two's-complement values, so the
                // product has to be signed. An unsigned multiply of the
                // sign-extended operands yields a wrong high word whenever an
                // operand is negative (e.g. MULH(-1, 1) would give 1 instead
                // of -1). MULHU is unaffected: its operands are zero-extended
                // and therefore non-negative.
                val res = DSPOutPipe((DSPInPipe(a).asSInt * DSPInPipe(b).asSInt).asUInt)
                io.out := selectResult(res)
                val busy = RegInit(false.B)
                when(io.vaild && !busy){ busy := true.B }
                // `vaild` delayed by the same two register stages as the data.
                val ready = DSPOutPipe(DSPInPipe(io.vaild))
                when(ready){ busy := false.B }
                io.busy := busy
            }
            case MULU_IMP.MultiplierIP => {
                Predef.println(s"Xilinx Multiplier IP mult_${dataBits+1}_unsigned_2stages needed")
                // NOTE(review): the IP name says "unsigned", but it is fed
                // sign-extended operands for MUL/MULH. Unless the IP is
                // actually configured as a signed multiplier, MULH will be
                // wrong for negative operands - confirm the IP configuration.
                class MultiplierIP extends BlackBox{
                    override val desiredName = s"mult_${dataBits+1}_unsigned_2stages"
                    val io = IO(new Bundle {
                        val CLK = Input(Clock())
                        val A = Input(UInt((dataBits+1).W))
                        val B = Input(UInt((dataBits+1).W))
                        val P = Output(UInt(((dataBits+1)*2).W))
                    })
                }
                val ip = Module(new MultiplierIP)
                ip.io.CLK := clock
                ip.io.A := widenOperand(io.a)
                ip.io.B := widenOperand(io.b)
                val res = ip.io.P
                io.out := selectResult(res)
                io.busy := io.vaild || RegNext(io.vaild) || RegNext(RegNext(io.vaild))
            }
            case MULU_IMP.none => {
                io.busy := false.B
                io.out := DontCare
            }
            case _ => {
                io.busy := false.B
                io.out := directResult()
            }
        }
    }

    // if(imp_way == MULU_IMP.synthesizer){

    // }else{
    //     // TODO
    //     /*
    //     Principle:
    //     For an n-bit by n-bit multiplication, Booth encoding turns the product
    //     into the sum of n/2 numbers of 2*n bits each (the partial products).
    //     The Wallace tree then splits that into 2*n Wallace trees of n/2 bits,
    //     which finally reduce the problem to one addition of two 2*n-bit numbers.
    //     Each n/2-bit Wallace tree adds n/2 one-bit values.
    //     */
    //     // x * y
    //     def booth2(x: UInt, y: UInt, n: Int, yi: Int) = {
    //         assert(yi>=1&&yi<=y.getWidth-1)
    //         val t = WireDefault(x)
    //         switch(y(yi+1,yi-1)){
    //             is(0.U){ t := 0.U }
    //             is(3.U){ t := x(n-2, 0) ## 0.U(1.W) }
    //             is(4.U){ t := x(n-2, 0) ## 0.U(1.W) }
    //             is(7.U){ t := 0.U }
    //         }
    //         Mux(y(yi+1), ~t + 1.U, t)
    //     }
    //     // Carry-Save Adder
    //     def CSA(a: UInt, b: UInt, cin: UInt) = {
    //         assert(a.getWidth==b.getWidth&&b.getWidth==cin.getWidth)
    //         val res = Vec(2, UInt(a.getWidth.W))
    //         val a_xor_b = a ^ b
    //         val a_and_b = a & b
    //         val sum = a_xor_b ^ cin
    //         val cout = a_and_b | (a_xor_b & cin)
    //         res(0) := sum
    //         res(1) := cout
    //         res
    //     }
    // }

}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/RingBuff.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit 2 | 3 | 
import chisel3._
import chisel3.util._
import nagicore.GlobalConfg

/**
  * IO bundle of RingBuff.
  *
  * @param dataT    factory for the element type
  * @param rchannel number of read ports
  */
class RingBuffIO[T <: Bundle](dataT: ()=> T, rchannel: Int) extends Bundle{
    val full    = Output(Bool())
    val empty   = Output(Bool())

    val push    = Input(Bool())
    val wdata   = Input(dataT())
    val pop     = Input(Bool())
    // Number of entries to pop minus one (popN + 1 entries are popped).
    val popN    = Input(UInt(log2Up(rchannel).W))
    val rdatas  = Output(Vec(rchannel, dataT()))
    val rvalids = Output(Vec(rchannel, Bool()))
    val clear   = Input(Bool())
}

/**
  * Ring queue with multiple read ports and a single write port.
  * To consume entries, raise pop and pass in popN; popN + 1 entries starting at
  * the head are invalidated and the head advances by popN + 1.
  * The caller must derive popN from rvalids: popping more entries than are
  * valid is not checked by this module.
  * A push while the queue is full is silently ignored.
  *
  * @param dataT    factory for the element type
  * @param len      capacity; must be a positive power of two so the pointers wrap
  * @param rchannel number of read ports
  * @param debug_id id reported to the DPI-C performance probe (simulation only)
  */
class RingBuff[T <: Bundle](dataT: ()=> T, len: Int, rchannel: Int, debug_id: Int) extends Module{
    // len == 0 used to slip through the bare (len & (len-1)) == 0 check.
    require(len > 0 && (len&(len-1))==0, s"RingBuff len must be a positive power of two, got $len")
    val io = IO(new RingBuffIO(dataT, rchannel))
    val buff = Reg(Vec(len, dataT()))
    val buff_head = RegInit(0.U(log2Up(len).W))
    val buff_tail = RegInit(0.U(log2Up(len).W))
    val buff_valid = RegInit(VecInit.fill(len)(false.B))
    // Occupancy is tracked per slot: the head slot being invalid means empty,
    // the tail (next write) slot being valid means full.
    val empty = !buff_valid(buff_head)
    val full = buff_valid(buff_tail)

    io.empty := empty
    io.full := full
    for(i <- 0 until rchannel){
        io.rdatas(i) := buff(buff_head+i.U)
        io.rvalids(i) := buff_valid(buff_head+i.U)
    }

    when(io.clear){
        buff_head := 0.U
        buff_tail := 0.U
        for(i <- 0 until len)
            buff_valid(i) := false.B
    }.otherwise{
        when(io.push && !full){
            buff_tail := buff_tail + 1.U
            buff(buff_tail) := io.wdata
            buff_valid(buff_tail) := true.B
        }

        // Written after the push block, so on a slot conflict the pop wins.
        when(io.pop){
            for(i <- 0 until rchannel){
                when(io.popN === i.U){
                    buff_head := buff_head + (i+1).U
                    for(j <- 0 to i){
                        buff_valid(buff_head + j.U) := false.B
                    }
                }
            }
        }
    }



    if(GlobalConfg.SIM){
        val dpic_perf_instrs_buff = Module(new DPIC_PERF_BUFF)
        dpic_perf_instrs_buff.io.clk := clock
        dpic_perf_instrs_buff.io.rst := reset
        dpic_perf_instrs_buff.io.id := debug_id.U
        dpic_perf_instrs_buff.io.head := buff_head
        dpic_perf_instrs_buff.io.tail := buff_tail
        dpic_perf_instrs_buff.io.full := full
        dpic_perf_instrs_buff.io.reload := io.clear
    }

}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/cache/CacheMini.scala:
--------------------------------------------------------------------------------
package nagicore.unit.cache

import chisel3._
import chisel3.util._

import nagicore.bus._
import chisel3.util.random.LFSR
import nagicore.utils.isPowerOf2
import nagicore.GlobalConfg
import nagicore.unit.RingBuff
/**
  * Small cache with a write buffer and a tiny register-based "L0" store.
  * A write is pushed into the write buffer in one cycle without stalling the
  * pipeline and is drained over AXI4 in the background. Reads that hit L0 are
  * answered combinationally; other reads go out over AXI4 once the write
  * buffer is empty and both AXI agents are ready.
  *
  * @note busy high means the request stalls the upstream pipeline stage
  *
  * @param addrBits     address width
  * @param dataBits     data width
  * @param writeBuffLen write buffer depth (power of two)
  * @param L0Size       number of L0 entries
  * @param debug_id     id reported to the DPI-C performance probes (simulation only)
  */
class CacheMini(addrBits:Int, dataBits: Int, writeBuffLen: Int, L0Size: Int, debug_id: Int) extends Module{
    require(isPowerOf2(writeBuffLen))
    val io = IO(new Bundle{
        val axi = new AXI4IO(addrBits, dataBits)
        val in = Input(new Bundle {
            val req     = Bool()
            val bits    = new Bundle {
                val addr    = UInt(addrBits.W)
                val size    = UInt(2.W)
                val we      = Bool()
                val wmask   = UInt((dataBits/8).W)
                val wdata   = UInt(dataBits.W)
                val uncache = Bool()
            }
        })
        val out = Output(new Bundle {
            val busy    = Bool()
            val rdata   = UInt(dataBits.W)
        })
    })
    class WriteInfo extends Bundle{
        val addr    = UInt(addrBits.W)
        val size    = UInt(2.W)
        val wmask   = UInt((dataBits/8).W)
        val wdata   = UInt(dataBits.W)
    }
    val write_buff = Module(new RingBuff(()=>new WriteInfo, writeBuffLen, 1, debug_id))
    write_buff.io.push  := false.B
    write_buff.io.pop   := false.B
    write_buff.io.wdata := DontCare
    write_buff.io.clear := false.B
    // One entry is drained at a time. This input was previously left undriven
    // here (UnCache drives it), which is rejected at elaboration.
    write_buff.io.popN  := 0.U

    val axi_w_agent = Module(new AXI4WriteAgent(addrBits, dataBits, 1))
    axi_w_agent.io.axi.aw <> io.axi.aw
    axi_w_agent.io.axi.w <> io.axi.w
    axi_w_agent.io.axi.b <> io.axi.b
    axi_w_agent.io.cmd.in <> DontCare
    axi_w_agent.io.cmd.in.req := false.B

    val axi_r_agent = Module(new AXI4ReadAgent(addrBits, dataBits, 1))
    axi_r_agent.io.axi.ar <> io.axi.ar
    axi_r_agent.io.axi.r <> io.axi.r
    axi_r_agent.io.cmd.in <> DontCare
    axi_r_agent.io.cmd.in.req := false.B

    // Request latched when it cannot be served in the cycle it arrives.
    val cmd_reg = Reg(io.in.bits.cloneType)

    val rdata_reg = Reg(UInt(dataBits.W))
    io.out.rdata := rdata_reg


    object State extends ChiselEnum {
        val idle            = Value(1.U)
        val waitWriteBuff   = Value(2.U)
        val waitReadReady   = Value(4.U)
        val waitRead        = Value(8.U)
    }
    val state = RegInit(State.idle)

    // Default; individual states override it below.
    io.out.busy := state =/= State.idle // ...

    class L0Data extends Bundle{
        val addr = UInt(addrBits.W)
        val data = UInt(dataBits.W)
        val valid = Bool()
    }
    val L0 = RegInit(VecInit(Seq.fill(L0Size){
        val bundle = Wire(new L0Data)
        bundle := DontCare
        bundle.valid := false.B
        bundle
    }))
    // val L0 = Vec(L0Size, RegInit({
    //     val bundle = Wire(new L0Data)
    //     bundle := DontCare
    //     bundle.valid := false.B
    //     bundle
    // }))
    val hits = VecInit.tabulate(L0Size)(i =>{
        L0(i).addr === io.in.bits.addr && L0(i).valid
    })
    val hit = hits.reduceTree(_||_) && !io.in.bits.uncache
    val hit_data = L0(PriorityEncoder(hits)).data
    // Inserts an entry into L0: existing entries shift up by one slot (the last
    // one is dropped) and the new entry takes slot 0.
    def updateL0(addr: UInt, data: UInt) = {
        for(i <- 0 until (L0Size-1)){
            L0(i+1) := L0(i)
        }
        L0(0).addr := addr
        L0(0).data := data
        L0(0).valid := true.B
    }

    // A read may only be issued after every buffered write has been sent.
    val ready_read = axi_r_agent.io.cmd.out.ready && write_buff.io.empty && axi_w_agent.io.cmd.out.ready

    if(GlobalConfg.SIM){
        import nagicore.unit.DPIC_PERF_CACHE
        val dpic_perf_cache = Module(new DPIC_PERF_CACHE)
        dpic_perf_cache.io.clk := clock
        dpic_perf_cache.io.rst := reset
        dpic_perf_cache.io.valid := io.in.req
        dpic_perf_cache.io.id := debug_id.U
        dpic_perf_cache.io.access_type := Cat(0.U, !io.out.busy)
    }

    switch(state){
        is(State.idle){
            when(io.in.req){
                when(io.in.bits.we){
                    // Write
                    when(write_buff.io.full){
                        state := State.waitWriteBuff
                        io.out.busy := true.B

                        cmd_reg := io.in.bits
                    }.otherwise{
                        write_buff.io.push := true.B
                        write_buff.io.wdata := io.in.bits
                    }

                    when(!io.in.bits.uncache){
                        // NOTE(review): wmask is ignored here, so a partial store
                        // records the full wdata word in L0 -- confirm intended.
                        updateL0(io.in.bits.addr, io.in.bits.wdata)
                    }
                }.otherwise{
                    // Read
                    when(hit){
                        io.out.rdata := hit_data
                        io.out.busy := false.B
                    }.elsewhen(ready_read){
                        axi_r_agent.io.cmd.in.req := true.B
                        axi_r_agent.io.cmd.in.addr := io.in.bits.addr
                        axi_r_agent.io.cmd.in.len := 0.U
                        axi_r_agent.io.cmd.in.size := log2Up(dataBits).U

                        state := State.waitRead
                        io.out.busy := true.B
                    }.otherwise{
                        cmd_reg := io.in.bits

                        state := State.waitReadReady
                        io.out.busy := true.B
                    }

                }
            }
        }
        is(State.waitWriteBuff){
            when(!write_buff.io.full){
                write_buff.io.push := true.B
                write_buff.io.wdata := cmd_reg

                io.out.busy := false.B
                state := State.idle
            }
        }
        is(State.waitReadReady){
            when(ready_read){
                axi_r_agent.io.cmd.in.req := true.B
                axi_r_agent.io.cmd.in.addr := cmd_reg.addr
                axi_r_agent.io.cmd.in.len := 0.U
                axi_r_agent.io.cmd.in.size := log2Up(dataBits).U

                state := State.waitRead
            }
        }
        is(State.waitRead){
            when(axi_r_agent.io.cmd.out.ready){
                rdata_reg := axi_r_agent.io.cmd.out.rdata
                assert(axi_r_agent.io.cmd.out.resp === 0.U)
                // Uses the live input rather than cmd_reg; presumably the stalled
                // upstream stage holds the request stable -- verify against caller.
                when(!io.in.bits.uncache){
                    updateL0(io.in.bits.addr, axi_r_agent.io.cmd.out.rdata)
                }
                state := State.idle
            }
        }
    }

    // Background drain: send the oldest buffered write whenever the agent is ready.
    when(!write_buff.io.empty){
        when(axi_w_agent.io.cmd.out.ready){
            axi_w_agent.io.cmd.in.req := true.B
            axi_w_agent.io.cmd.in.addr := write_buff.io.rdatas(0).addr
            axi_w_agent.io.cmd.in.len := 0.U
            axi_w_agent.io.cmd.in.size := write_buff.io.rdatas(0).size
            axi_w_agent.io.cmd.in.wdata(0) := write_buff.io.rdatas(0).wdata
            axi_w_agent.io.cmd.in.wmask(0) := write_buff.io.rdatas(0).wmask

            write_buff.io.pop := true.B
        }
    }

}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/cache/CacheType.scala:
--------------------------------------------------------------------------------
package nagicore.unit.cache

import chisel3._
import chisel3.util._

import nagicore.bus.{Ram, RamIO}

/** Memory flavours a cache storage array can be requested with. */
object CacheMemType extends Enumeration {
    type CacheMemType = Value
    val RAM_2cyc, BRAM_1cyc, RAM_1cyc = Value
}

/** Cache replacement policies. */
object CacheReplaceType extends Enumeration {
    type CacheReplaceType = Value
    val Random, LRU = Value
}

/**
  * Cache storage RAM. Per the original author, a synchronous RAM that returns
  * read data in the second cycle. Every value of imp currently elaborates the
  * same Ram module.
  *
  * @param width passed to Ram / RamIO
  * @param depth passed to Ram / RamIO
  * @param imp   requested memory flavour (currently has no effect)
  */
class CacheMem(width: Int, depth: Int, imp: CacheMemType.CacheMemType=CacheMemType.RAM_2cyc) extends Module{
    val io = IO(new RamIO(width, depth))
    imp match {
        case _ => {
            val sram = Module(new Ram(width, depth))
            sram.io <> io
        }
    }
}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/cache/UnCache.scala:
--------------------------------------------------------------------------------
package nagicore.unit.cache

import chisel3._
import chisel3.util._

import nagicore.bus._
import chisel3.util.random.LFSR
import nagicore.utils.isPowerOf2
import nagicore.GlobalConfg
import nagicore.unit.RingBuff
/**
  * Pass-through "cache" for uncached accesses: reads and writes always go to
  * the bus. A write is either issued to the AXI4 write agent directly (write
  * buffer empty and agent ready) or queued in the write buffer and drained in
  * the background, so it normally does not stall the pipeline.
  *
  * @note busy high means the request stalls the upstream pipeline stage
  *
  * @param addrBits     address width
  * @param dataBits     data width
  * @param writeBuffLen write buffer depth (power of two)
  * @param debug_id     id reported to the DPI-C performance probes (simulation only)
  */
class UnCache(addrBits:Int, dataBits: Int, writeBuffLen: Int, debug_id: Int=0) extends Module{
    require(isPowerOf2(writeBuffLen))
    val io = IO(new Bundle{
        val axi = new AXI4IO(addrBits, dataBits)
        val in = Input(new Bundle {
            val req     = Bool()
            val bits    = new Bundle {
                val addr    = UInt(addrBits.W)
                val we      = Bool()
                val wmask   = UInt((dataBits/8).W)
                val size    = UInt(2.W)
                val wdata   = UInt(dataBits.W)
            }
        })
        val out = Output(new Bundle {
            val busy    = Bool()
            val rdata   = UInt(dataBits.W)
        })
    })
    class WriteInfo extends Bundle{
        val addr    = UInt(addrBits.W)
        val size    = UInt(2.W)
        val wmask   = UInt((dataBits/8).W)
        val wdata   = UInt(dataBits.W)
    }
    val write_buff = Module(new RingBuff(()=>new WriteInfo, writeBuffLen, rchannel=1, debug_id=debug_id))
    // Defaults; overridden further down when a push / pop actually happens.
    write_buff.io.push  := false.B
    write_buff.io.pop   := false.B
    write_buff.io.wdata := DontCare
    write_buff.io.clear := false.B
    write_buff.io.popN  := 0.U

    val axi_w_agent = Module(new AXI4WriteAgent(addrBits, dataBits, 1))
    axi_w_agent.io.axi.aw <> io.axi.aw
    axi_w_agent.io.axi.w <> io.axi.w
    axi_w_agent.io.axi.b <> io.axi.b
    axi_w_agent.io.cmd.in <> DontCare
    axi_w_agent.io.cmd.in.req := false.B

    val axi_r_agent = Module(new AXI4ReadAgent(addrBits, dataBits, 1))
    axi_r_agent.io.axi.ar <> io.axi.ar
    axi_r_agent.io.axi.r <> io.axi.r
    axi_r_agent.io.cmd.in <> DontCare
    axi_r_agent.io.cmd.in.req := false.B

    // Request latched when it cannot be served in the cycle it arrives.
    val cmd_reg = Reg(io.in.bits.cloneType)

    val rdata_reg = Reg(UInt(dataBits.W))
    io.out.rdata := rdata_reg


    object State extends ChiselEnum {
        val idle            = Value(1.U)
        val waitWriteBuff   = Value(2.U)
        val waitReadReady   = Value(4.U)
        val waitRead        = Value(8.U)
    }
    val state = RegInit(State.idle)

    // Default; individual states override it below.
    io.out.busy := state =/= State.idle // ...

    // A read may only be issued after every buffered write has been sent.
    val ready_read = axi_r_agent.io.cmd.out.ready&&write_buff.io.empty&&axi_w_agent.io.cmd.out.ready

    /** Drives a single-beat read command for `addr` on the read agent. */
    def sendRead(addr: UInt): Unit = {
        axi_r_agent.io.cmd.in.req := true.B
        axi_r_agent.io.cmd.in.addr := addr
        axi_r_agent.io.cmd.in.len := 0.U
        // NOTE(review): writes pass the 2-bit request size, reads pass
        // log2Up(dataBits) -- confirm what AXI4ReadAgent expects here.
        axi_r_agent.io.cmd.in.size := log2Up(dataBits).U
    }

    /** Drives a single-beat write command on the write agent. */
    def sendWrite(addr: UInt, size: UInt, wdata: UInt, wmask: UInt): Unit = {
        axi_w_agent.io.cmd.in.req := true.B
        axi_w_agent.io.cmd.in.addr := addr
        axi_w_agent.io.cmd.in.len := 0.U
        axi_w_agent.io.cmd.in.size := size
        axi_w_agent.io.cmd.in.wdata(0) := wdata
        axi_w_agent.io.cmd.in.wmask(0) := wmask
    }

    switch(state){
        is(State.idle){
            when(io.in.req){
                val request = io.in.bits
                when(request.we){
                    // Write
                    when(write_buff.io.empty && axi_w_agent.io.cmd.out.ready){
                        // Nothing queued ahead of it: bypass the buffer.
                        sendWrite(request.addr, request.size, request.wdata, request.wmask)
                    }.elsewhen(write_buff.io.full){
                        state := State.waitWriteBuff
                        io.out.busy := true.B

                        cmd_reg := request
                    }.otherwise{
                        write_buff.io.push := true.B
                        write_buff.io.wdata := request
                    }
                }.otherwise{
                    // Read
                    when(ready_read){
                        sendRead(request.addr)

                        state := State.waitRead
                    }.otherwise{
                        cmd_reg := request

                        state := State.waitReadReady
                    }
                    // A read always stalls until the data has come back.
                    io.out.busy := true.B
                }
            }
        }
        is(State.waitWriteBuff){
            when(!write_buff.io.full){
                write_buff.io.push := true.B
                write_buff.io.wdata := cmd_reg

                io.out.busy := false.B
                state := State.idle
            }
        }
        is(State.waitReadReady){
            when(ready_read){
                sendRead(cmd_reg.addr)

                state := State.waitRead
            }
        }
        is(State.waitRead){
            when(axi_r_agent.io.cmd.out.ready){
                rdata_reg := axi_r_agent.io.cmd.out.rdata
                assert(axi_r_agent.io.cmd.out.resp === 0.U)
                state := State.idle
            }
        }
    }

    // Background drain: send the oldest buffered write whenever the agent is ready.
    when(!write_buff.io.empty && axi_w_agent.io.cmd.out.ready){
        val oldest = write_buff.io.rdatas(0)
        sendWrite(oldest.addr, oldest.size, oldest.wdata, oldest.wmask)

        write_buff.io.pop := true.B
    }

    if(GlobalConfg.SIM){
        import nagicore.unit.DPIC_PERF_CACHE
        val dpic_perf_cache = Module(new DPIC_PERF_CACHE)
        dpic_perf_cache.io.clk := clock
        dpic_perf_cache.io.rst := reset
        dpic_perf_cache.io.valid := io.in.req
        dpic_perf_cache.io.id := debug_id.U
        dpic_perf_cache.io.access_type := Cat(0.U, !io.out.busy)
    }

}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/ip/Xiangshan/ArrayMulDataModule.scala:
--------------------------------------------------------------------------------
/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package nagicore.unit.ip.Xiangshan

import chisel3._
import chisel3.util._
import nagicore.utils.SignExt

/**
  * Array multiplier taken from XiangShan: partial products are generated from
  * overlapping 3-bit groups of `a`, placed into per-bit columns, and the
  * columns are compressed layer by layer until at most two bits remain per
  * column; the two remaining rows are then added.
  *
  * Registers: the partial-product columns are registered under regEnables(0);
  * the compression layer reached at depth 4 is registered under regEnables(1).
  *
  * @param len operand width; result is 2 * len bits wide
  */
class ArrayMulDataModule(len: Int) extends Module {
    val io = IO(new Bundle() {
        val a, b = Input(UInt(len.W))
        val regEnables = Input(Vec(2, Bool()))
        val result = Output(UInt((2 * len).W))
    })
    val (a, b) = (io.a, io.b)

    // Candidate multiples of b selected per 3-bit group of a.
    val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
    b_sext := SignExt(b, len+1)
    bx2 := b_sext << 1
    neg_b := (~b_sext).asUInt
    neg_bx2 := neg_b << 1

    // columns(j) collects every partial-product bit of weight 2^j.
    val columns: Array[Seq[Bool]] = Array.fill(2*len)(Seq())

    var last_x = WireInit(0.U(3.W))
    for(i <- 0 until len by 2){
        val x =
            if(i == 0) Cat(a(1,0), 0.U(1.W))
            else if(i+1 == len) SignExt(a(i, i-1), 3)
            else a(i+1, i-1)
        val pp_temp = MuxLookup(x, 0.U)(Seq(
            1.U -> b_sext,
            2.U -> b_sext,
            3.U -> bx2,
            4.U -> neg_bx2,
            5.U -> neg_b,
            6.U -> neg_b
        ))
        val s = pp_temp(len)
        // Correction bits derived from the previous group's encoding.
        val t = MuxLookup(last_x, 0.U(2.W))(Seq(
            4.U -> 2.U(2.W),
            5.U -> 1.U(2.W),
            6.U -> 1.U(2.W)
        ))
        last_x = x
        val inTopGroup = (i == len-1) || (i == len-2)
        val (pp, weight) =
            if(i == 0) (Cat(~s, s, s, pp_temp), 0)
            else if(inTopGroup) (Cat(~s, pp_temp, t), i-2)
            else (Cat(1.U(1.W), ~s, pp_temp, t), i-2)
        // Drop bit k of pp into column weight + k; bits beyond the top column are discarded.
        for(k <- 0 until pp.getWidth){
            val j = weight + k
            if(j < columns.length){
                columns(j) = columns(j) :+ pp(k)
            }
        }
    }

    /**
      * Compresses one column.
      *
      * @param col bits of this column
      * @param cin carries fed in from the previous column's compressors
      * @return (bits staying in this column, carries consumed by the next column's
      *         compressors, carries appended to the next column)
      */
    def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = {
        val nothing = Seq[Bool]()
        col.size match {
            case 1 =>  // do nothing
                (col ++ cin, nothing, nothing)
            case 2 =>
                val c22 = Module(new C22)
                c22.io.in := col
                (c22.io.out(0).asBool +: cin, nothing, Seq(c22.io.out(1).asBool))
            case 3 =>
                val c32 = Module(new C32)
                c32.io.in := col
                (c32.io.out(0).asBool +: cin, nothing, Seq(c32.io.out(1).asBool))
            case 4 =>
                val c53 = Module(new C53)
                for((x, y) <- c53.io.in.take(4) zip col){
                    x := y
                }
                // The fifth compressor input takes the first carry-in, if there is one.
                c53.io.in.last := (if(cin.nonEmpty) cin.head else 0.U)
                (
                    Seq(c53.io.out(0).asBool) ++ cin.drop(1),
                    Seq(c53.io.out(1).asBool),
                    Seq(c53.io.out(2).asBool)
                )
            case _ =>
                // Split into a 4-bit chunk and the rest; the first carry-in goes to the chunk.
                val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin.take(1))
                val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin.drop(1))
                (s_1 ++ s_2, c_1_1 ++ c_2_1, c_1_2 ++ c_2_2)
        }
    }

    def max(in: Iterable[Int]): Int = in.reduce((a, b) => if(a>b) a else b)

    /**
      * Recursively compresses all columns until each holds at most two bits,
      * then returns the two rows (sum, carry) to be added.
      */
    def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = {
        if(max(cols.map(_.size)) <= 2){
            val sum = Cat(cols.map(_(0)).reverse)
            // Low columns holding a single bit contribute nothing to the carry row.
            var k = 0
            while(cols(k).size == 1) k = k+1
            val carry = Cat(cols.drop(k).map(_(1)).reverse)
            (sum, Cat(carry, 0.U(k.W)))
        } else {
            val columns_next = Array.fill(2*len)(Seq[Bool]())
            var cout1, cout2 = Seq[Bool]()
            for( i <- cols.indices){
                val (s, c1, c2) = addOneColumn(cols(i), cout1)
                columns_next(i) = s ++ cout2
                cout1 = c1
                cout2 = c2
            }

            // Pipeline cut after the layer computed at depth 4.
            val toNextLayer =
                if(depth == 4) columns_next.map(_.map(bit => RegEnable(bit, io.regEnables(1))))
                else columns_next

            addAll(toNextLayer, depth+1)
        }
    }

    val columns_reg = columns.map(_.map(bit => RegEnable(bit, io.regEnables(0))))
    val (sum, carry) = addAll(cols = columns_reg, depth = 0)

    io.result := sum + carry
}
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/ip/Xiangshan/CSA.scala:
--------------------------------------------------------------------------------
/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
***************************************************************************************/

package nagicore.unit.ip.Xiangshan

import chisel3._
import chisel3.util._

/**
  * Common IO for carry-save adders that reduce m input words to n output words.
  *
  * @param m   number of input words
  * @param n   number of output words
  * @param len width of every word
  */
abstract class CarrySaveAdderMToN(m: Int, n: Int)(len: Int) extends Module{
    val io = IO(new Bundle() {
        val in = Input(Vec(m, UInt(len.W)))
        val out = Output(Vec(n, UInt(len.W)))
    })
}

/** Bitwise half adder: out(0) holds the sum bits, out(1) the carry bits. */
class CSA2_2(len: Int) extends CarrySaveAdderMToN(2, 2)(len) {
    // temp(i) = {carry, sum} of bit position i.
    val temp = Wire(Vec(len, UInt(2.W)))
    for(i <- 0 until len){
        val a = io.in(0)(i)
        val b = io.in(1)(i)
        temp(i) := Cat(a & b, a ^ b)
    }
    // Regroup: output word k gathers bit k of every position, MSB first.
    for((word, k) <- io.out.zipWithIndex){
        word := Cat(temp.reverse.map(pair => pair(k)))
    }
}

/** Bitwise full adder: out(0) holds the sum bits, out(1) the carry bits. */
class CSA3_2(len: Int) extends CarrySaveAdderMToN(3, 2)(len){
    // temp(i) = {carry, sum} of bit position i.
    val temp = Wire(Vec(len, UInt(2.W)))
    for(i <- 0 until len){
        val a = io.in(0)(i)
        val b = io.in(1)(i)
        val cin = io.in(2)(i)
        val a_xor_b = a ^ b
        val a_and_b = a & b
        val sum = a_xor_b ^ cin
        val cout = a_and_b | (a_xor_b & cin)
        temp(i) := Cat(cout, sum)
    }
    // Regroup: output word k gathers bit k of every position, MSB first.
    for((word, k) <- io.out.zipWithIndex){
        word := Cat(temp.reverse.map(pair => pair(k)))
    }
}

/**
  * 5-to-3 compressor built from two chained CSA3_2 stages.
  * out(0) is the sum of the second stage, out(1) the carry of the first stage,
  * out(2) the carry of the second stage.
  */
class CSA5_3(len: Int)extends CarrySaveAdderMToN(5, 3)(len){
    val FAs = Array.fill(2)(Module(new CSA3_2(len)))
    val Array(first, second) = FAs
    first.io.in := io.in.take(3)
    second.io.in := VecInit(first.io.out(0), io.in(3), io.in(4))
    io.out := VecInit(second.io.out(0), first.io.out(1), second.io.out(1))
}

class C22 extends CSA2_2(1)
class C32 extends CSA3_2(1)
class C53 extends CSA5_3(1)


--------------------------------------------------------------------------------
/src/main/scala/nagicore/utils/Flags.scala:
--------------------------------------------------------------------------------
package nagicore.utils

import chisel3._
import chisel3.util._
import chisel3.util.experimental.decode._

/** Helpers for control flags that are written as binary strings such as "0101". */
object Flags{
    /** Binary string -> BitPat. */
    def bp(x : String) : BitPat = BitPat("b" + x)
    /** Binary string -> UInt literal. */
    def U(x: String):UInt = ("b" + x).U
    /** Concatenates several flag strings into one BitPat. */
    def castFlags2Bitpat(x : Iterable[String]) : BitPat = bp(x.reduce(_ + _))
    /**
      * One-hot mux: each case key is a one-hot binary string; the bit of `input`
      * at the position of that key's '1' selects the case value.
      */
    def onehotMux[T <: Data](input: UInt, cases: Iterable[(String, T)]) = {
        val keys = cases.map(_._1)
        // check one-hot
        assert(keys.map(key => key.count(_ == '1') == 1).reduce(_ && _))
        // check no duplicate
        assert(keys.toSet.size == cases.size)
        val selected = cases.map { case (key, value) =>
            val bitIndex = key.length - 1 - findFirstOne(key).get
            input(bitIndex) -> value
        }
        chisel3.util.Mux1H(selected)
    }
    /**
      * One-hot Flag Check
      *
      * @param input  one-hot encoded signal
      * @param expect one-hot binary string naming the bit to test
      * @return the bit of input at the position of the '1' in expect
      */
    def OHis[T <: Data](input: UInt, expect: String): Bool = {
        // check one-hot
        assert(expect.count(_ == '1')==1)
        val bitIndex = expect.length - 1 - findFirstOne(expect).get
        input(bitIndex).asBool
    }
    /** True when input matches the binary pattern expect (same width required). */
    def is[T <: Data](input: UInt, expect: String): Bool = {
        assert(input.getWidth == expect.length)
        input === bp(expect)
    }
    /**
      * Priority mux over binary-string keys: the first case whose key matches
      * input wins, otherwise default.
      */
    def CasesMux[T <: Data](input: UInt, cases: Iterable[(String, T)], default: T) : T = {
        // check no duplicate
        assert(cases.map(_._1).toSet.size == cases.size)
        // chisel3.util.Mux1H(cases.map(x => (input === BitPat(s"b${x._1}")) -> x._2))
        // decoder(EspressoMinimizer, input, TruthTable(
        //     cases.map(x => bp(x._1) -> BitPat(x._2.asUInt)),
        //     BitPat(s"b0")
        // ))
        val branches = cases.map { case (key, value) => (input === bp(key)) -> value }
        MuxCase(default, branches.toSeq)
    }
    /** true_res when input matches the binary pattern target, false_res otherwise. */
    def ifEqu[T <: Data](input: UInt, target: String, true_res: T, false_res: T) : T = {
        val matches = input === bp(target)
        Mux(matches, true_res, false_res)
    }
    /**
      * Decoder that uses `decoder` with truth-table minimization.
      *
      * @param flag_name   name of the control signal
      * @param input       input signal
      * @param decode_map  decode table, as (BitPat, Map[signal name, signal value])
      * @param default_map default decode table, as Map[signal name, signal value]
      * @return the value of the control signal flag_name for the given input
      */
    def decode_flag(flag_name: String, input: UInt, decode_map: Seq[(BitPat, Map[String, String])], default_map: Map[String, String]) = {
        val rows = decode_map.map { case (pattern, flags) =>
            pattern -> bp(flags.get(flag_name).get)
        }
        val fallback = bp(default_map.get(flag_name).get)
        decoder(EspressoMinimizer, input, TruthTable(rows, fallback))
    }
    /** Index of the first '1' in str, or None when there is none. */
    private def findFirstOne(str: String): Option[Int] = {
        val index = str.indexOf('1')
        if(index == -1) None else Some(index)
    }
}

--------------------------------------------------------------------------------
/src/main/scala/nagicore/utils/utils.scala:
--------------------------------------------------------------------------------
package nagicore.utils

import chisel3._
import chisel3.util._

/** Resizes a UInt to len bits: sign-extends when widening, keeps the low bits when narrowing. */
object SignExt {
    def apply(a: UInt, len: Int): UInt = {
        val aLen = a.getWidth
        val signBit = a(aLen-1)
        val missing = len - aLen
        if (missing <= 0) a(len-1,0)
        else Cat(Fill(missing, signBit), a)
    }
}

/** True for positive integers that have exactly one bit set. */
object isPowerOf2{
    def apply(x: Int): Boolean = {
        x > 0 && Integer.bitCount(x) == 1
    }
}

--------------------------------------------------------------------------------