├── .gitignore
├── .sv.style
├── LICENSE
├── Makefile
├── README.md
├── build.sc
├── docs
├── imgs
│ ├── SoC.png
│ ├── nagi.jpeg
│ ├── nagicore-5-stages.drawio.png
│ └── nagicore-dual.dual.drawio.png
└── nagicore.drawio
├── nscscc
├── async.v
├── ram_wrapper.v
├── thinpad_top.v
└── uart_wrapper.sv
└── src
└── main
├── resources
└── sv
│ ├── DPIC_PERF_BRU.sv
│ ├── DPIC_PERF_BUFF.sv
│ ├── DPIC_PERF_CACHE.sv
│ ├── DPIC_PERF_PIPE.sv
│ ├── DPIC_RAM_1CYC.sv
│ ├── DPIC_RAM_2CYC.sv
│ ├── DPIC_TRACE_MEM.sv
│ ├── DPIC_TYPES_DEFINE.sv
│ ├── DPIC_UPDATE_GPR.sv
│ ├── DPIC_UPDATE_GPR2.sv
│ ├── DPIC_UPDATE_PC.sv
│ ├── DPIC_UPDATE_PC2.sv
│ └── axi_cdc
│ ├── axi_cdc.v
│ ├── axi_cdc_rd.v
│ └── axi_cdc_wr.v
└── scala
└── nagicore
├── Main.scala
├── bus
├── AXI4.scala
└── RAM.scala
├── loongarch
├── ISA.scala
├── nscscc2024
│ ├── Config.scala
│ ├── Core.scala
│ ├── CtrlFlags.scala
│ ├── Decoder.scala
│ └── stages
│ │ ├── EX.scala
│ │ ├── ID.scala
│ │ ├── IF.scala
│ │ ├── MEM.scala
│ │ └── PREIF.scala
└── nscscc2024Dual
│ ├── Config.scala
│ ├── Core.scala
│ ├── CtrlFlags.scala
│ ├── Decoder.scala
│ └── stages
│ ├── EX.scala
│ ├── ID.scala
│ ├── IF.scala
│ ├── IS.scala
│ ├── MEM.scala
│ └── PREIF.scala
├── unit
├── ALU.scala
├── BPU.scala
├── BRU.scala
├── DIVU.scala
├── DPIC.scala
├── GPR.scala
├── InstrsBuff.scala
├── MIAU.scala
├── MULU.scala
├── RingBuff.scala
├── cache
│ ├── Cache.scala
│ ├── CacheMini.scala
│ ├── CachePiped.scala
│ ├── CacheType.scala
│ ├── CacheWT.scala
│ └── UnCache.scala
└── ip
│ └── Xiangshan
│ ├── ArrayMulDataModule.scala
│ └── CSA.scala
└── utils
├── Flags.scala
└── utils.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | /.bloop
2 | /.metals
3 | /.scala-build
4 | /.vscode
5 | /out
6 | /diagram
7 | /.cache
8 | /build
9 | compile_commands.json
10 | *.backup
11 | /.idea
12 | *.iml
13 | # *.drawio
14 | *.dtmp
15 | *.bkp
16 | /test_run_dir
17 | /nscscc/nagicore
18 |
19 |
20 | # Xilinx Vivado
21 | *.jou
22 | *.log
--------------------------------------------------------------------------------
/.sv.style:
--------------------------------------------------------------------------------
1 | # https://chipsalliance.github.io/verible/lint.html
2 | -package-filename
3 | -enum-name-style
4 | -struct-union-name-style
5 | -line-length=150
6 | -no-tabs=false
7 | -explicit-function-lifetime=false
8 | -parameter-name-style
9 | -generate-label=false
10 | -always-comb=false
11 | -explicit-parameter-storage-type=false
12 | -no-trailing-spaces=false
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | BUILD_DIR = build
2 | SRC = $(shell find src/main/scala -name "*.scala")
3 | SRC += $(shell find src/main/resources/sv -name "*.sv" -or -name "*.v")
4 |
5 | TARGET = $(BUILD_DIR)/Core.v
6 |
7 | $(TARGET): $(SRC)
8 | -rm -rf $(BUILD_DIR)
9 | mill nagicore.run hello
10 |
11 | generate: $(TARGET)
12 |
13 | generate-nscscc: $(SRC)
14 | -rm -rf $(BUILD_DIR)
15 | mill nagicore.run NSCSCC
16 | rm -rf ./nscscc/nagicore
17 | mkdir -p ./nscscc/nagicore
18 | cp build/*.sv nscscc/nagicore
19 |
20 | test: generate
21 | xmake b diff
22 | xmake r diff
23 |
24 | wave:
25 | xmake r wave
26 |
27 | config:
28 | xmake f --menu
29 |
30 | clean:
31 | -rm -rf $(BUILD_DIR)
32 |
33 | intellij-init:
34 | mill mill.idea.GenIdea/idea
35 |
36 | .PHONY: generate generate-nscscc test wave config clean intellij-init # none of these targets creates a file with its own name
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 凪(Nagi) 顺序单/双发射LA32R处理器
2 |
3 |
4 |
5 |
6 |
7 | ## Intro
8 |
9 | 凪(**Nagi**,意为“风平浪静”)包含两个支持部分LoongArch32R指令集的处理器:
10 | - 名为**NagiCore**的顺序单发射五级流水线标量处理器
11 | - 名为**NagiDual**的顺序双发射六级流水线超标量处理器
12 |
13 | Nagi系列处理器都具有高度可配置的Cache(缓存)、BTB(分支预测)、多周期乘除法器等部件,并支持AXI4总线访问(包括突发传输),能够在仿真环境和FPGA上正确通过所有等级测试和性能测试。
14 |
15 | Nagi系列处理器是使用Chisel进行开发的,与敏捷开发环境[NagiDev](https://github.com/MrAMS/Nagi)紧密联系,可基于Verilator进行完整仿真,同时也可生成龙芯杯所需的FPGA上板工程。
16 |
17 | Nagi系列处理器是为2024年[龙芯杯](http://www.nscscc.com/)个人赛设计的,在决赛现场设计了名为MIA的协处理器进行硬件加速,获得决赛第1名的成绩(`0.000s`),最终成绩为LoongArch指令集赛道全国第4名。
18 |
19 | ## 性能指标
20 |
21 | NagiCore性能一览
22 |
23 | | 测试程序 | 耗时(s) | 频率(MHz) | IPC | 分支预测准确率 | ICache命中率 |
24 | | ----------- | ------- | --------- | ---------- | -------------- | ------------ |
25 | | MATRIX | 0.117 | 198 | 0.40116772 | 0.994812547 | 0.999997993 |
26 | | CRYPTONIGHT | 0.214 | 198 | 0.5569556 | 0.999994914 | 0.99999922 |
27 |
28 | NagiDual性能一览
29 |
30 | | 测试程序 | 耗时(s) | 频率(MHz) | IPC | 分支预测准确率 | ICache命中率 |
31 | | ----------- | ------- | --------- | ---------- | -------------- | ------------ |
32 | | MATRIX | 0.132 | 162 | 0.43596125 | 0.994812547 | 0.999997562 |
33 | | CRYPTONIGHT | 0.230 | 162 | 0.63767296 | 0.999994914 | 0.999999003 |
34 |
35 | *注:IPC等性能指标是从计时器开始时记录的(即串口输出`0x06`后开始)*
36 |
37 | *注:仍有相当大的超频空间*
38 |
39 | ## CPU架构
40 |
41 |
42 |
43 |
44 |
45 |
46 | Nagi系列处理器的流水线采用各模块解耦的分布式控制。由于个人赛的性能测试程序(CRYPTONIGHT)对DCache极不友好,而且在高频率下访存代价非常大(SoC上的SRAM最高工作频率仅约50MHz,因此采用四周期访存),权衡之后,两款处理器均去除了DCache。
47 |
48 | NagiCore处理器采用五级流水线,包括预取指(PREIF)、取指(IF)、译码(ID)、执行(EX)、访存(MEM)五个阶段。
49 |
50 | ![NagiCore五级流水线结构图](docs/imgs/nagicore-5-stages.drawio.png)
51 |
52 | NagiDual 处理器采用六级流水线,包括预取指(PREIF)、取指(IF)、译码(ID)、发射(IS)、执行(EX)、访存(MEM) 六个阶段。采用简单的非对称双发射结构以追求频率。
53 |
54 | ![NagiDual六级流水线结构图](docs/imgs/nagicore-dual.dual.drawio.png)
55 |
56 | 更详尽的设计介绍请参见大赛设计报告`design.pdf`。
57 |
58 |
--------------------------------------------------------------------------------
/build.sc:
--------------------------------------------------------------------------------
1 | // import Mill dependency
2 | import mill._
3 | import mill.define.Sources
4 | import mill.modules.Util
5 | import mill.scalalib.TestModule.ScalaTest
6 | import scalalib._
7 | // support BSP
8 | import mill.bsp._
9 |
10 | import os.Path
11 |
12 | trait base extends SbtModule { m => // shared mill build settings; `m` aliases this module for the commented-out test object below
13 | override def millSourcePath = os.pwd // module root is the process working directory
14 | override def scalaVersion = "2.13.12"
15 | override def scalacOptions = Seq(
16 | "-language:reflectiveCalls",
17 | "-deprecation",
18 | "-feature",
19 | "-Xcheckinit",
20 | )
21 | override def ivyDeps = Agg( // library dependencies: Chisel 5.1.0
22 | ivy"org.chipsalliance::chisel:5.1.0",
23 | )
24 | override def scalacPluginIvyDeps = Agg( // compiler plugin, same version number as the Chisel library above
25 | ivy"org.chipsalliance:::chisel-plugin:5.1.0",
26 | )
27 | // object test extends SbtModuleTests with TestModule.ScalaTest {
28 | // override def ivyDeps = m.ivyDeps() ++ Agg(
29 | // ivy"org.scalatest::scalatest::3.2.16"
30 | // )
31 | // }
32 | }
33 |
34 | object nagicore extends base
35 |
--------------------------------------------------------------------------------
/docs/imgs/SoC.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/SoC.png
--------------------------------------------------------------------------------
/docs/imgs/nagi.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/nagi.jpeg
--------------------------------------------------------------------------------
/docs/imgs/nagicore-5-stages.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/nagicore-5-stages.drawio.png
--------------------------------------------------------------------------------
/docs/imgs/nagicore-dual.dual.drawio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrAMS/NagiCore/95a1d8bf26ba6a93709d5c94ec8f4eb83c6e924d/docs/imgs/nagicore-dual.dual.drawio.png
--------------------------------------------------------------------------------
/nscscc/async.v:
--------------------------------------------------------------------------------
1 | ////////////////////////////////////////////////////////
2 | // RS-232 RX and TX module
3 | // (c) fpga4fun.com & KNJN LLC - 2003 to 2016
4 |
5 | // The RS-232 settings are fixed
6 | // TX: 8-bit data, 2 stop, no-parity
7 | // RX: 8-bit data, 1 stop, no-parity (the receiver can accept more stop bits of course)
8 |
9 | //`define SIMULATION // in this mode, TX outputs one bit per clock cycle
10 | // and RX receives one bit per clock cycle (for fast simulations)
11 |
12 | ////////////////////////////////////////////////////////
13 |
14 | module async_transmitter(
15 | input wire clk,
16 | input wire TxD_start,
17 | input wire [7:0] TxD_data,
18 | output wire TxD,
19 | output wire TxD_busy
20 | );
21 |
22 | // Assert TxD_start for (at least) one clock cycle to start transmission of TxD_data
23 | // TxD_data is latched so that it doesn't have to stay valid while it is being sent
24 |
25 | parameter ClkFrequency = 25000000; // 25MHz
26 | parameter Baud = 115200;
27 |
28 | // generate
29 | // if(ClkFrequency> 1);
52 |
53 | case(TxD_state)
54 | 4'b0000: if(TxD_start) TxD_state <= 4'b0100;
55 | 4'b0100: if(BitTick) TxD_state <= 4'b1000; // start bit
56 | 4'b1000: if(BitTick) TxD_state <= 4'b1001; // bit 0
57 | 4'b1001: if(BitTick) TxD_state <= 4'b1010; // bit 1
58 | 4'b1010: if(BitTick) TxD_state <= 4'b1011; // bit 2
59 | 4'b1011: if(BitTick) TxD_state <= 4'b1100; // bit 3
60 | 4'b1100: if(BitTick) TxD_state <= 4'b1101; // bit 4
61 | 4'b1101: if(BitTick) TxD_state <= 4'b1110; // bit 5
62 | 4'b1110: if(BitTick) TxD_state <= 4'b1111; // bit 6
63 | 4'b1111: if(BitTick) TxD_state <= 4'b0010; // bit 7
64 | 4'b0010: if(BitTick) TxD_state <= 4'b0000; // stop1
65 | //4'b0011: if(BitTick) TxD_state <= 4'b0000; // stop2
66 | default: if(BitTick) TxD_state <= 4'b0000;
67 | endcase
68 | end
69 |
70 | assign TxD = (TxD_state<4) | (TxD_state[3] & TxD_shift[0]); // put together the start, data and stop bits
71 | endmodule
72 |
73 |
74 | ////////////////////////////////////////////////////////
75 | module async_receiver(
76 | input wire clk,
77 | input wire RxD,
78 | output reg RxD_data_ready,
79 | input wire RxD_clear,
80 | output reg [7:0] RxD_data // data received, valid only (for one clock cycle) when RxD_data_ready is asserted
81 | );
82 |
83 | parameter ClkFrequency = 25000000; // 25MHz
84 | parameter Baud = 115200;
85 |
86 | parameter Oversampling = 8; // needs to be a power of 2
87 | // we oversample the RxD line at a fixed rate to capture each RxD data bit at the "right" time
88 | // 8 times oversampling by default, use 16 for higher quality reception
89 |
90 | // generate
91 | // if(ClkFrequency>log2) log2=log2+1; end endfunction
136 | localparam l2o = log2(Oversampling);
137 | reg [l2o-2:0] OversamplingCnt = 0;
138 | always @(posedge clk) if(OversamplingTick) OversamplingCnt <= (RxD_state==0) ? 1'd0 : OversamplingCnt + 1'd1;
139 | wire sampleNow = OversamplingTick && (OversamplingCnt==Oversampling/2-1);
140 | `endif
141 |
142 | // now we can accumulate the RxD bits in a shift-register
143 | always @(posedge clk)
144 | case(RxD_state)
145 | 4'b0000: if(~RxD_bit) RxD_state <= `ifdef SIMULATION 4'b1000 `else 4'b0001 `endif; // start bit found?
146 | 4'b0001: if(sampleNow) RxD_state <= 4'b1000; // sync start bit to sampleNow
147 | 4'b1000: if(sampleNow) RxD_state <= 4'b1001; // bit 0
148 | 4'b1001: if(sampleNow) RxD_state <= 4'b1010; // bit 1
149 | 4'b1010: if(sampleNow) RxD_state <= 4'b1011; // bit 2
150 | 4'b1011: if(sampleNow) RxD_state <= 4'b1100; // bit 3
151 | 4'b1100: if(sampleNow) RxD_state <= 4'b1101; // bit 4
152 | 4'b1101: if(sampleNow) RxD_state <= 4'b1110; // bit 5
153 | 4'b1110: if(sampleNow) RxD_state <= 4'b1111; // bit 6
154 | 4'b1111: if(sampleNow) RxD_state <= 4'b0010; // bit 7
155 | 4'b0010: if(sampleNow) RxD_state <= 4'b0000; // stop bit
156 | default: RxD_state <= 4'b0000;
157 | endcase
158 |
159 | always @(posedge clk)
160 | if(sampleNow && RxD_state[3]) RxD_data <= {RxD_bit, RxD_data[7:1]};
161 |
162 | //reg RxD_data_error = 0;
163 | always @(posedge clk)
164 | begin
165 | if(RxD_clear)
166 | RxD_data_ready <= 0;
167 | else
168 | RxD_data_ready <= RxD_data_ready | (sampleNow && RxD_state==4'b0010 && RxD_bit); // make sure a stop bit is received
169 | //RxD_data_error <= (sampleNow && RxD_state==4'b0010 && ~RxD_bit); // error if a stop bit is not received
170 | end
171 |
172 | `ifdef SIMULATION
173 | assign RxD_idle = 0;
174 | `else
175 | reg [l2o+1:0] GapCnt = 0;
176 | always @(posedge clk) if (RxD_state!=0) GapCnt<=0; else if(OversamplingTick & ~GapCnt[log2(Oversampling)+1]) GapCnt <= GapCnt + 1'h1;
177 | assign RxD_idle = GapCnt[l2o+1];
178 | always @(posedge clk) RxD_endofpacket <= OversamplingTick & ~GapCnt[l2o+1] & &GapCnt[l2o:0];
179 | `endif
180 |
181 | endmodule
182 |
183 |
184 | ////////////////////////////////////////////////////////
185 | // dummy module (no ports, empty body) used to be able to raise an assertion in Verilog
186 | module ASSERTION_ERROR();
187 | endmodule
188 |
189 |
190 | ////////////////////////////////////////////////////////
191 | module BaudTickGen(
192 | input wire clk, enable,
193 | output wire tick // generate a tick at the specified baud rate * oversampling
194 | );
195 | parameter ClkFrequency = 25000000;
196 | parameter Baud = 115200;
197 | parameter Oversampling = 1;
198 | // log2(v) returns the smallest k for which (v >> k) is zero.
199 | function integer log2(input integer v); begin log2=0; while(v>>log2) log2=log2+1; end endfunction
200 | localparam AccWidth = log2(ClkFrequency/Baud)+8; // +/- 2% max timing error over a byte
201 | reg [AccWidth:0] Acc = 0; // AccWidth+1 bits: the top bit holds the carry out of the lower AccWidth bits
202 | localparam ShiftLimiter = log2(Baud*Oversampling >> (31-AccWidth)); // this makes sure Inc calculation doesn't overflow
203 | localparam Inc = ((Baud*Oversampling << (AccWidth-ShiftLimiter))+(ClkFrequency>>(ShiftLimiter+1)))/(ClkFrequency>>ShiftLimiter); // per-clock increment; the added ClkFrequency>>(ShiftLimiter+1) term is half the divisor
204 | always @(posedge clk) if(enable) Acc <= Acc[AccWidth-1:0] + Inc[AccWidth:0]; else Acc <= Inc[AccWidth:0]; // while disabled the accumulator is reloaded with Inc
205 | assign tick = Acc[AccWidth]; // tick is the carry bit of the accumulator
206 | endmodule
207 |
208 |
209 | ////////////////////////////////////////////////////////
210 |
--------------------------------------------------------------------------------
/nscscc/ram_wrapper.v:
--------------------------------------------------------------------------------
1 | module ram_wrapper(
2 | inout wire[31:0] ram_data, // RAM data bus
3 | output wire[19:0] ram_addr, // RAM address
4 | output wire[3:0] ram_be_n, // RAM byte enables, active low; hold at 0 when byte enables are not used
5 | output wire ram_ce_n, // RAM chip select, active low
6 | output wire ram_oe_n, // RAM read (output) enable, active low
7 | output wire ram_we_n, // RAM write enable, active low
8 | // Internal SRAM request interface; io_sram_re is accepted but not read by any assignment below.
9 | output [31:0] io_sram_dout,
10 | input [19:0] io_sram_addr,
11 | input [31:0] io_sram_din,
12 | input io_sram_en,
13 | io_sram_re,
14 | io_sram_we,
15 | input [3:0] io_sram_wmask
16 | );
17 | // Adapts the request interface to the active-low pins of an external RAM with a bidirectional data bus.
18 | assign ram_addr = io_sram_addr;
19 | wire we = io_sram_en&&io_sram_we;
20 | wire re = !we; // gives slightly better timing than io_sram_en&&io_sram_re
21 | assign ram_be_n = we?~io_sram_wmask:0; // all byte lanes enabled whenever not writing
22 | assign ram_ce_n = 0; // chip select permanently asserted
23 | assign ram_oe_n = !re;
24 | assign ram_we_n = !we;
25 | assign ram_data = we ? io_sram_din : 32'dz; // drive the bus only while writing, otherwise high-impedance
26 | assign io_sram_dout = we ? 0 : ram_data; // read data is forced to 0 during a write
27 |
28 |
29 | endmodule
--------------------------------------------------------------------------------
/nscscc/thinpad_top.v:
--------------------------------------------------------------------------------
1 | `default_nettype wire
2 |
3 | module thinpad_top(
4 | input wire clk_50M, // 50 MHz clock input
5 | input wire clk_11M0592, // 11.0592 MHz clock input
6 |
7 | input wire clock_btn, // BTN5 manual clock push-button, debounced, reads 1 while pressed
8 | input wire reset_btn, // BTN6 manual reset push-button, debounced, reads 1 while pressed
9 |
10 | input wire[3:0] touch_btn, // BTN1..BTN4 push-buttons, read 1 while pressed
11 | input wire[31:0] dip_sw, // 32-bit DIP switches, read 1 when set to "ON"
12 | output wire[15:0] leds, // 16 LEDs, driving 1 lights the LED
13 | output wire[7:0] dpy0, // low seven-segment digit incl. decimal point, driving 1 lights a segment
14 | output wire[7:0] dpy1, // high seven-segment digit incl. decimal point, driving 1 lights a segment
15 |
16 | // CPLD UART controller signals
17 | output wire uart_rdn, // UART read strobe, active low
18 | output wire uart_wrn, // UART write strobe, active low
19 | input wire uart_dataready, // UART receive data ready
20 | input wire uart_tbre, // transmit-data flag
21 | input wire uart_tsre, // transmission-complete flag
22 |
23 | // BaseRAM signals
24 | inout wire[31:0] base_ram_data, // BaseRAM data; the low 8 bits are shared with the CPLD UART controller
25 | output wire[19:0] base_ram_addr, // BaseRAM address
26 | output wire[3:0] base_ram_be_n, // BaseRAM byte enables, active low; hold at 0 when byte enables are not used
27 | output wire base_ram_ce_n, // BaseRAM chip select, active low
28 | output wire base_ram_oe_n, // BaseRAM read enable, active low
29 | output wire base_ram_we_n, // BaseRAM write enable, active low
30 |
31 | // ExtRAM signals
32 | inout wire[31:0] ext_ram_data, // ExtRAM data
33 | output wire[19:0] ext_ram_addr, // ExtRAM address
34 | output wire[3:0] ext_ram_be_n, // ExtRAM byte enables, active low; hold at 0 when byte enables are not used
35 | output wire ext_ram_ce_n, // ExtRAM chip select, active low
36 | output wire ext_ram_oe_n, // ExtRAM read enable, active low
37 | output wire ext_ram_we_n, // ExtRAM write enable, active low
38 |
39 | // Directly connected serial port
40 | output wire txd, // serial transmit line
41 | input wire rxd, // serial receive line
42 |
43 | // Flash memory signals, see the JS28F640 datasheet
44 | output wire [22:0]flash_a, // Flash address; a0 is only used in 8-bit mode and is meaningless in 16-bit mode
45 | inout wire [15:0]flash_d, // Flash data
46 | output wire flash_rp_n, // Flash reset, active low
47 | output wire flash_vpen, // Flash write protect; erase/program is blocked while low
48 | output wire flash_ce_n, // Flash chip select, active low
49 | output wire flash_oe_n, // Flash read enable, active low
50 | output wire flash_we_n, // Flash write enable, active low
51 | output wire flash_byte_n, // Flash 8-bit mode select, active low; set to 1 when using the 16-bit mode
52 |
53 | // Video output signals
54 | output wire[2:0] video_red, // red pixel, 3 bits
55 | output wire[2:0] video_green, // green pixel, 3 bits
56 | output wire[1:0] video_blue, // blue pixel, 2 bits
57 | output wire video_hsync, // horizontal sync
58 | output wire video_vsync, // vertical sync
59 | output wire video_clk, // pixel clock output
60 | output wire video_de // data-enable, distinguishes active video from the blanking interval
61 | );
62 |
63 | //assign leds = dip_sw[15:0];
64 | // Instruction-side SRAM request wires between the core and the BaseRAM wrapper.
65 | wire [31:0] io_isram_dout;
66 | wire [19:0] io_isram_addr;
67 | wire [31:0] io_isram_din;
68 | wire io_isram_en;
69 | wire io_isram_re;
70 | wire io_isram_we;
71 | wire [3:0] io_isram_wmask;
72 |
73 | ram_wrapper iwrapper(
74 | .ram_data (base_ram_data),
75 | .ram_addr (base_ram_addr),
76 | .ram_be_n (base_ram_be_n),
77 | .ram_ce_n (base_ram_ce_n),
78 | .ram_oe_n (base_ram_oe_n),
79 | .ram_we_n (base_ram_we_n),
80 |
81 | .io_sram_dout (io_isram_dout),
82 | .io_sram_addr (io_isram_addr),
83 | .io_sram_din (io_isram_din),
84 | .io_sram_en (io_isram_en),
85 | .io_sram_re (io_isram_re),
86 | .io_sram_we (io_isram_we),
87 | .io_sram_wmask (io_isram_wmask)
88 | );
89 | // Data-side SRAM request wires between the core and the ExtRAM wrapper.
90 | wire [31:0] io_dsram_dout;
91 | wire [19:0] io_dsram_addr;
92 | wire [31:0] io_dsram_din;
93 | wire io_dsram_en;
94 | wire io_dsram_we;
95 | wire io_dsram_re;
96 | wire [3:0] io_dsram_wmask;
97 |
98 | ram_wrapper dwrapper(
99 | .ram_data (ext_ram_data),
100 | .ram_addr (ext_ram_addr),
101 | .ram_be_n (ext_ram_be_n),
102 | .ram_ce_n (ext_ram_ce_n),
103 | .ram_oe_n (ext_ram_oe_n),
104 | .ram_we_n (ext_ram_we_n),
105 |
106 | .io_sram_dout (io_dsram_dout),
107 | .io_sram_addr (io_dsram_addr),
108 | .io_sram_din (io_dsram_din),
109 | .io_sram_en (io_dsram_en),
110 | .io_sram_re (io_dsram_re),
111 | .io_sram_we (io_dsram_we),
112 | .io_sram_wmask (io_dsram_wmask)
113 |
114 | );
115 | // UART bus signals driven by uart_wrapper towards the core.
116 | wire io_uart_ar_ready;
117 | wire [7:0] io_uart_r_id;
118 | wire [1:0] io_uart_r_resp;
119 | wire [31:0] io_uart_r_data;
120 | wire io_uart_r_last;
121 | wire io_uart_r_valid;
122 | wire io_uart_aw_ready;
123 | wire io_uart_w_ready;
124 | wire [7:0] io_uart_b_id;
125 | wire [1:0] io_uart_b_resp;
126 | wire io_uart_b_valid;
127 |
128 | // UART bus signals driven by the core towards uart_wrapper.
129 | wire [7:0] io_uart_ar_id;
130 | wire [31:0] io_uart_ar_addr;
131 | wire [7:0] io_uart_ar_len;
132 | wire [2:0] io_uart_ar_size;
133 | wire [1:0] io_uart_ar_burst;
134 | wire io_uart_ar_valid;
135 | wire io_uart_r_ready;
136 | wire [7:0] io_uart_aw_id;
137 | wire [31:0] io_uart_aw_addr;
138 | wire [7:0] io_uart_aw_len;
139 | wire [2:0] io_uart_aw_size;
140 | wire [1:0] io_uart_aw_burst;
141 | wire io_uart_aw_valid;
142 | wire [31:0] io_uart_w_data;
143 | wire [3:0] io_uart_w_strb;
144 | wire io_uart_w_last,
145 | io_uart_w_valid,
146 | io_uart_b_ready;
147 |
148 | // Clock and reset for the core and the UART wrapper.
149 | wire clk_cpu;
150 | wire clk_locked;
151 | reg rst_cpu;
152 |
153 | clk_wiz_0 clk_wiz_0_inst(
154 | .reset(reset_btn),
155 | .clk_50M(clk_50M),
156 | .clk_cpu(clk_cpu),
157 | .locked(clk_locked)
158 | );
159 | // rst_cpu asserts asynchronously while clk_locked is low and is released on the first clk_cpu rising edge after lock.
160 | always @(posedge clk_cpu or negedge clk_locked) begin
161 | if (~clk_locked) rst_cpu <= 1'b1;
162 | else rst_cpu <= 1'b0;
163 | end
164 |
165 | CoreNSCSCC core(
166 | .clock(clk_cpu),
167 | .reset(rst_cpu),
168 | .io_isram_dout(io_isram_dout),
169 | .io_dsram_dout(io_dsram_dout),
170 | .io_isram_addr(io_isram_addr),
171 | .io_isram_din(io_isram_din),
172 | .io_isram_en(io_isram_en),
173 | .io_isram_re(io_isram_re),
174 | .io_isram_we(io_isram_we),
175 | .io_isram_wmask(io_isram_wmask),
176 | .io_dsram_addr(io_dsram_addr),
177 | .io_dsram_din(io_dsram_din),
178 | .io_dsram_en(io_dsram_en),
179 | .io_dsram_re(io_dsram_re),
180 | .io_dsram_we(io_dsram_we),
181 | .io_dsram_wmask(io_dsram_wmask),
182 |
183 | .io_uart_ar_ready(io_uart_ar_ready),
184 | .io_uart_r_bits_id(io_uart_r_id),
185 | .io_uart_r_bits_resp(io_uart_r_resp),
186 | .io_uart_r_bits_data(io_uart_r_data),
187 | .io_uart_r_bits_last(io_uart_r_last),
188 | .io_uart_r_valid(io_uart_r_valid),
189 | .io_uart_aw_ready(io_uart_aw_ready),
190 | .io_uart_w_ready(io_uart_w_ready),
191 | .io_uart_b_bits_id(io_uart_b_id),
192 | .io_uart_b_bits_resp(io_uart_b_resp),
193 | .io_uart_b_valid(io_uart_b_valid),
194 |
195 | .io_uart_ar_bits_id(io_uart_ar_id),
196 | .io_uart_ar_bits_addr(io_uart_ar_addr),
197 | .io_uart_ar_bits_len(io_uart_ar_len),
198 | .io_uart_ar_bits_size(io_uart_ar_size),
199 | .io_uart_ar_bits_burst(io_uart_ar_burst),
200 | .io_uart_ar_valid(io_uart_ar_valid),
201 | .io_uart_r_ready(io_uart_r_ready),
202 | .io_uart_aw_bits_id(io_uart_aw_id),
203 | .io_uart_aw_bits_addr(io_uart_aw_addr),
204 | .io_uart_aw_bits_len(io_uart_aw_len),
205 | .io_uart_aw_bits_size(io_uart_aw_size),
206 | .io_uart_aw_bits_burst(io_uart_aw_burst),
207 | .io_uart_aw_valid(io_uart_aw_valid),
208 | .io_uart_w_bits_data(io_uart_w_data),
209 | .io_uart_w_bits_strb(io_uart_w_strb),
210 | .io_uart_w_bits_last(io_uart_w_last),
211 | .io_uart_w_valid(io_uart_w_valid),
212 | .io_uart_b_ready(io_uart_b_ready)
213 | );
214 | // NOTE(review): clk_freq presumably has to equal the real clk_cpu frequency produced by clk_wiz_0 — confirm.
215 | uart_wrapper#(
216 | .clk_freq(162000000),
217 | .uart_baud(9600)
218 | ) uart(
219 | .clk(clk_cpu),
220 | .rst(rst_cpu), // same clk_cpu-domain reset as the core; reset_btn also resets clk_wiz_0, so it is not a reliable synchronous reset on clk_cpu
221 |
222 | .txd(txd),
223 | .rxd(rxd),
224 |
225 | .io_uart_ar_ready(io_uart_ar_ready),
226 | .io_uart_r_id(io_uart_r_id),
227 | .io_uart_r_resp(io_uart_r_resp),
228 | .io_uart_r_data(io_uart_r_data),
229 | .io_uart_r_last(io_uart_r_last),
230 | .io_uart_r_valid(io_uart_r_valid),
231 | .io_uart_aw_ready(io_uart_aw_ready),
232 | .io_uart_w_ready(io_uart_w_ready),
233 | .io_uart_b_id(io_uart_b_id),
234 | .io_uart_b_resp(io_uart_b_resp),
235 | .io_uart_b_valid(io_uart_b_valid),
236 |
237 | .io_uart_ar_id(io_uart_ar_id),
238 | .io_uart_ar_addr(io_uart_ar_addr),
239 | .io_uart_ar_len(io_uart_ar_len),
240 | .io_uart_ar_size(io_uart_ar_size),
241 | .io_uart_ar_burst(io_uart_ar_burst),
242 | .io_uart_ar_valid(io_uart_ar_valid),
243 | .io_uart_r_ready(io_uart_r_ready),
244 | .io_uart_aw_id(io_uart_aw_id),
245 | .io_uart_aw_addr(io_uart_aw_addr),
246 | .io_uart_aw_len(io_uart_aw_len),
247 | .io_uart_aw_size(io_uart_aw_size),
248 | .io_uart_aw_burst(io_uart_aw_burst),
249 | .io_uart_aw_valid(io_uart_aw_valid),
250 | .io_uart_w_data(io_uart_w_data),
251 | .io_uart_w_strb(io_uart_w_strb),
252 | .io_uart_w_last(io_uart_w_last),
253 | .io_uart_w_valid(io_uart_w_valid),
254 | .io_uart_b_ready(io_uart_b_ready)
255 | );
256 | // The low 16 DIP switches are mirrored onto the LEDs.
257 | assign leds = dip_sw[15:0];
258 | // NOTE(review): dpy0/dpy1, uart_rdn/uart_wrn, flash_* and video_* outputs are not driven anywhere in this module — confirm this is intended.
259 | endmodule
260 |
--------------------------------------------------------------------------------
/nscscc/uart_wrapper.sv:
--------------------------------------------------------------------------------
1 | module uart_wrapper # (
2 | parameter clk_freq = 50000000,
3 | parameter uart_baud = 9600
4 | )(
5 | input wire clk,
6 | input wire rst,
7 | output io_uart_ar_ready,
8 | output [7:0] io_uart_r_id,
9 | output [1:0] io_uart_r_resp,
10 | output [31:0] io_uart_r_data,
11 | output io_uart_r_last,
12 | output io_uart_r_valid,
13 | output io_uart_aw_ready,
14 | output io_uart_w_ready,
15 | output [7:0] io_uart_b_id,
16 | output [1:0] io_uart_b_resp,
17 | output io_uart_b_valid,
18 |
19 | input [7:0] io_uart_ar_id,
20 | input [31:0] io_uart_ar_addr,
21 | input [7:0] io_uart_ar_len,
22 | input [2:0] io_uart_ar_size,
23 | input [1:0] io_uart_ar_burst,
24 | input io_uart_ar_valid,
25 | input io_uart_r_ready,
26 | input [7:0] io_uart_aw_id,
27 | input [31:0] io_uart_aw_addr,
28 | input [7:0] io_uart_aw_len,
29 | input [2:0] io_uart_aw_size,
30 | input [1:0] io_uart_aw_burst,
31 | input io_uart_aw_valid,
32 | input [31:0] io_uart_w_data,
33 | input [3:0] io_uart_w_strb,
34 | input io_uart_w_last,
35 | input io_uart_w_valid,
36 | input io_uart_b_ready,
37 |
38 |
39 | output wire txd, // direct serial transmit line
40 | input wire rxd // direct serial receive line
41 | );
42 | // Bridges the AR/R/AW/W/B bus channels to a byte-wide serial receiver and transmitter.
43 | wire [7:0] ext_uart_rx;
44 | reg [7:0] ext_uart_tx;
45 | wire ext_uart_ready, ext_uart_clear, ext_uart_busy;
46 | reg ext_uart_start;
47 |
48 | // Read channel: stater==0 accepts an address, stater==1 serves the read response.
49 | reg [7:0] rid;
50 | reg [31:0] raddr;
51 | reg stater;
52 |
53 | wire ar_fire = io_uart_ar_valid && io_uart_ar_ready;
54 | wire r_fire = io_uart_r_valid && io_uart_r_ready;
55 | wire [7:0] uart_state = {6'b0, ext_uart_ready, !ext_uart_busy}; // bit1: received byte pending, bit0: transmitter not busy
56 | // 0xBFD003F8 -> rw data
57 | // 0xBFD003FC -> state
58 | wire read_state = raddr[2];
59 | wire [7:0] rdata = read_state ? uart_state : ext_uart_rx;
60 |
61 | assign io_uart_ar_ready = !stater;
62 | assign io_uart_r_valid = (read_state ? 1 : ext_uart_ready) && stater; // status reads answer at once; data reads wait for a received byte
63 | assign io_uart_r_data = {4{rdata}}; // the byte is replicated onto all four byte lanes
64 | assign io_uart_r_id = rid;
65 | assign io_uart_r_last = io_uart_r_valid;
66 | assign io_uart_r_resp = 0;
67 |
68 | always @(posedge clk) begin
69 | if (rst) begin
70 | stater <= 0;
71 | end else begin
72 | if(!stater&&ar_fire) begin
73 | rid <= io_uart_ar_id;
74 | raddr <= io_uart_ar_addr;
75 | stater <= 1;
76 | end
77 | if(stater&&r_fire) begin
78 | stater <= 0;
79 | end
80 | end
81 | end
82 |
83 |
84 | async_receiver #(.ClkFrequency(clk_freq),.Baud(uart_baud)) // receiver, no parity, baud rate taken from uart_baud
85 | ext_uart_r(
86 | .clk(clk), // clock
87 | .RxD(rxd), // serial input
88 | .RxD_data_ready(ext_uart_ready), // data-received flag
89 | .RxD_clear(ext_uart_clear), // clears the data-received flag
90 | .RxD_data(ext_uart_rx) // the received byte
91 | );
92 | // The receive flag is cleared when a data-register read (not a status read) completes.
93 | assign ext_uart_clear = r_fire && !read_state;
94 |
95 | wire aw_fire = io_uart_aw_valid && io_uart_aw_ready;
96 | wire w_fire = io_uart_w_valid && io_uart_w_ready;
97 | wire b_fire = io_uart_b_valid && io_uart_b_ready;
98 | // Write channel: wid remembers the request id, wb is high while a write response is pending.
99 | reg [7:0] wid;
100 | reg wb;
101 |
102 | // parameter WS_IDLE = 0;
103 | // parameter WS_W = 1;
104 | // parameter WS_B = 2;
105 |
106 | assign io_uart_aw_ready = 1;
107 | assign io_uart_w_ready = !ext_uart_busy && !ext_uart_start; // also block while a start request is pending, so a second beat cannot overwrite ext_uart_tx before busy rises
108 | assign io_uart_b_id = wid;
109 | assign io_uart_b_valid = wb;
110 | assign io_uart_b_resp = 0;
111 |
112 |
113 | always @(posedge clk) begin
114 | if (rst) begin
115 | wb <= 0;
116 | end else begin
117 | if(aw_fire) begin
118 | wid <= io_uart_aw_id;
119 | end
120 | if(!wb&&w_fire) begin
121 | wb <= 1;
122 | end
123 | if(wb&&b_fire) begin
124 | wb <= 0;
125 | end
126 | end
127 | end
128 |
129 | always @(posedge clk) begin // latch the byte to send into ext_uart_tx and request transmission
130 | if(rst) begin
131 | ext_uart_tx <= 0;
132 | ext_uart_start <= 0;
133 | end else begin
134 | if(!ext_uart_busy&&w_fire)begin
135 | ext_uart_tx <= io_uart_w_data[7:0];
136 | ext_uart_start <= 1;
137 | end else if(ext_uart_busy) begin
138 | ext_uart_start <= 0; // the start request is dropped once the transmitter reports busy
139 | end
140 | end
141 | end
142 |
143 | async_transmitter #(.ClkFrequency(clk_freq),.Baud(uart_baud)) // transmitter, no parity, baud rate taken from uart_baud
144 | ext_uart_t(
145 | .clk(clk), // clock
146 | .TxD(txd), // serial output
147 | .TxD_busy(ext_uart_busy), // transmitter-busy indicator
148 | .TxD_start(ext_uart_start), // start-transmission request
149 | .TxD_data(ext_uart_tx) // byte to transmit
150 | );
151 |
152 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_PERF_BRU.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_perf_bru(input `uint8_t fail);
3 | module DPIC_PERF_BRU #( // simulation probe that forwards `fail` to the DPI-C function dpic_perf_bru
4 | parameter DATA_WIDTH = 32 // not referenced inside this module
5 | ) (
6 | input wire clk,
7 | input wire rst,
8 | input wire valid,
9 | input wire [7:0] fail
10 | );
11 | // Call the DPI-C hook on every rising clock edge while rst is low and valid is high.
12 | always @(posedge clk) begin
13 | if(!rst && valid) begin
14 | dpic_perf_bru(fail);
15 | end
16 | end
17 |
18 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_PERF_BUFF.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_update_instrs_buff(input `uint8_t id, input `uint8_t head, input `uint8_t tail, input `uint8_t full, input `uint8_t reload);
3 | module DPIC_PERF_BUFF #( // simulation probe that forwards its inputs to the DPI-C function dpic_update_instrs_buff
4 | parameter DATA_WIDTH = 32 // not referenced inside this module
5 | ) (
6 | input wire clk,
7 | input wire rst,
8 | input wire [7:0] id,
9 | input wire [7:0] head,
10 | input wire [7:0] tail,
11 | input wire [7:0] full,
12 | input wire [7:0] reload
13 | );
14 | // Call the DPI-C hook on every rising clock edge while rst is low (there is no valid qualifier).
15 | always @(posedge clk) begin
16 | if(!rst) begin
17 | dpic_update_instrs_buff(id, head, tail, full, reload);
18 | end
19 | end
20 |
21 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_PERF_CACHE.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_perf_cache(input `uint8_t id, input `uint8_t access_type);
3 | module DPIC_PERF_CACHE #( // simulation probe that forwards id and access_type to the DPI-C function dpic_perf_cache
4 | parameter DATA_WIDTH = 32 // not referenced inside this module
5 | ) (
6 | input wire clk,
7 | input wire rst,
8 | input wire valid,
9 | input wire [7:0] id,
10 | input wire [7:0] access_type
11 |
12 | );
13 | // Call the DPI-C hook on every rising clock edge while rst is low and valid is high.
14 | always @(posedge clk) begin
15 | if(!rst && valid) begin
16 | dpic_perf_cache(id, access_type);
17 | end
18 | end
19 |
20 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_PERF_PIPE.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_perf_pipe(input `uint8_t id, input `uint8_t valid, input `uint8_t stall);
3 | module DPIC_PERF_PIPE #( // simulation probe that forwards id, invalid and stall to the DPI-C function dpic_perf_pipe
4 | parameter DATA_WIDTH = 32 // not referenced inside this module
5 | ) (
6 | input wire clk,
7 | input wire rst,
8 | input wire [7:0] id,
9 | input wire invalid,
10 | input wire stall
11 | );
12 | // NOTE(review): the DPI import names its second argument `valid` but `invalid` is passed here — confirm which sense the C side expects.
13 | always @(posedge clk) begin
14 | if(!rst) begin
15 | dpic_perf_pipe(id, {7'b0, invalid}, {7'b0, stall}); // single-bit flags are zero-extended to 8 bits
16 | end
17 | end
18 |
19 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_RAM_1CYC.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | // import "DPI-C" function void dpic_bus_read(input `uint32_t addr, input `uint8_t size, output `uint32_t rdata);
3 | // import "DPI-C" function void dpic_bus_write(input `uint32_t addr, input `uint8_t wmask, input `uint32_t wdata);
4 |
5 | module DPIC_RAM_1CYC #( // simulation RAM model whose read data is produced combinationally through DPI-C calls
6 | parameter ADDR_WIDTH = 32,
7 | parameter DATA_WIDTH = 32
8 | ) (
9 | input wire clk,
10 | input wire rst,
11 | input wire en,
12 | input wire [ADDR_WIDTH-1:0] addr,
13 | input wire re,
14 | input wire we,
15 | input wire [DATA_WIDTH/8-1:0] wmask,
16 | input wire [1:0] size,
17 | input wire [DATA_WIDTH-1:0] wdata,
18 | output reg [DATA_WIDTH-1:0] rdata
19 | );
20 | // NOTE(review): the DPI imports in this file are commented out, so dpic_bus_read/dpic_bus_write must be declared elsewhere (presumably DPIC_RAM_2CYC.sv) — confirm. clk is not used below.
21 | always @(*) begin
22 | rdata = 0; // default when disabled, in reset, or writing
23 | if (en&&!rst) begin
24 | if(we) begin
25 | dpic_bus_write({{32-ADDR_WIDTH{1'b0}}, addr}, {{8-DATA_WIDTH/8{1'b0}}, wmask}, wdata); // addr zero-extended to 32 bits, wmask to 8 bits; write takes priority over read
26 | end else if(re) begin
27 | dpic_bus_read({{32-ADDR_WIDTH{1'b0}}, addr}, {6'b0, size}, rdata);
28 | end
29 | end
30 | end
31 |
32 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_RAM_2CYC.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_bus_read(input `uint32_t addr, input `uint8_t size, output `uint32_t rdata);
3 | import "DPI-C" function void dpic_bus_write(input `uint32_t addr, input `uint8_t wmask, input `uint32_t wdata);
4 |
5 | module DPIC_RAM_2CYC #(
6 |     parameter ADDR_WIDTH = 32,
7 |     parameter DATA_WIDTH = 32
8 | ) (
9 |     input wire clk,
10 |     input wire rst,
11 |     input wire en,
12 |     input wire [ADDR_WIDTH-1:0] addr,
13 |     input wire re,
14 |     input wire we,
15 |     input wire [DATA_WIDTH/8-1:0] wmask,
16 |     input wire [1:0] size,
17 |     input wire [DATA_WIDTH-1:0] wdata,
18 |     output reg [DATA_WIDTH-1:0] rdata
19 | );
20 |     // Clocked RAM model backed by the DPI-C bus functions: a read issued in one cycle is
21 |     // registered into rdata at the rising clock edge; synchronous reset clears rdata.
22 |     // The DPI result buffer must be a variable, not a net: the call assigns it procedurally.
23 |     reg [DATA_WIDTH-1:0] rdata_dpi;
24 |     always @(posedge clk) begin
25 |         if (rst) begin
26 |             rdata <= 0;
27 |         end else begin
28 |             if (en) begin
29 |                 if (we) begin // a write takes priority over a read
30 |                     dpic_bus_write({{32-ADDR_WIDTH{1'b0}}, addr}, {{8-DATA_WIDTH/8{1'b0}}, wmask}, wdata);
31 |                 end else if (re) begin
32 |                     dpic_bus_read({{32-ADDR_WIDTH{1'b0}}, addr}, {6'b0, size}, rdata_dpi);
33 |                     rdata <= rdata_dpi; // rdata_dpi was just written by the call above
34 |                 end
35 |             end
36 |         end
37 |     end
38 |
39 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_TRACE_MEM.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_trace_mem(input `uint32_t addr, input `uint8_t size, input `uint32_t data, input `uint8_t wmask);
3 | module DPIC_TRACE_MEM #(
4 |     parameter ADDR_WIDTH = 32,
5 |     parameter DATA_WIDTH = 32
6 | ) (
7 |     input wire clk,
8 |     input wire rst,
9 |     input wire valid,
10 |     input wire [ADDR_WIDTH-1:0] addr,
11 |     input wire [DATA_WIDTH/8-1:0] wmask,
12 |     input wire [1:0] size,
13 |     input wire [DATA_WIDTH-1:0] data // sized by DATA_WIDTH so it matches the 32-DATA_WIDTH padding below
14 | );
15 |     // Reports one memory access to dpic_trace_mem on each rising clock edge where valid is high and reset is low.
16 |     always @(posedge clk) begin
17 |         if (!rst && valid) begin
18 |             dpic_trace_mem({{32-ADDR_WIDTH{1'b0}}, addr}, {6'b0, size}, {{32-DATA_WIDTH{1'b0}}, data}, {{8-DATA_WIDTH/8{1'b0}}, wmask});
19 |         end
20 |     end
21 |
22 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_TYPES_DEFINE.sv:
--------------------------------------------------------------------------------
1 | // typedef byte uint8_t;
2 | // typedef shortint uint16_t;
3 | // typedef int uint32_t;
4 | // typedef longint uint64_t;
5 | // Text macros naming unsigned integer types of 8/16/32/64 bits; the DPI-C import declarations use them as argument types.
6 | `define uint8_t byte unsigned
7 | `define uint16_t shortint unsigned
8 | `define uint32_t int unsigned
9 | `define uint64_t longint unsigned
10 |
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_GPR.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_update_gpr(input `uint8_t id, input `uint32_t value);
3 |
4 | module DPIC_UPDATE_GPR #(
5 |     parameter GPR_NUM    = 32,
6 |     parameter DATA_WIDTH = 32
7 | ) (
8 |     input wire                       clk,
9 |     input wire                       rst,
10 |     input wire [$clog2(GPR_NUM)-1:0] id,
11 |     input wire                       wen,
12 |     input wire [DATA_WIDTH-1:0]      wdata
13 | );
14 |     // Number of zero bits that widen the register index to the byte-wide DPI argument.
15 |     localparam ID_PAD = 8 - $clog2(GPR_NUM);
16 |
17 |     // Report enabled writes to any register other than index 0 while reset is low.
18 |     always @(posedge clk)
19 |         if (!rst && wen && id != 0) dpic_update_gpr({{ID_PAD{1'b0}}, id}, wdata);
20 |
21 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_GPR2.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_update_gpr(input `uint8_t id, input `uint32_t value);
3 |
4 | module DPIC_UPDATE_GPR2 #(
5 |     parameter GPR_NUM    = 32,
6 |     parameter DATA_WIDTH = 32
7 | ) (
8 |     input wire                       clk,
9 |     input wire                       rst,
10 |     input wire [$clog2(GPR_NUM)-1:0] id1,
11 |     input wire                       wen1,
12 |     input wire [DATA_WIDTH-1:0]      wdata1,
13 |     input wire [$clog2(GPR_NUM)-1:0] id2,
14 |     input wire                       wen2,
15 |     input wire [DATA_WIDTH-1:0]      wdata2
16 | );
17 |     // Number of zero bits that widen a register index to the byte-wide DPI argument.
18 |     localparam ID_PAD = 8 - $clog2(GPR_NUM);
19 |
20 |     // Two write ports reported in order (port 1, then port 2); writes to index 0 are skipped.
21 |     always @(posedge clk) begin
22 |         if (!rst) begin
23 |             if (wen1 && id1 != 0) dpic_update_gpr({{ID_PAD{1'b0}}, id1}, wdata1);
24 |             if (wen2 && id2 != 0) dpic_update_gpr({{ID_PAD{1'b0}}, id2}, wdata2);
25 |         end
26 |     end
27 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_PC.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_update_pc(input `uint32_t value);
3 | // Passes the program counter to dpic_update_pc on each rising clock edge
4 | // where wen is high and reset is low.
5 | module DPIC_UPDATE_PC #(parameter DATA_WIDTH = 32) (
6 |     input wire                  clk,
7 |     input wire                  rst,
8 |     input wire                  wen,
9 |     input wire [DATA_WIDTH-1:0] pc
10 | );
11 |
12 |     // pc is handed to the DPI function unchanged.
13 |     always @(posedge clk) begin
14 |         if (wen && !rst)
15 |             dpic_update_pc(pc);
16 |     end
17 |
18 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/DPIC_UPDATE_PC2.sv:
--------------------------------------------------------------------------------
1 | `include "DPIC_TYPES_DEFINE.sv"
2 | import "DPI-C" function void dpic_update_pc2(input `uint32_t pc1, input `uint8_t valid1, input `uint32_t pc2, input `uint8_t valid2);
3 | // Passes two program counters and their enable flags to dpic_update_pc2
4 | // on every rising clock edge while reset is low (the call is not gated by wen1/wen2).
5 | module DPIC_UPDATE_PC2 #(parameter DATA_WIDTH = 32) (
6 |     input wire                  clk,
7 |     input wire                  rst,
8 |     input wire                  wen1,
9 |     input wire [DATA_WIDTH-1:0] pc1,
10 |     input wire                  wen2,
11 |     input wire [DATA_WIDTH-1:0] pc2
12 | );
13 |     // Enable flags zero-extended to the byte-wide DPI arguments.
14 |     wire [7:0] valid1_byte = {7'b0, wen1};
15 |     wire [7:0] valid2_byte = {7'b0, wen2};
16 |
17 |     always @(posedge clk)
18 |         if (!rst) dpic_update_pc2(pc1, valid1_byte, pc2, valid2_byte);
19 |
20 | endmodule
--------------------------------------------------------------------------------
/src/main/resources/sv/axi_cdc/axi_cdc.v:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Copyright (c) 2019 Alex Forencich
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
23 | */
24 |
25 | // Language: Verilog 2001
26 |
27 | `resetall
28 | `timescale 1ns / 1ps
29 | `default_nettype none
30 |
31 | /*
32 | * AXI4 clock domain crossing module
33 | */
34 | module axi_cdc #
35 | (
36 | // Width of data bus in bits
37 | parameter DATA_WIDTH = 32,
38 | // Width of address bus in bits
39 | parameter ADDR_WIDTH = 32,
40 | // Width of wstrb (width of data bus in words)
41 | parameter STRB_WIDTH = (DATA_WIDTH/8),
42 | // Width of ID
43 | parameter ID_WIDTH = 4
44 | )
45 | (
46 | /*
47 | * AXI4 slave interface
48 | */
49 | input wire s_clk,
50 | input wire s_rst,
51 | input wire [ADDR_WIDTH-1:0] s_axi_awaddr,
52 | input wire [ID_WIDTH-1:0] s_axi_awid,
53 | input wire [8-1:0] s_axi_awlen,
54 | input wire [3-1:0] s_axi_awsize,
55 | input wire [2-1:0] s_axi_awburst,
56 | input wire [2:0] s_axi_awprot,
57 | input wire s_axi_awvalid,
58 | output wire s_axi_awready,
59 | input wire [DATA_WIDTH-1:0] s_axi_wdata,
60 | input wire [STRB_WIDTH-1:0] s_axi_wstrb,
61 | input wire s_axi_wlast,
62 | input wire s_axi_wvalid,
63 | output wire s_axi_wready,
64 | output wire [ID_WIDTH-1:0] s_axi_bid,
65 | output wire [1:0] s_axi_bresp,
66 | output wire s_axi_bvalid,
67 | input wire s_axi_bready,
68 | input wire [ADDR_WIDTH-1:0] s_axi_araddr,
69 | input wire [ID_WIDTH-1:0] s_axi_arid,
70 | input wire [8-1:0] s_axi_arlen,
71 | input wire [3-1:0] s_axi_arsize,
72 | input wire [2-1:0] s_axi_arburst,
73 | input wire [2:0] s_axi_arprot,
74 | input wire s_axi_arvalid,
75 | output wire s_axi_arready,
76 | output wire [DATA_WIDTH-1:0] s_axi_rdata,
77 | output wire [ID_WIDTH-1:0] s_axi_rid,
78 | output wire s_axi_rlast,
79 | output wire [1:0] s_axi_rresp,
80 | output wire s_axi_rvalid,
81 | input wire s_axi_rready,
82 |
83 | /*
84 | * AXI4 master interface
85 | */
86 | input wire m_clk,
87 | input wire m_rst,
88 | output wire [ADDR_WIDTH-1:0] m_axi_awaddr,
89 | output wire [ID_WIDTH-1:0] m_axi_awid,
90 | output wire [8-1:0] m_axi_awlen,
91 | output wire [3-1:0] m_axi_awsize,
92 | output wire [2-1:0] m_axi_awburst,
93 | output wire [2:0] m_axi_awprot,
94 | output wire m_axi_awvalid,
95 | input wire m_axi_awready,
96 | output wire [DATA_WIDTH-1:0] m_axi_wdata,
97 | output wire [STRB_WIDTH-1:0] m_axi_wstrb,
98 | output wire m_axi_wlast,
99 | output wire m_axi_wvalid,
100 | input wire m_axi_wready,
101 | input wire [ID_WIDTH-1:0] m_axi_bid,
102 | input wire [1:0] m_axi_bresp,
103 | input wire m_axi_bvalid,
104 | output wire m_axi_bready,
105 | output wire [ADDR_WIDTH-1:0] m_axi_araddr,
106 | output wire [ID_WIDTH-1:0] m_axi_arid,
107 | output wire [8-1:0] m_axi_arlen,
108 | output wire [3-1:0] m_axi_arsize,
109 | output wire [2-1:0] m_axi_arburst,
110 | output wire [2:0] m_axi_arprot,
111 | output wire m_axi_arvalid,
112 | input wire m_axi_arready,
113 | input wire [DATA_WIDTH-1:0] m_axi_rdata,
114 | input wire [ID_WIDTH-1:0] m_axi_rid,
115 | input wire m_axi_rlast,
116 | input wire [1:0] m_axi_rresp,
117 | input wire m_axi_rvalid,
118 | output wire m_axi_rready
119 | );
120 | // Write channels (AW/W/B) cross between the s_clk and m_clk domains through axi_cdc_wr.
121 | axi_cdc_wr #(
122 | .DATA_WIDTH(DATA_WIDTH),
123 | .ADDR_WIDTH(ADDR_WIDTH),
124 | .STRB_WIDTH(STRB_WIDTH),
125 | // Forward ID_WIDTH so the sub-module ID ports match the widths declared on this wrapper
126 | .ID_WIDTH(ID_WIDTH)
127 | )
128 | axi_cdc_wr_inst (
129 | // AXI4 slave interface
130 | .s_clk(s_clk),
131 | .s_rst(s_rst),
132 | .s_axi_awaddr(s_axi_awaddr),
133 | .s_axi_awid(s_axi_awid),
134 | .s_axi_awlen(s_axi_awlen),
135 | .s_axi_awsize(s_axi_awsize),
136 | .s_axi_awburst(s_axi_awburst),
137 | .s_axi_awprot(s_axi_awprot),
138 | .s_axi_awvalid(s_axi_awvalid),
139 | .s_axi_awready(s_axi_awready),
140 | .s_axi_wdata(s_axi_wdata),
141 | .s_axi_wstrb(s_axi_wstrb),
142 | .s_axi_wlast(s_axi_wlast),
143 | .s_axi_wvalid(s_axi_wvalid),
144 | .s_axi_wready(s_axi_wready),
145 | .s_axi_bid(s_axi_bid),
146 | .s_axi_bresp(s_axi_bresp),
147 | .s_axi_bvalid(s_axi_bvalid),
148 | .s_axi_bready(s_axi_bready),
149 |
150 | /*
151 | * AXI4 master interface
152 | */
153 | .m_clk(m_clk),
154 | .m_rst(m_rst),
155 | .m_axi_awaddr(m_axi_awaddr),
156 | .m_axi_awid(m_axi_awid),
157 | .m_axi_awlen(m_axi_awlen),
158 | .m_axi_awsize(m_axi_awsize),
159 | .m_axi_awburst(m_axi_awburst),
160 | .m_axi_awprot(m_axi_awprot),
161 | .m_axi_awvalid(m_axi_awvalid),
162 | .m_axi_awready(m_axi_awready),
163 | .m_axi_wdata(m_axi_wdata),
164 | .m_axi_wstrb(m_axi_wstrb),
165 | .m_axi_wlast(m_axi_wlast),
166 | .m_axi_wvalid(m_axi_wvalid),
167 | .m_axi_wready(m_axi_wready),
168 | .m_axi_bid(m_axi_bid),
169 | .m_axi_bresp(m_axi_bresp),
170 | .m_axi_bvalid(m_axi_bvalid),
171 | .m_axi_bready(m_axi_bready)
172 | );
173 | // Read channels (AR/R) cross between the s_clk and m_clk domains through axi_cdc_rd.
174 | axi_cdc_rd #(
175 | .DATA_WIDTH(DATA_WIDTH),
176 | .ADDR_WIDTH(ADDR_WIDTH),
177 | .STRB_WIDTH(STRB_WIDTH),
178 | // Forward ID_WIDTH so the sub-module ID ports match the widths declared on this wrapper
179 | .ID_WIDTH(ID_WIDTH)
180 | )
181 | axi_cdc_rd_inst (
182 | // AXI4 slave interface
183 | .s_clk(s_clk),
184 | .s_rst(s_rst),
185 | .s_axi_araddr(s_axi_araddr),
186 | .s_axi_arid(s_axi_arid),
187 | .s_axi_arlen(s_axi_arlen),
188 | .s_axi_arsize(s_axi_arsize),
189 | .s_axi_arburst(s_axi_arburst),
190 | .s_axi_arprot(s_axi_arprot),
191 | .s_axi_arvalid(s_axi_arvalid),
192 | .s_axi_arready(s_axi_arready),
193 | .s_axi_rdata(s_axi_rdata),
194 | .s_axi_rid(s_axi_rid),
195 | .s_axi_rlast(s_axi_rlast),
196 | .s_axi_rresp(s_axi_rresp),
197 | .s_axi_rvalid(s_axi_rvalid),
198 | .s_axi_rready(s_axi_rready),
199 |
200 | /*
201 | * AXI4 master interface
202 | */
203 | .m_clk(m_clk),
204 | .m_rst(m_rst),
205 | .m_axi_araddr(m_axi_araddr),
206 | .m_axi_arid(m_axi_arid),
207 | .m_axi_arlen(m_axi_arlen),
208 | .m_axi_arsize(m_axi_arsize),
209 | .m_axi_arburst(m_axi_arburst),
210 | .m_axi_arprot(m_axi_arprot),
211 | .m_axi_arvalid(m_axi_arvalid),
212 | .m_axi_arready(m_axi_arready),
213 | .m_axi_rdata(m_axi_rdata),
214 | .m_axi_rid(m_axi_rid),
215 | .m_axi_rlast(m_axi_rlast),
216 | .m_axi_rresp(m_axi_rresp),
217 | .m_axi_rvalid(m_axi_rvalid),
218 | .m_axi_rready(m_axi_rready)
219 | );
220 |
221 | endmodule
222 |
223 | `resetall
224 |
--------------------------------------------------------------------------------
/src/main/resources/sv/axi_cdc/axi_cdc_rd.v:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Copyright (c) 2019 Alex Forencich
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
23 | */
24 |
25 | // Language: Verilog 2001
26 |
27 | `resetall
28 | `timescale 1ns / 1ps
29 | `default_nettype none
30 |
31 | /*
32 | * AXI4 clock domain crossing module (read)
33 | */
34 | module axi_cdc_rd #
35 | (
36 | // Width of data bus in bits
37 | parameter DATA_WIDTH = 32,
38 | // Width of address bus in bits
39 | parameter ADDR_WIDTH = 32,
40 | // Width of wstrb (width of data bus in words); not referenced by the read path
41 | parameter STRB_WIDTH = (DATA_WIDTH/8),
42 | // Width of ID
43 | parameter ID_WIDTH = 4
44 | )
45 | (
46 | /*
47 | * AXI4 slave interface (s_clk domain)
48 | */
49 | input wire s_clk,
50 | input wire s_rst,
51 | input wire [ADDR_WIDTH-1:0] s_axi_araddr,
52 |
53 | input wire [ID_WIDTH-1:0] s_axi_arid,
54 | input wire [8-1:0] s_axi_arlen,
55 | input wire [3-1:0] s_axi_arsize,
56 | input wire [2-1:0] s_axi_arburst,
57 |
58 | input wire [2:0] s_axi_arprot,
59 | input wire s_axi_arvalid,
60 | output wire s_axi_arready,
61 |
62 | output wire [DATA_WIDTH-1:0] s_axi_rdata,
63 |
64 | output wire [ID_WIDTH-1:0] s_axi_rid,
65 | output wire s_axi_rlast,
66 |
67 | output wire [1:0] s_axi_rresp,
68 | output wire s_axi_rvalid,
69 | input wire s_axi_rready,
70 |
71 | /*
72 | * AXI4 master interface (m_clk domain)
73 | */
74 | input wire m_clk,
75 | input wire m_rst,
76 | output wire [ADDR_WIDTH-1:0] m_axi_araddr,
77 |
78 | output wire [ID_WIDTH-1:0] m_axi_arid,
79 | output wire [8-1:0] m_axi_arlen,
80 | output wire [3-1:0] m_axi_arsize,
81 | output wire [2-1:0] m_axi_arburst,
82 |
83 | output wire [2:0] m_axi_arprot,
84 | output wire m_axi_arvalid,
85 | input wire m_axi_arready,
86 | input wire [DATA_WIDTH-1:0] m_axi_rdata,
87 |
88 | input wire [ID_WIDTH-1:0] m_axi_rid,
89 | input wire m_axi_rlast,
90 |
91 | input wire [1:0] m_axi_rresp,
92 | input wire m_axi_rvalid,
93 | output wire m_axi_rready
94 | );
95 | // Handshake: each side runs a 3-state FSM; s_flag_reg (request) and m_flag_reg (completion) cross domains through 2-stage synchronizers.
96 | reg [1:0] s_state_reg = 2'd0;
97 | reg s_flag_reg = 1'b0;
98 | (* srl_style = "register" *)
99 | reg s_flag_sync_reg_1 = 1'b0;
100 | (* srl_style = "register" *)
101 | reg s_flag_sync_reg_2 = 1'b0;
102 |
103 | reg [1:0] m_state_reg = 2'd0;
104 | reg m_flag_reg = 1'b0;
105 | (* srl_style = "register" *)
106 | reg m_flag_sync_reg_1 = 1'b0;
107 | (* srl_style = "register" *)
108 | reg m_flag_sync_reg_2 = 1'b0;
109 | // Holding registers for the AR request and R response on the slave side.
110 | reg [ADDR_WIDTH-1:0] s_axi_araddr_reg = {ADDR_WIDTH{1'b0}};
111 |
112 | reg [ID_WIDTH-1:0] s_axi_arid_reg = {ID_WIDTH{1'b0}};
113 | reg [8-1:0] s_axi_arlen_reg = {8{1'b0}};
114 | reg [3-1:0] s_axi_arsize_reg = {3{1'b0}};
115 | reg [2-1:0] s_axi_arburst_reg = {2{1'b0}};
116 |
117 | reg [2:0] s_axi_arprot_reg = 3'd0;
118 | reg s_axi_arvalid_reg = 1'b0;
119 | reg [DATA_WIDTH-1:0] s_axi_rdata_reg = {DATA_WIDTH{1'b0}};
120 |
121 | reg [ID_WIDTH-1:0] s_axi_rid_reg = {ID_WIDTH{1'b0}};
122 | reg s_axi_rlast_reg = 1'b0;
123 |
124 | reg [1:0] s_axi_rresp_reg = 2'b00;
125 | reg s_axi_rvalid_reg = 1'b0;
126 | // Holding registers for the AR request and R response on the master side.
127 | reg [ADDR_WIDTH-1:0] m_axi_araddr_reg = {ADDR_WIDTH{1'b0}};
128 |
129 | reg [ID_WIDTH-1:0] m_axi_arid_reg = {ID_WIDTH{1'b0}};
130 | reg [8-1:0] m_axi_arlen_reg = {8{1'b0}};
131 | reg [3-1:0] m_axi_arsize_reg = {3{1'b0}};
132 | reg [2-1:0] m_axi_arburst_reg = {2{1'b0}};
133 |
134 | reg [2:0] m_axi_arprot_reg = 3'd0;
135 | reg m_axi_arvalid_reg = 1'b0;
136 | reg [DATA_WIDTH-1:0] m_axi_rdata_reg = {DATA_WIDTH{1'b0}};
137 |
138 | reg [ID_WIDTH-1:0] m_axi_rid_reg = {ID_WIDTH{1'b0}};
139 | reg m_axi_rlast_reg = 1'b0;
140 |
141 | reg [1:0] m_axi_rresp_reg = 2'b00;
142 | reg m_axi_rvalid_reg = 1'b1;
143 | // A new AR is accepted only while no request is held and no R response is pending.
144 | assign s_axi_arready = !s_axi_arvalid_reg && !s_axi_rvalid_reg;
145 | assign s_axi_rdata = s_axi_rdata_reg;
146 |
147 | assign s_axi_rid = s_axi_rid_reg;
148 | assign s_axi_rlast = s_axi_rlast_reg;
149 |
150 | assign s_axi_rresp = s_axi_rresp_reg;
151 | assign s_axi_rvalid = s_axi_rvalid_reg;
152 |
153 | assign m_axi_araddr = m_axi_araddr_reg;
154 |
155 | assign m_axi_arid = m_axi_arid_reg;
156 | assign m_axi_arlen = m_axi_arlen_reg;
157 | assign m_axi_arsize = m_axi_arsize_reg;
158 | assign m_axi_arburst = m_axi_arburst_reg;
159 |
160 | assign m_axi_arprot = m_axi_arprot_reg;
161 | assign m_axi_arvalid = m_axi_arvalid_reg;
162 | assign m_axi_rready = !m_axi_rvalid_reg;
163 | // NOTE(review): m_axi_rready drops after one captured R beat, so only a single beat is returned per AR - presumably only single-beat reads are issued; confirm.
164 | // slave side
165 | always @(posedge s_clk) begin
166 | s_axi_rvalid_reg <= s_axi_rvalid_reg && !s_axi_rready;
167 | // Latch the incoming AR while the holding register is free (same condition as s_axi_arready).
168 | if (!s_axi_arvalid_reg && !s_axi_rvalid_reg) begin
169 | s_axi_araddr_reg <= s_axi_araddr;
170 |
171 | s_axi_arid_reg <= s_axi_arid;
172 | s_axi_arlen_reg <= s_axi_arlen;
173 | s_axi_arsize_reg <= s_axi_arsize;
174 | s_axi_arburst_reg <= s_axi_arburst;
175 |
176 | s_axi_arprot_reg <= s_axi_arprot;
177 | s_axi_arvalid_reg <= s_axi_arvalid;
178 | end
179 | // Slave FSM: 0 = raise s_flag once a request is held, 1 = wait for synchronized m_flag and take the response, 2 = wait for m_flag to drop and free the request register.
180 | case (s_state_reg)
181 | 2'd0: begin
182 | if (s_axi_arvalid_reg) begin
183 | s_state_reg <= 2'd1;
184 | s_flag_reg <= 1'b1;
185 | end
186 | end
187 | 2'd1: begin
188 | if (m_flag_sync_reg_2) begin
189 | s_state_reg <= 2'd2;
190 | s_flag_reg <= 1'b0;
191 | s_axi_rdata_reg <= m_axi_rdata_reg;
192 | // m_axi_r*_reg are m_clk-domain registers read here directly in the s_clk domain.
193 | s_axi_rid_reg <= m_axi_rid_reg;
194 | s_axi_rlast_reg <= m_axi_rlast_reg;
195 |
196 | s_axi_rresp_reg <= m_axi_rresp_reg;
197 | s_axi_rvalid_reg <= 1'b1;
198 | end
199 | end
200 | 2'd2: begin
201 | if (!m_flag_sync_reg_2) begin
202 | s_state_reg <= 2'd0;
203 | s_axi_arvalid_reg <= 1'b0;
204 | end
205 | end
206 | endcase
207 | // Synchronous reset is assigned last, so it overrides the assignments above.
208 | if (s_rst) begin
209 | s_state_reg <= 2'd0;
210 | s_flag_reg <= 1'b0;
211 | s_axi_arvalid_reg <= 1'b0;
212 | s_axi_rvalid_reg <= 1'b0;
213 | end
214 | end
215 |
216 | // synchronization
217 | always @(posedge s_clk) begin
218 | m_flag_sync_reg_1 <= m_flag_reg;
219 | m_flag_sync_reg_2 <= m_flag_sync_reg_1;
220 | end
221 |
222 | always @(posedge m_clk) begin
223 | s_flag_sync_reg_1 <= s_flag_reg;
224 | s_flag_sync_reg_2 <= s_flag_sync_reg_1;
225 | end
226 |
227 | // master side
228 | always @(posedge m_clk) begin
229 | m_axi_arvalid_reg <= m_axi_arvalid_reg && !m_axi_arready;
230 | // Capture the R beat while the response register is empty (m_axi_rready is high).
231 | if (!m_axi_rvalid_reg) begin
232 | m_axi_rdata_reg <= m_axi_rdata;
233 |
234 | m_axi_rid_reg <= m_axi_rid;
235 | m_axi_rlast_reg <= m_axi_rlast;
236 |
237 | m_axi_rresp_reg <= m_axi_rresp;
238 | m_axi_rvalid_reg <= m_axi_rvalid;
239 | end
240 | // Master FSM: 0 = wait for synchronized s_flag and issue AR, 1 = wait for the captured R beat and raise m_flag, 2 = wait for s_flag to drop and clear m_flag.
241 | case (m_state_reg)
242 | 2'd0: begin
243 | if (s_flag_sync_reg_2) begin
244 | m_state_reg <= 2'd1;
245 | m_axi_araddr_reg <= s_axi_araddr_reg;
246 | // s_axi_ar*_reg are s_clk-domain registers read here directly in the m_clk domain.
247 | m_axi_arid_reg <= s_axi_arid_reg;
248 | m_axi_arlen_reg <= s_axi_arlen_reg;
249 | m_axi_arsize_reg <= s_axi_arsize_reg;
250 | m_axi_arburst_reg <= s_axi_arburst_reg;
251 |
252 | m_axi_arprot_reg <= s_axi_arprot_reg;
253 | m_axi_arvalid_reg <= 1'b1;
254 | m_axi_rvalid_reg <= 1'b0;
255 | end
256 | end
257 | 2'd1: begin
258 | if (m_axi_rvalid_reg) begin
259 | m_flag_reg <= 1'b1;
260 | m_state_reg <= 2'd2;
261 | end
262 | end
263 | 2'd2: begin
264 | if (!s_flag_sync_reg_2) begin
265 | m_state_reg <= 2'd0;
266 | m_flag_reg <= 1'b0;
267 | end
268 | end
269 | endcase
270 | // Synchronous reset is assigned last; m_axi_rvalid_reg resets to 1, which holds m_axi_rready low while idle.
271 | if (m_rst) begin
272 | m_state_reg <= 2'd0;
273 | m_flag_reg <= 1'b0;
274 | m_axi_arvalid_reg <= 1'b0;
275 | m_axi_rvalid_reg <= 1'b1;
276 | end
277 | end
278 |
279 | endmodule
280 |
281 | `resetall
282 |
--------------------------------------------------------------------------------
/src/main/resources/sv/axi_cdc/axi_cdc_wr.v:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Copyright (c) 2019 Alex Forencich
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
23 | */
24 |
25 | // Language: Verilog 2001
26 |
27 | `resetall
28 | `timescale 1ns / 1ps
29 | `default_nettype none
30 |
31 | /*
32 | * AXI4 clock domain crossing module (write)
33 | */
34 | module axi_cdc_wr #
35 | (
36 | // Width of data bus in bits
37 | parameter DATA_WIDTH = 32,
38 | // Width of address bus in bits
39 | parameter ADDR_WIDTH = 32,
40 | // Width of wstrb (width of data bus in words)
41 | parameter STRB_WIDTH = (DATA_WIDTH/8),
42 | // Width of ID
43 | parameter ID_WIDTH = 4
44 | )
45 | (
46 | /*
47 | * AXI slave interface (s_clk domain)
48 | */
49 | input wire s_clk,
50 | input wire s_rst,
51 | input wire [ADDR_WIDTH-1:0] s_axi_awaddr,
52 |
53 | input wire [ID_WIDTH-1:0] s_axi_awid,
54 | input wire [8-1:0] s_axi_awlen,
55 | input wire [3-1:0] s_axi_awsize,
56 | input wire [2-1:0] s_axi_awburst,
57 |
58 | input wire [2:0] s_axi_awprot,
59 | input wire s_axi_awvalid,
60 | output wire s_axi_awready,
61 |
62 | input wire [DATA_WIDTH-1:0] s_axi_wdata,
63 | input wire [STRB_WIDTH-1:0] s_axi_wstrb,
64 |
65 | input wire s_axi_wlast,
66 |
67 | input wire s_axi_wvalid,
68 | output wire s_axi_wready,
69 |
70 | output wire [ID_WIDTH-1:0] s_axi_bid,
71 |
72 | output wire [1:0] s_axi_bresp,
73 | output wire s_axi_bvalid,
74 | input wire s_axi_bready,
75 |
76 | /*
77 | * AXI master interface (m_clk domain)
78 | */
79 | input wire m_clk,
80 | input wire m_rst,
81 | output wire [ADDR_WIDTH-1:0] m_axi_awaddr,
82 |
83 | output wire [ID_WIDTH-1:0] m_axi_awid,
84 | output wire [8-1:0] m_axi_awlen,
85 | output wire [3-1:0] m_axi_awsize,
86 | output wire [2-1:0] m_axi_awburst,
87 |
88 | output wire [2:0] m_axi_awprot,
89 | output wire m_axi_awvalid,
90 | input wire m_axi_awready,
91 | output wire [DATA_WIDTH-1:0] m_axi_wdata,
92 | output wire [STRB_WIDTH-1:0] m_axi_wstrb,
93 |
94 | output wire m_axi_wlast,
95 |
96 | output wire m_axi_wvalid,
97 | input wire m_axi_wready,
98 |
99 | input wire [ID_WIDTH-1:0] m_axi_bid,
100 |
101 | input wire [1:0] m_axi_bresp,
102 | input wire m_axi_bvalid,
103 | output wire m_axi_bready
104 | );
105 | // Handshake: each side runs a 3-state FSM; s_flag_reg (request) and m_flag_reg (completion) cross domains through 2-stage synchronizers.
106 | reg [1:0] s_state_reg = 2'd0;
107 | reg s_flag_reg = 1'b0;
108 | (* srl_style = "register" *)
109 | reg s_flag_sync_reg_1 = 1'b0;
110 | (* srl_style = "register" *)
111 | reg s_flag_sync_reg_2 = 1'b0;
112 |
113 | reg [1:0] m_state_reg = 2'd0;
114 | reg m_flag_reg = 1'b0;
115 | (* srl_style = "register" *)
116 | reg m_flag_sync_reg_1 = 1'b0;
117 | (* srl_style = "register" *)
118 | reg m_flag_sync_reg_2 = 1'b0;
119 | // Holding registers for the AW/W request and B response on the slave side.
120 | reg [ADDR_WIDTH-1:0] s_axi_awaddr_reg = {ADDR_WIDTH{1'b0}};
121 |
122 | reg [ID_WIDTH-1:0] s_axi_awid_reg = {ID_WIDTH{1'b0}};
123 | reg [8-1:0] s_axi_awlen_reg = {8{1'b0}};
124 | reg [3-1:0] s_axi_awsize_reg = {3{1'b0}};
125 | reg [2-1:0] s_axi_awburst_reg = {2{1'b0}};
126 |
127 | reg [2:0] s_axi_awprot_reg = 3'd0;
128 | reg s_axi_awvalid_reg = 1'b0;
129 | reg [DATA_WIDTH-1:0] s_axi_wdata_reg = {DATA_WIDTH{1'b0}};
130 | reg [STRB_WIDTH-1:0] s_axi_wstrb_reg = {STRB_WIDTH{1'b0}};
131 |
132 | reg s_axi_wlast_reg = 0;
133 |
134 | reg s_axi_wvalid_reg = 1'b0;
135 |
136 | reg [ID_WIDTH-1:0] s_axi_bid_reg = {ID_WIDTH{1'b0}};
137 |
138 | reg [1:0] s_axi_bresp_reg = 2'b00;
139 | reg s_axi_bvalid_reg = 1'b0;
140 | // Holding registers for the AW/W request and B response on the master side.
141 | reg [ADDR_WIDTH-1:0] m_axi_awaddr_reg = {ADDR_WIDTH{1'b0}};
142 |
143 | reg [ID_WIDTH-1:0] m_axi_awid_reg = {ID_WIDTH{1'b0}};
144 | reg [8-1:0] m_axi_awlen_reg = {8{1'b0}};
145 | reg [3-1:0] m_axi_awsize_reg = {3{1'b0}};
146 | reg [2-1:0] m_axi_awburst_reg = {2{1'b0}};
147 |
148 | reg [2:0] m_axi_awprot_reg = 3'd0;
149 | reg m_axi_awvalid_reg = 1'b0;
150 | reg [DATA_WIDTH-1:0] m_axi_wdata_reg = {DATA_WIDTH{1'b0}};
151 | reg [STRB_WIDTH-1:0] m_axi_wstrb_reg = {STRB_WIDTH{1'b0}};
152 |
153 | reg m_axi_wlast_reg = 0;
154 |
155 | reg m_axi_wvalid_reg = 1'b0;
156 |
157 | reg [ID_WIDTH-1:0] m_axi_bid_reg = {ID_WIDTH{1'b0}};
158 |
159 | reg [1:0] m_axi_bresp_reg = 2'b00;
160 | reg m_axi_bvalid_reg = 1'b1;
161 | // A new AW (or W) is accepted only while its holding register is empty and no B response is pending.
162 | assign s_axi_awready = !s_axi_awvalid_reg && !s_axi_bvalid_reg;
163 | assign s_axi_wready = !s_axi_wvalid_reg && !s_axi_bvalid_reg;
164 |
165 | assign s_axi_bid = s_axi_bid_reg;
166 |
167 | assign s_axi_bresp = s_axi_bresp_reg;
168 | assign s_axi_bvalid = s_axi_bvalid_reg;
169 |
170 | assign m_axi_awaddr = m_axi_awaddr_reg;
171 |
172 | assign m_axi_awid = m_axi_awid_reg;
173 | assign m_axi_awlen = m_axi_awlen_reg;
174 | assign m_axi_awsize = m_axi_awsize_reg;
175 | assign m_axi_awburst = m_axi_awburst_reg;
176 |
177 | assign m_axi_awprot = m_axi_awprot_reg;
178 | assign m_axi_awvalid = m_axi_awvalid_reg;
179 | assign m_axi_wdata = m_axi_wdata_reg;
180 | assign m_axi_wstrb = m_axi_wstrb_reg;
181 |
182 | assign m_axi_wlast = m_axi_wlast_reg;
183 |
184 | assign m_axi_wvalid = m_axi_wvalid_reg;
185 | assign m_axi_bready = !m_axi_bvalid_reg;
186 | // NOTE(review): exactly one W beat is held and forwarded per AW - presumably only single-beat writes are issued; confirm.
187 | // slave side
188 | always @(posedge s_clk) begin
189 | s_axi_bvalid_reg <= s_axi_bvalid_reg && !s_axi_bready;
190 | // Latch the incoming AW while its holding register is free (same condition as s_axi_awready).
191 | if (!s_axi_awvalid_reg && !s_axi_bvalid_reg) begin
192 | s_axi_awaddr_reg <= s_axi_awaddr;
193 |
194 | s_axi_awid_reg <= s_axi_awid;
195 | s_axi_awlen_reg <= s_axi_awlen;
196 | s_axi_awsize_reg <= s_axi_awsize;
197 | s_axi_awburst_reg <= s_axi_awburst;
198 |
199 | s_axi_awprot_reg <= s_axi_awprot;
200 | s_axi_awvalid_reg <= s_axi_awvalid;
201 | end
202 | // Latch the incoming W beat while its holding register is free (same condition as s_axi_wready).
203 | if (!s_axi_wvalid_reg && !s_axi_bvalid_reg) begin
204 | s_axi_wdata_reg <= s_axi_wdata;
205 | s_axi_wstrb_reg <= s_axi_wstrb;
206 |
207 | s_axi_wlast_reg <= s_axi_wlast;
208 |
209 | s_axi_wvalid_reg <= s_axi_wvalid;
210 | end
211 | // Slave FSM: 0 = raise s_flag once both AW and W are held, 1 = wait for synchronized m_flag and take the B response, 2 = wait for m_flag to drop and free the holding registers.
212 | case (s_state_reg)
213 | 2'd0: begin
214 | if (s_axi_awvalid_reg && s_axi_wvalid_reg) begin
215 | s_state_reg <= 2'd1;
216 | s_flag_reg <= 1'b1;
217 | end
218 | end
219 | 2'd1: begin
220 | if (m_flag_sync_reg_2) begin
221 | s_state_reg <= 2'd2;
222 | s_flag_reg <= 1'b0;
223 | // m_axi_b*_reg are m_clk-domain registers read here directly in the s_clk domain.
224 | s_axi_bid_reg <= m_axi_bid_reg;
225 |
226 | s_axi_bresp_reg <= m_axi_bresp_reg;
227 | s_axi_bvalid_reg <= 1'b1;
228 | end
229 | end
230 | 2'd2: begin
231 | if (!m_flag_sync_reg_2) begin
232 | s_state_reg <= 2'd0;
233 | s_axi_awvalid_reg <= 1'b0;
234 | s_axi_wvalid_reg <= 1'b0;
235 | end
236 | end
237 | endcase
238 | // Synchronous reset is assigned last, so it overrides the assignments above.
239 | if (s_rst) begin
240 | s_state_reg <= 2'd0;
241 | s_flag_reg <= 1'b0;
242 | s_axi_awvalid_reg <= 1'b0;
243 | s_axi_wvalid_reg <= 1'b0;
244 | s_axi_bvalid_reg <= 1'b0;
245 | end
246 | end
247 |
248 | // synchronization
249 | always @(posedge s_clk) begin
250 | m_flag_sync_reg_1 <= m_flag_reg;
251 | m_flag_sync_reg_2 <= m_flag_sync_reg_1;
252 | end
253 |
254 | always @(posedge m_clk) begin
255 | s_flag_sync_reg_1 <= s_flag_reg;
256 | s_flag_sync_reg_2 <= s_flag_sync_reg_1;
257 | end
258 |
259 | // master side
260 | always @(posedge m_clk) begin
261 | m_axi_awvalid_reg <= m_axi_awvalid_reg && !m_axi_awready;
262 | m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wready;
263 | // Capture the B response while the response register is empty (m_axi_bready is high).
264 | if (!m_axi_bvalid_reg) begin
265 | m_axi_bid_reg <= m_axi_bid;
266 |
267 | m_axi_bresp_reg <= m_axi_bresp;
268 | m_axi_bvalid_reg <= m_axi_bvalid;
269 | end
270 | // Master FSM: 0 = wait for synchronized s_flag and issue AW+W, 1 = wait for the captured B response and raise m_flag, 2 = wait for s_flag to drop and clear m_flag.
271 | case (m_state_reg)
272 | 2'd0: begin
273 | if (s_flag_sync_reg_2) begin
274 | m_state_reg <= 2'd1;
275 | m_axi_awaddr_reg <= s_axi_awaddr_reg;
276 | // s_axi_aw*_reg / s_axi_w*_reg are s_clk-domain registers read here directly in the m_clk domain.
277 | m_axi_awid_reg <= s_axi_awid_reg;
278 | m_axi_awlen_reg <= s_axi_awlen_reg;
279 | m_axi_awsize_reg <= s_axi_awsize_reg;
280 | m_axi_awburst_reg <= s_axi_awburst_reg;
281 |
282 | m_axi_awprot_reg <= s_axi_awprot_reg;
283 | m_axi_awvalid_reg <= 1'b1;
284 | m_axi_wdata_reg <= s_axi_wdata_reg;
285 | m_axi_wstrb_reg <= s_axi_wstrb_reg;
286 |
287 | m_axi_wlast_reg <= s_axi_wlast_reg;
288 |
289 | m_axi_wvalid_reg <= 1'b1;
290 | // NOTE(review): the next line loads m_axi_bid_reg from the slave-side response ID; it is overwritten when the B response is captured, so it looks redundant - confirm.
291 | m_axi_bid_reg <= s_axi_bid_reg;
292 |
293 | m_axi_bvalid_reg <= 1'b0;
294 | end
295 | end
296 | 2'd1: begin
297 | if (m_axi_bvalid_reg) begin
298 | m_flag_reg <= 1'b1;
299 | m_state_reg <= 2'd2;
300 | end
301 | end
302 | 2'd2: begin
303 | if (!s_flag_sync_reg_2) begin
304 | m_state_reg <= 2'd0;
305 | m_flag_reg <= 1'b0;
306 | end
307 | end
308 | endcase
309 | // Synchronous reset is assigned last; m_axi_bvalid_reg resets to 1, which holds m_axi_bready low while idle.
310 | if (m_rst) begin
311 | m_state_reg <= 2'd0;
312 | m_flag_reg <= 1'b0;
313 | m_axi_awvalid_reg <= 1'b0;
314 | m_axi_wvalid_reg <= 1'b0;
315 | m_axi_bvalid_reg <= 1'b1;
316 | end
317 | end
318 |
319 | endmodule
320 |
321 | `resetall
322 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/Main.scala:
--------------------------------------------------------------------------------
1 | package nagicore
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import _root_.circt.stage._
6 |
7 | object Main extends App { // entry point: exports Verilog for the core selected by the first command-line argument
8 |   val target = args.headOption.getOrElse("") // "" when no argument is given, which falls through to the default case below instead of throwing on args(0)
9 |   val build_dir = "./build" // directory passed to both ChiselStage (-td) and firtool (-o)
10 |   println(target)
11 |   def exportVerilog(core: () => chisel3.RawModule): Unit = { // elaborates the module built by `core` and emits Verilog into build_dir
12 |     println("Export Verilog Started")
13 |     val chiselStageOption = Seq(
14 |       chisel3.stage.ChiselGeneratorAnnotation(() => core()),
15 |       CIRCTTargetAnnotation(CIRCTTarget.Verilog)
16 |     )
17 |     val firtoolOptions = Seq(
18 |       // FirtoolOption("--lowering-options=disallowLocalVariables,locationInfoStyle=wrapInAtSquareBracket,noAlwaysComb"),
19 |       FirtoolOption("--lowering-options=disallowLocalVariables,locationInfoStyle=wrapInAtSquareBracket,noAlwaysComb"),
20 |       // FirtoolOption("--lowering-options=disallowLocalVariables,disallowPackedArrays,locationInfoStyle=wrapInAtSquareBracket,noAlwaysComb"),
21 |
22 |       FirtoolOption("--split-verilog"),
23 |       FirtoolOption("-o=" + build_dir),
24 |       FirtoolOption("--disable-all-randomization"),
25 |       FirtoolOption("--preserve-aggregate=none"),
26 |     )
27 |     val executeOptions = chiselStageOption ++ firtoolOptions
28 |     val executeArgs = Array("-td", build_dir)
29 |     (new ChiselStage).execute(executeArgs, executeOptions)
30 |   }
31 |   target match {
32 |     case "NSCSCC" => {
33 |       GlobalConfg.SIM = false // cleared before the core is elaborated
34 |       exportVerilog(() => new nagicore.loongarch.nscscc2024.CoreNSCSCC)
35 |     }
36 |     // case "TEST" => {
37 |     //     exportVerilog(() => new Module{
38 |     //         val io = IO(new Bundle {
39 |     //             val clk = Input(Clock())
40 |     //         })
41 |     //         val a = "h123".U
42 |     //         val xbar = Module(new nagicore.unit.ip.axi_corssbar.AXI4XBar(32, 32, List((0, nagicore.unit.ip.axi_corssbar.Axi4RW.RW)), List(("0x80000000", "0x807FFFFF"))))
43 |     //         xbar.io.masters <> DontCare
44 |     //         xbar.io.slaves <> DontCare
45 |     //         xbar.io.slaves(0).ar.addr := 2.U(32.W)
46 |     //     })
47 |     // }
48 |     case _ => { // any other target, including a missing argument, exports the default Core with GlobalConfg.SIM left unchanged
49 |       exportVerilog(() => new nagicore.loongarch.nscscc2024.Core)
50 |     }
51 |   }
52 | }
53 |
54 | object GlobalConfg { // elaboration-time switches shared by the whole generator
55 | var SIM: Boolean = true // guards the `if(GlobalConfg.SIM)` DPI-C/perf hooks; Main sets it to false for the NSCSCC target
56 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/bus/RAM.scala:
--------------------------------------------------------------------------------
1 | package nagicore.bus
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.unit.cache.CacheMemType.{RAM_2cyc, Value}
6 | import nagicore.GlobalConfg
7 |
8 | object RamType extends Enumeration { // selects which memory implementation `Ram` elaborates
9 | type RamType = Value // alias so callers can write RamType.RamType
10 | val RAM_2CYC, RAM_1CYC, BRAM_1CYC, DPIC_2CYC, DPIC_1CYC: Value = Value
11 | }
12 |
13 | class RamIO(width: Int, depth: Long) extends Bundle{ // port bundle of `Ram`, directions as seen from the RAM
14 | val addr = Input(UInt(log2Up(depth).W)) // entry index, log2Up(depth) bits wide
15 | val din = Input(UInt(width.W)) // write data
16 | val dout = Output(UInt(width.W)) // read data
17 | val en = Input(Bool()) // enable; the Mem-based implementations write only when en && we
18 | val we = Input(Bool()) // write enable
19 | val re = Input(Bool()) // read enable; forwarded to the DPIC RAM models
20 | val wmask = Input(UInt((width/8).W)) // width/8 bits, i.e. one write-mask bit per byte of din
21 | }
22 |
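23 | /**
24 | * RAM wrapper whose backing implementation is selected by `imp`.
25 | * @note the constructor parameters are `width`, `depth` and `imp`
26 | * @param width data width of one entry, in bits
27 | * @param depth number of entries; the address port is log2Up(depth) bits wide
28 | * @param imp implementation to elaborate (a RamType value), RAM_2CYC by default
29 | */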
30 | class Ram(width: Int, depth: Long, imp: RamType.RamType=RamType.RAM_2CYC) extends Module{
31 | val io = IO(new RamIO(width, depth))
32 | val addrBits = log2Up(depth)
33 | imp match {
34 | case RamType.DPIC_2CYC => {
35 | import nagicore.unit.DPIC_RAM_2CYC
36 | val sram = Module(new DPIC_RAM_2CYC(addrBits, width))
37 | sram.io.clk := clock
38 | sram.io.rst := reset
39 | sram.io.addr := io.addr
40 | sram.io.re := io.re
41 | sram.io.we := io.we
42 | sram.io.wdata := io.din
43 | sram.io.wmask := io.wmask
44 | sram.io.size := log2Up(width/8).U
45 | sram.io.en := io.en
46 | io.dout := sram.io.rdata
47 | }
48 | case RamType.DPIC_1CYC => {
49 | import nagicore.unit.DPIC_RAM_1CYC
50 | val sram = Module(new DPIC_RAM_1CYC(addrBits, width))
51 | sram.io.clk := clock
52 | sram.io.rst := reset
53 | sram.io.addr := io.addr
54 | sram.io.wdata := io.din
55 | sram.io.re := io.re
56 | sram.io.we := io.we
57 | sram.io.wmask := io.wmask
58 | sram.io.size := log2Up(width/8).U
59 | sram.io.en := io.en
60 | io.dout := sram.io.rdata
61 | }
62 | case RamType.RAM_1CYC | RamType.BRAM_1CYC => {
63 | if(imp==RamType.BRAM_1CYC && !GlobalConfg.SIM){
64 | Predef.println(s"Xilinx BlockRAM IP blk_mem_${width}_${depth} needed")
65 | class BlockRAMIP extends BlackBox{
66 | override val desiredName = s"blk_mem_${width}_${depth}"
67 | val io = IO(new Bundle {
68 | val addra = Input(UInt(addrBits.W))
69 | val clka = Input(Clock())
70 | val dina = Input(UInt(width.W))
71 | val douta = Output(UInt(width.W))
72 | val ena = Input(Bool())
73 | val wea = Input(Bool())
74 | })
75 | }
76 | val bram = Module(new BlockRAMIP)
77 | bram.io.clka := clock
78 | bram.io.addra := io.addr
79 | bram.io.dina := io.din
80 | bram.io.wea := io.we
81 | bram.io.ena := io.en
82 | io.dout := bram.io.douta
83 | }else{
84 | if(width%8==0){
85 | val bytes = width/8
86 | val mem = Mem(depth, Vec(bytes, UInt(8.W)))
87 | when(io.en&&io.we){
88 | val wdata = VecInit.tabulate(bytes){
89 | i => io.din(8*((bytes-1-i)+1)-1, 8*(bytes-1-i))
90 | }
91 | assert(Cat(io.wmask.asBools)===io.wmask)
92 | mem.write(io.addr, wdata, io.wmask.asBools)
93 | }
94 | // WRITE_FIRST Mode
95 | io.dout := Cat(mem.read(io.addr))
96 | }else{
97 | val mem = Mem(depth, UInt(width.W))
98 | when(io.en&&io.we){
99 | mem.write(io.addr, io.din)
100 | }
101 | io.dout := mem.read(io.addr)
102 | }
103 | }
104 | }
105 | case _ => {
106 | /**
107 | * 两个周期的同步RAM
108 | * 当EN拉低时,不会写入任何数据,读数据将会保持在上一个状态;读后写时,将会继续读上一次读地址的数据
109 | * When inactive, no data is written to the RAM and the output bus remains in its previous state.
110 | * [NO_CHANGE Mode](https://docs.amd.com/r/en-US/am007-versal-memory/NO_CHANGE-Mode-DEFAULT)
111 | */
112 | val mem = Mem(depth, UInt(width.W))
113 | // val enable_read = io.en && !io.we
114 | // val rdata = mem.read(io.addr, enable_read)
115 | // io.dout := Mux(enable_read, rdata, RegEnable(rdata, enable_read))
116 | // val rdata = mem.read(io.addr, enable_read)
117 | // io.dout =
118 | // io.dout := rdata
119 | // when(io.en&&io.we){
120 | // mem.write(io.addr, io.din)
121 | // }
122 | val rdata = mem.read(RegEnable(io.addr, io.en && !io.we))
123 | io.dout := rdata
124 | when(io.en&&io.we){
125 | mem.write(io.addr, io.din)
126 | }
127 | assert(io.wmask.andR)
128 | /*
129 | val regs = Reg(Vec(1< if_part.io.preif2if
19 | if_part.io.if2id <> id_part.io.if2id
20 | id_part.io.id2ex <> ex_part.io.id2ex
21 | ex_part.io.ex2preif <> preif_part.io.ex2preif
22 | ex_part.io.ex2id <> id_part.io.ex2id
23 | ex_part.io.ex2mem <> mem_part.io.ex2mem
24 | mem_part.io.mem2id <> id_part.io.mem2id
25 | mem_part.io.stall_all := false.B
26 |
27 | val isram_ctrl = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, 1.toLong< dsram_ctrl.io.axi
34 |
35 | val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS))
36 | xbar_imem.io.in(0) <> if_part.io.isram
37 | xbar_imem.io.out <> isram_ctrl.io.axi
38 |
39 | val xbar_dmem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS))
40 | xbar_dmem.io.in(1) <> mia.io.mem
41 | xbar_dmem.io.out <> dsram_ctrl.io.axi
42 |
43 | val xbar_mem_stage = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List(
44 | (0x80000000L, 0x400000L, false),
45 | (0x80400000L, 0x400000L, false),
46 | (0xbfd00000L, 0x400000L, false),
47 | (0x90000000L, 0x400000L, false),
48 | )))
49 |
50 | xbar_mem_stage.io.in <> mem_part.io.dmem
51 | xbar_mem_stage.io.out(0) <> xbar_imem.io.in(1)
52 | xbar_mem_stage.io.out(1) <> xbar_dmem.io.in(0)
53 | xbar_mem_stage.io.out(2) <> uart_axi4.io.axi
54 | xbar_mem_stage.io.out(3) <> mia.io.cmd
55 |
56 | val isram = Module(new Ram(XLEN, 1.toLong< isram.io
60 | dsram_ctrl.io.sram <> dsram.io
61 | uart_axi4.io.sram <> uart.io
62 | }
63 |
64 | class CoreNSCSCC extends Module with Config{ // top level with external SRAM and UART ports: five pipeline stages plus the AXI crossbars
65 | val RAM_DEPTH = 0x400000/4 // 0x400000 / 4: entry count used for both 32-bit RamIO ports and their wrappers
66 | val io = IO(new Bundle{
67 | val isram = Flipped(new RamIO(32, RAM_DEPTH)) // flipped RamIO: this module drives addr/din/en/we/re/wmask and reads dout
68 | val dsram = Flipped(new RamIO(32, RAM_DEPTH))
69 | val uart = new AXI4IO(XLEN, XLEN)
70 | })
71 |
72 | val preif_part = Module(new stages.PREIF)
73 | val if_part = Module(new stages.IF)
74 | val id_part = Module(new stages.ID)
75 | val ex_part = Module(new stages.EX)
76 | val mem_part = Module(new stages.MEM)
77 |
78 | preif_part.io.preif2if <> if_part.io.preif2if // forward pipeline: PREIF -> IF -> ID -> EX -> MEM
79 | if_part.io.if2id <> id_part.io.if2id
80 | id_part.io.id2ex <> ex_part.io.id2ex
81 | ex_part.io.ex2preif <> preif_part.io.ex2preif // branch resolution and BTB update back to PREIF
82 | ex_part.io.ex2id <> id_part.io.ex2id // EX bypass into ID
83 | ex_part.io.ex2mem <> mem_part.io.ex2mem
84 | mem_part.io.mem2id <> id_part.io.mem2id // MEM bypass and register write-back into ID
85 | mem_part.io.stall_all := false.B // global stall input is tied off here
86 |
87 | val isram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2)) // AXI4 front end for io.isram
88 | val dsram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2)) // AXI4 front end for io.dsram
89 | val mia = Module(new MIAU(XLEN, XLEN, AXI4IDBITS))
90 |
91 | if_part.io.isram <> isram_axi4_wrapper.io.axi // NOTE(review): both sides are connected again via xbar_imem below; this direct link looks overridden - confirm
92 |
93 | val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) // 2 masters (IF stage, MEM stage) -> instruction SRAM
94 | xbar_imem.io.in(0) <> if_part.io.isram
95 | xbar_imem.io.out <> isram_axi4_wrapper.io.axi
96 |
97 | val xbar_dmem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) // 2 masters (MEM stage, MIAU) -> data SRAM
98 | xbar_dmem.io.in(1) <> mia.io.mem
99 | xbar_dmem.io.out <> dsram_axi4_wrapper.io.axi
100 |
101 | val xbar_mem_stage = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List( // presumably (base, size, flag) per slave, in out(i) order - TODO confirm against AXI4XBar1toN
102 | (0x80000000L, 0x400000L, false),
103 | (0x80400000L, 0x400000L, false),
104 | (0xbfd00000L, 0x400000L, false),
105 | (0x90000000L, 0x400000L, false),
106 | )))
107 |
108 | xbar_mem_stage.io.in <> mem_part.io.dmem // MEM-stage data port fans out to the four slaves below
109 | xbar_mem_stage.io.out(0) <> xbar_imem.io.in(1) // slave 0: instruction SRAM, shared with IF through xbar_imem
110 | xbar_mem_stage.io.out(1) <> xbar_dmem.io.in(0) // slave 1: data SRAM, shared with the MIAU through xbar_dmem
111 | xbar_mem_stage.io.out(2) <> io.uart // slave 2: external UART AXI port
112 | xbar_mem_stage.io.out(3) <> mia.io.cmd // slave 3: MIAU command port
113 |
114 | isram_axi4_wrapper.io.sram <> io.isram
115 | dsram_axi4_wrapper.io.sram <> io.dsram
116 | }
117 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024/CtrlFlags.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 |
7 | object CtrlFlags { // control-signal encodings: each selector is a set of bit-pattern strings plus its hardware type
8 | // trait FlagsEnum {
9 | // def value: String
10 | // }
11 | // object aluASel{
12 | // sealed trait T extends FlagsEnum
13 | // case object ra extends T{
14 | // def value = "01"
15 | // }
16 | // case object pc extends T{
17 | // def value = "10"
18 | // }
19 | // }
20 | object aluASel { // ALU operand A source
21 | val ra: String = "01"
22 | val pc: String = "10"
23 | def apply(): UInt = UInt(2.W) // one bit per option above
24 | }
25 | object aluBSel { // ALU operand B source
26 | val rb: String = "001"
27 | val imm: String = "010"
28 | val num4: String = "100"
29 | def apply(): UInt = UInt(3.W)
30 | }
31 | object brpcAddSel { // base that the branch immediate is added to
32 | val pc: String = "01"
33 | val ra_val: String = "10"
34 | def apply(): UInt = UInt(2.W)
35 | }
36 | object ldType { // load kind; `x` marks "not a load"
37 | val x: String = "000001"
38 | val b: String = "000010"
39 | val h: String = "000100"
40 | val w: String = "001000"
41 | val bu: String = "010000"
42 | val hu: String = "100000"
43 | def apply(): UInt = UInt(6.W)
44 | }
45 | object stType { // store kind; `x` marks "not a store"
46 | val x: String = "0001"
47 | val b: String = "0010"
48 | val h: String = "0100"
49 | val w: String = "1000"
50 | def apply(): UInt = UInt(4.W)
51 | }
52 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024/stages/EX.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.utils.Flags
6 | import nagicore.unit.ALU
7 | import nagicore.unit.BRU_SINGLE
8 | import nagicore.GlobalConfg
9 | import nagicore.unit.BTBUpdateIO
10 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags}
11 | import nagicore.unit.BR_TYPE
12 | import nagicore.unit.BP_TYPE
13 | import nagicore.unit.MULU_IMP
14 | import nagicore.unit.DIVU_IMP
15 |
16 | class ex2preifIO extends Bundle with Config{ // EX -> PREIF: branch resolution result and BTB training data
17 | val bpu_update = new BTBUpdateIO(BTB_ENTRYS, XLEN) // BTB update record
18 | val bpu_fail = Bool() // set when EX finds the branch was mispredicted
19 | val br_real_pc = UInt(XLEN.W) // PC to continue from after the branch resolves
20 | }
21 |
22 | class ex2idIO extends Bundle with Config{ // EX -> ID bypass (forwarding) port
23 | // effective signal
24 | val bypass_rc = Output(UInt(GPR_LEN.W)) // destination register index of the forwarded value
25 | val bypass_val = Output(UInt(XLEN.W)) // forwarded value
26 | val bypass_en = Output(Bool()) // forwarding is only used while this is set
27 | }
28 |
29 | class ex2memBits extends Bundle with Config{ // payload handed from EX to MEM
30 | val instr = UInt(XLEN.W) // raw instruction word
31 | val alu_out = UInt(XLEN.W) // ALU result; MEM uses it as the memory address
32 | val rb_val = UInt(XLEN.W) // rb operand value; MEM uses it as store data
33 | val rc = UInt(GPR_LEN.W) // destination register index
34 | val ld_type = CtrlFlags.ldType() // load kind (CtrlFlags.ldType encoding)
35 | val st_type = CtrlFlags.stType() // store kind (CtrlFlags.stType encoding)
36 | val pc = UInt(XLEN.W) // PC of the instruction
37 |
38 | val valid = Bool() // false marks a bubble
39 | }
40 |
41 | class ex2memIO extends Bundle{ // EX -> MEM pipeline link
42 | val bits = Output(new ex2memBits) // payload travelling downstream
43 | val stall = Input(Bool()) // back-pressure from MEM
44 | }
45 |
46 | class EX extends Module with Config{ // execute stage: ALU, branch resolution, misprediction kill and load-use stall
47 | val io = IO(new Bundle{
48 | val ex2preif = new ex2preifIO
49 | val id2ex = Flipped(new id2exIO)
50 | val ex2mem = new ex2memIO
51 | val ex2id = new ex2idIO
52 | })
53 | // stall signal from next stage
54 | val stall_nxt = io.ex2mem.stall
55 |
56 | val alu = Module(new ALU(XLEN, MULU_IMP.synthesizer_DSP, DIVU_IMP.none))
57 | val busy = alu.io.busy // ALU still working on the current operation
58 |
59 | // accept instrs from pre stage
60 | val accp_pre = Wire(Bool())
61 | // pipeline registers
62 | val preg = RegEnable(io.id2ex.bits, accp_pre)
63 |
64 | /* kill following *valid instrs*, max 3 instrs */
65 | val kill_nxt = RegInit(0.U(3.W))
66 | // stall pre stages in force
67 | val stall_pre_counter = RegInit(0.U(2.W))
68 |
69 | val valid_instr = kill_nxt === 0.U && preg.valid && !busy && stall_pre_counter === 0.U // instruction in preg really executes this cycle
70 | val is_ld : Bool = valid_instr && !Flags.OHis(preg.ld_type, CtrlFlags.ldType.x) // a valid load is in EX
71 | accp_pre := !(stall_nxt || busy)
72 |
73 | // must stall when ld comes immediately unlike kill
74 | io.id2ex.stall := stall_pre_counter(1) =/= 0.U || is_ld || busy || stall_nxt // NOTE(review): the counter is only ever loaded with 1 below, so bit 1 looks always 0 - confirm
75 |
76 | val bru = Module(new BRU_SINGLE(XLEN))
77 | bru.io.a := preg.ra_val
78 | bru.io.b := preg.rb_val
79 | bru.io.br_type := preg.br_type
80 |
81 | val br_pc = preg.imm + Mux(Flags.OHis(preg.brpcAdd_sel, CtrlFlags.brpcAddSel.ra_val), preg.ra_val, preg.pc) // branch target: imm + (ra value or pc)
82 |
83 | // valid_instr && bru.io.br_take
84 |
85 | val br_pred_fail = Mux(preg.bpu_out.taken, !bru.io.br_take || preg.bpu_out.target =/= br_pc, // predicted taken: wrong if not taken or target differs
86 | bru.io.br_take) && valid_instr // predicted not taken: wrong if actually taken
87 |
88 | io.ex2preif.bpu_fail := br_pred_fail
89 | io.ex2preif.br_real_pc := Mux(bru.io.br_take, br_pc, preg.pc+4.U)
90 |
91 | io.ex2preif.bpu_update.bp_type := RegNext(Mux(Flags.OHis(preg.br_type, BR_TYPE.ALWAYS), // all bpu_update fields go through RegNext, i.e. one cycle after resolution
92 | Flags.U(BP_TYPE.jump), Flags.U(BP_TYPE.cond)
93 | ))
94 | io.ex2preif.bpu_update.hit := RegNext(preg.bpu_out.hit)
95 | io.ex2preif.bpu_update.index := RegNext(preg.bpu_out.index)
96 | io.ex2preif.bpu_update.pc := RegNext(preg.pc)
97 | io.ex2preif.bpu_update.target := RegNext(io.ex2preif.br_real_pc)
98 | io.ex2preif.bpu_update.taken := RegNext(bru.io.br_take)
99 | io.ex2preif.bpu_update.valid := RegNext(valid_instr && !Flags.OHis(preg.br_type, BR_TYPE.NEVER)) // update only for valid instructions whose br_type is not NEVER
100 |
101 | if(GlobalConfg.SIM){ // simulation only: report branch count and mispredictions to the DPI-C perf counter
102 | import nagicore.unit.DPIC_PERF_BRU
103 | import nagicore.unit.BR_TYPE
104 | val dpic_perf_bru = Module(new DPIC_PERF_BRU)
105 | dpic_perf_bru.io.clk := clock
106 | dpic_perf_bru.io.rst := reset
107 | dpic_perf_bru.io.valid := !Flags.OHis(preg.br_type, BR_TYPE.NEVER) && valid_instr
108 | dpic_perf_bru.io.fail := br_pred_fail
109 | }
110 |
111 | io.ex2mem.bits.valid := valid_instr
112 |
113 | kill_nxt := Mux(!stall_nxt && !busy && (kill_nxt === 0.U || io.id2ex.bits.valid), // while killing, count down only when the incoming instruction is valid
114 | /* On a branch misprediction, the next 3 valid instructions (PREIF, IF, ID) must be ignored */
115 | Mux(br_pred_fail, 3.U,
116 | // Mux(is_ld, 1.U,
117 | Mux(kill_nxt===0.U, 0.U,
118 | kill_nxt-1.U
119 | )
120 | // )
121 | ), kill_nxt
122 | )
123 | stall_pre_counter := Mux(!stall_nxt,
124 | /* On a load, ask the previous stage to stall for 1 cycle and ignore the instruction of the following 1 cycle (EX) */
125 | Mux(is_ld, 1.U,
126 | Mux(stall_pre_counter===0.U, 0.U,
127 | stall_pre_counter-1.U
128 | )
129 | ), stall_pre_counter)
130 | // stall_pre_counter := Mux(!stall_nxt,
131 | // /* On a load, ask the previous stage to stall for 1 cycle and ignore the instructions of the following 2 cycles (EX, DMEM) */
132 | // Mux(is_ld,
133 | // Mux(preg.ld_type === Flags.bp(CtrlFlags.ldType.w),
134 | // 1.U,
135 | // 2.U
136 | // ),
137 | // Mux(stall_pre_counter===0.U, 0.U,
138 | // stall_pre_counter-1.U
139 | // )
140 | // ), stall_pre_counter)
141 |
142 |
143 | io.ex2mem.bits.instr := preg.instr
144 |
145 | val alu_a = Flags.onehotMux(preg.aluA_sel, Seq( // operand A: ra value or pc
146 | CtrlFlags.aluASel.ra -> preg.ra_val,
147 | CtrlFlags.aluASel.pc -> preg.pc,
148 | ))
149 | val alu_b = Flags.onehotMux(preg.aluB_sel, Seq( // operand B: rb value, immediate or the constant 4
150 | CtrlFlags.aluBSel.rb -> preg.rb_val,
151 | CtrlFlags.aluBSel.imm -> preg.imm,
152 | CtrlFlags.aluBSel.num4 -> 4.U,
153 | ))
154 |
155 | // must assert for only one cycle
156 | // alu.io.valid := kill_nxt === 0.U && preg.valid && RegNext(accp_pre)
157 | alu.io.valid := kill_nxt === 0.U && stall_pre_counter === 0.U && preg.valid && RegNext(accp_pre) // RegNext(accp_pre): preg was loaded on the previous clock edge
158 | alu.io.a := alu_a
159 | alu.io.b := alu_b
160 | alu.io.op := preg.alu_op
161 | io.ex2mem.bits.alu_out := alu.io.out
162 |
163 | io.ex2mem.bits.rb_val := preg.rb_val
164 |
165 | io.ex2mem.bits.rc := preg.rc
166 |
167 | io.ex2mem.bits.ld_type := preg.ld_type
168 |
169 | io.ex2mem.bits.st_type := preg.st_type
170 |
171 | io.ex2mem.bits.pc := preg.pc
172 |
173 | io.ex2id.bypass_rc := preg.rc // forward the ALU result of the instruction in EX to ID
174 | io.ex2id.bypass_val := alu.io.out
175 | io.ex2id.bypass_en := valid_instr
176 |
177 | if(GlobalConfg.SIM){ // simulation only: pipeline occupancy counter with id 1
178 | import nagicore.unit.DPIC_PERF_PIPE
179 | val perf_pipe_ex = Module(new DPIC_PERF_PIPE())
180 | perf_pipe_ex.io.clk := clock
181 | perf_pipe_ex.io.rst := reset
182 | perf_pipe_ex.io.id := 1.U
183 | perf_pipe_ex.io.invalid := !io.ex2mem.bits.valid
184 | perf_pipe_ex.io.stall := io.id2ex.stall
185 | }
186 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024/stages/ID.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.unit.GPR
6 | import nagicore.unit.ALU_OP
7 | import nagicore.unit.BR_TYPE
8 | import nagicore.unit.BTBPredOutIO
9 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags, Decoder}
10 | import nagicore.GlobalConfg
11 |
12 |
13 | class id2exBits extends Bundle with Config{ // payload handed from ID to EX: decoded controls and operand values
14 | val instr = UInt(XLEN.W) // raw instruction word
15 | val pc = UInt(XLEN.W) // PC of the instruction
16 | val ra_val = UInt(XLEN.W) // value of source register ra, after bypassing
17 | val aluA_sel = CtrlFlags.aluASel() // ALU operand A selector
18 | val rb_val = UInt(XLEN.W) // value of source register rb, after bypassing
19 | val aluB_sel = CtrlFlags.aluBSel() // ALU operand B selector
20 | val alu_op = ALU_OP() // ALU operation
21 | val rc = UInt(GPR_LEN.W) // destination register index
22 | val imm = UInt(XLEN.W) // decoded immediate
23 | val br_type = BR_TYPE() // branch kind
24 | val brpcAdd_sel = CtrlFlags.brpcAddSel() // base used for the branch target
25 | val ld_type = CtrlFlags.ldType() // load kind
26 | val st_type = CtrlFlags.stType() // store kind
27 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) // prediction that was made for this instruction
28 |
29 | val valid = Bool() // false marks a bubble
30 | }
31 |
32 | class id2exIO extends Bundle{ // ID -> EX pipeline link
33 | val bits = Output(new id2exBits) // payload travelling downstream
34 | val stall = Input(Bool()) // back-pressure from EX
35 | }
36 |
37 | class ID extends Module with Config{ // decode stage: instruction decoder, register file and operand bypassing
38 | val io = IO(new Bundle{
39 | val if2id = Flipped(new if2idIO)
40 | val id2ex = new id2exIO
41 |
42 | val ex2id = Flipped(new ex2idIO)
43 | val mem2id = Flipped(new mem2idIO)
44 | })
45 |
46 | // pipeline registers
47 | val preg = RegEnable(io.if2id.bits, !io.id2ex.stall)
48 |
49 | io.id2ex.bits.valid := preg.valid
50 | io.if2id.stall := io.id2ex.stall // the stall from EX is passed straight through to IF
51 |
52 | val decoder = Module(new Decoder(XLEN, GPR_LEN))
53 | decoder.io.instr := preg.instr
54 |
55 | io.id2ex.bits.instr := preg.instr
56 | io.id2ex.bits.pc := preg.pc
57 |
58 | val gpr = Module(new GPR(XLEN, GPR_NUM, 2, 1)) // presumably 2 read ports and 1 write port, matching raddr(0..1)/wen(0) below - TODO confirm
59 | gpr.io.wen(0) := io.mem2id.gpr_wen // register write-back comes from MEM
60 | gpr.io.waddr(0) := io.mem2id.gpr_wid
61 | gpr.io.wdata(0) := io.mem2id.gpr_wdata
62 |
63 | if(GlobalConfg.SIM){ // simulation only: mirror every register write to the DPI-C model
64 | import nagicore.unit.DPIC_UPDATE_GPR
65 | val dpic_update_gpr = Module(new DPIC_UPDATE_GPR(XLEN, GPR_NUM))
66 | dpic_update_gpr.io.clk := clock
67 | dpic_update_gpr.io.rst := reset
68 | dpic_update_gpr.io.id := gpr.io.waddr(0)
69 | dpic_update_gpr.io.wen := gpr.io.wen(0)
70 | dpic_update_gpr.io.wdata := gpr.io.wdata(0)
71 | }
72 |
73 | def bypass_unit(rx: UInt, gpr_rdata: UInt):UInt = { // operand value for register rx, by priority: r0 -> 0, EX bypass, MEM bypass, register file
74 | Mux(rx === 0.U, 0.U,
75 | Mux(io.ex2id.bypass_rc === rx && io.ex2id.bypass_en, io.ex2id.bypass_val,
76 | Mux(io.mem2id.bypass_rc === rx && io.mem2id.bypass_en, io.mem2id.bypass_val,
77 | gpr_rdata
78 | )
79 | )
80 | )
81 | }
82 |
83 | val ra = decoder.io.ra
84 | gpr.io.raddr(0) := ra
85 | // bypass
86 | io.id2ex.bits.ra_val := bypass_unit(ra, gpr.io.rdata(0))
87 | io.id2ex.bits.aluA_sel := decoder.io.aluA_sel
88 |
89 | val rb = decoder.io.rb
90 | gpr.io.raddr(1) := rb
91 | // bypass
92 | io.id2ex.bits.rb_val := bypass_unit(rb, gpr.io.rdata(1))
93 |
94 | io.id2ex.bits.aluB_sel := decoder.io.aluB_sel // the remaining fields are decoder outputs passed through unchanged
95 |
96 | io.id2ex.bits.alu_op := decoder.io.alu_op
97 |
98 | io.id2ex.bits.rc := decoder.io.rc
99 |
100 | io.id2ex.bits.imm := decoder.io.imm
101 |
102 | io.id2ex.bits.br_type := decoder.io.br_type
103 |
104 | io.id2ex.bits.brpcAdd_sel := decoder.io.brpcAdd_sel
105 |
106 | io.id2ex.bits.ld_type := decoder.io.ld_type
107 |
108 | io.id2ex.bits.st_type := decoder.io.st_type
109 |
110 | io.id2ex.bits.bpu_out := preg.bpu_out // the prediction travels with the instruction so EX can check it
111 |
112 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024/stages/IF.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.bus.AXI4IO
6 | //import nagicore.unit.{InstrsBuff, InstrsBuffCacheBundle}
7 | import nagicore.unit.cache.Cache
8 | import nagicore.GlobalConfg
9 | import nagicore.unit.cache.CacheReplaceType
10 | import nagicore.unit.BTBPredOutIO
11 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags}
12 | import nagicore.bus.RamType
13 |
14 |
15 | class if2idBits extends Bundle with Config{ // payload handed from IF to ID
16 | val pc = UInt(XLEN.W) // PC of the fetched instruction
17 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) // prediction made for this PC
18 | val instr = UInt(XLEN.W) // fetched instruction word
19 |
20 | val valid = Bool() // false marks a bubble
21 | }
22 |
23 | class if2idIO extends Bundle{ // IF -> ID pipeline link
24 | val bits = Output(new if2idBits) // payload travelling downstream
25 | val stall = Input(Bool()) // back-pressure from ID
26 | }
27 |
28 | class IF extends Module with Config{ // fetch stage: reads the instruction for the PREIF pc through the instruction cache
29 | val io = IO(new Bundle {
30 | val preif2if = Flipped(new preif2ifIO)
31 | val if2id = new if2idIO
32 | val isram = new AXI4IO(XLEN, XLEN)
33 | })
34 | // 2-stages 1cyc cache
35 | val icache = Module(new Cache(XLEN, XLEN, ICACHE_WAYS, ICACHE_LINES, ICACHE_WORDS, () => new preif2ifBits(), CacheReplaceType.LRU,
36 | dataRamType = RamType.RAM_1CYC,
37 | tagVRamType = RamType.RAM_1CYC,
38 | debug_id = 0))
39 | icache.io.axi <> io.isram
40 |
41 | icache.io.master.front.bits.addr := io.preif2if.bits.pc
42 | icache.io.master.front.bits.size := 2.U // presumably log2(bytes), i.e. a 4-byte access - TODO confirm against Cache
43 | icache.io.master.front.bits.uncache := false.B
44 | icache.io.master.front.bits.wmask := 0.U // fetch never writes
45 | icache.io.master.front.bits.valid := io.preif2if.bits.valid
46 | icache.io.master.front.bits.wdata := DontCare
47 | icache.io.master.front.bits.pipedata := io.preif2if.bits // the PREIF payload rides through the cache next to the request
48 | icache.io.master.back.stall := io.if2id.stall
49 |
50 |
51 | io.if2id.bits.instr := icache.io.master.back.bits.rdata
52 | io.if2id.bits.valid := icache.io.master.back.bits.valid
53 | io.if2id.bits.pc := icache.io.master.back.bits.pipedata.pc // pc and prediction come back from the piped payload
54 | io.if2id.bits.bpu_out := icache.io.master.back.bits.pipedata.bpu_out
55 |
56 | io.preif2if.stall := icache.io.master.front.stall
57 |
58 | if(GlobalConfg.SIM){ // simulation only: pipeline occupancy counter with id 0
59 | import nagicore.unit.DPIC_PERF_PIPE
60 | val perf_pipe_if = Module(new DPIC_PERF_PIPE())
61 | perf_pipe_if.io.clk := clock
62 | perf_pipe_if.io.rst := reset
63 | perf_pipe_if.io.id := 0.U
64 | perf_pipe_if.io.invalid := !io.if2id.bits.valid
65 | perf_pipe_if.io.stall := io.preif2if.stall
66 | }
67 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024/stages/MEM.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.bus.AXI4IO
6 | import nagicore.unit.cache.CacheMini
7 | import nagicore.utils.Flags
8 | import nagicore.GlobalConfg
9 | import nagicore.unit.cache.CacheReplaceType
10 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags}
11 | import nagicore.unit.cache.UnCache
12 |
13 | class mem2idIO extends Bundle with Config{ // MEM -> ID: bypass port plus register-file write-back port
14 | // effective signal
15 | val bypass_rc = Output(UInt(GPR_LEN.W)) // destination register index of the forwarded value
16 | val bypass_val = Output(UInt(XLEN.W)) // forwarded value
17 | val bypass_en = Output(Bool()) // forwarding is only used while this is set
18 |
19 | val gpr_wid = Output(UInt(GPR_LEN.W)) // register index to write
20 | val gpr_wdata = Output(UInt(XLEN.W)) // data to write
21 | val gpr_wen = Output(Bool()) // write enable
22 | }
23 |
24 | class MEM extends Module with Config{ // memory stage: issues loads/stores, extracts load data, drives bypass and write-back
25 | val io = IO(new Bundle {
26 | val ex2mem = Flipped(new ex2memIO())
27 | val mem2id = new mem2idIO()
28 | val dmem = new AXI4IO(XLEN, XLEN)
29 | val stall_all = Input(Bool())
30 | })
31 |
32 | class dcachePipeT extends Bundle {
33 | val instr = UInt(XLEN.W)
34 | val alu_out = UInt(XLEN.W)
35 | val rc = UInt(GPR_LEN.W)
36 | val ld_type = CtrlFlags.ldType()
37 | val pc = UInt(XLEN.W)
38 | val no_ldst = Bool()
39 |
40 | val valid = Bool()
41 | }
42 |
43 | // val dcache = Module(new CacheMini(XLEN, XLEN, 8, 8, 1))
44 | val dcache = Module(new UnCache(XLEN, XLEN, WBUFF_LEN, 1))
45 |
46 | // pipeline registers
47 | val preg = RegEnable(io.ex2mem.bits, !dcache.io.out.busy && !io.stall_all)
48 | io.ex2mem.stall := dcache.io.out.busy || io.stall_all
49 |
50 | dcache.io.axi <> io.dmem
51 |
52 | val addr = preg.alu_out // the ALU result is the memory address
53 |
54 | dcache.io.in.bits.addr := addr
55 | // dcache.io.in.bits.uncache := addr(31, 28) === "hb".U
56 | dcache.io.in.bits.we := !Flags.OHis(preg.st_type, CtrlFlags.stType.x)
57 | dcache.io.in.bits.wdata := Flags.onehotMux(preg.st_type, Seq( // store data replicated over the word so any byte lane carries it
58 | CtrlFlags.stType.x -> 0.U,
59 | CtrlFlags.stType.b -> Fill(XLEN/8, preg.rb_val(7, 0)),
60 | CtrlFlags.stType.h -> Fill(XLEN/16, preg.rb_val(15, 0)),
61 | CtrlFlags.stType.w -> preg.rb_val(31, 0),
62 | ))
63 | dcache.io.in.bits.size := Flags.onehotMux(preg.st_type, Seq( // 0/1/2 for byte/half/word; the store and load tables are OR-ed together
64 | CtrlFlags.stType.x -> 0.U,
65 | CtrlFlags.stType.b -> 0.U,
66 | CtrlFlags.stType.h -> 1.U,
67 | CtrlFlags.stType.w -> 2.U,
68 | )) | Flags.onehotMux(preg.ld_type, Seq(
69 | CtrlFlags.ldType.x -> 0.U,
70 | CtrlFlags.ldType.b -> 0.U,
71 | CtrlFlags.ldType.bu -> 0.U,
72 | CtrlFlags.ldType.h -> 1.U,
73 | CtrlFlags.ldType.hu -> 1.U,
74 | CtrlFlags.ldType.w -> 2.U,
75 | ))
76 | dcache.io.in.bits.wmask := Flags.onehotMux(preg.st_type, Seq( // byte-lane write mask inside the 32-bit word
77 | CtrlFlags.stType.x -> 0.U,
78 | CtrlFlags.stType.b -> ("b1".U<<addr(1, 0)), // restored from a garbled line: one lane, picked by addr(1, 0)
79 | CtrlFlags.stType.h -> ("b11".U<<(addr(1)##0.U(1.W))), // two lanes: lower or upper half, picked by addr(1)
80 | CtrlFlags.stType.w -> "b1111".U,
81 | ))
82 | // instructions that do not go through the cache (neither load nor store)
83 | val nolr = Flags.OHis(preg.ld_type, CtrlFlags.ldType.x) && Flags.OHis(preg.st_type, CtrlFlags.stType.x)
84 | dcache.io.in.req := preg.valid && !nolr && RegNext(!dcache.io.out.busy) && !io.stall_all
85 |
86 | val rdata_raw = dcache.io.out.rdata
87 | val wordData = if(XLEN == 64) Mux(addr(2), rdata_raw(63, 32), rdata_raw(31, 0))
88 | else rdata_raw(31, 0)
89 | val halfData = Mux(addr(1), wordData(31, 16), wordData(15, 0)) // pick the addressed half-word
90 | val byteData = Mux(addr(0), halfData(15, 8), halfData(7, 0)) // pick the addressed byte
91 |
92 | val rdata_mem = Flags.onehotMux(preg.ld_type, Seq( // asSInt entries are sign-extended, zext entries zero-extended
93 | CtrlFlags.ldType.x -> (0.U).zext,
94 | CtrlFlags.ldType.b -> byteData.asSInt,
95 | CtrlFlags.ldType.bu -> byteData.zext,
96 | CtrlFlags.ldType.h -> halfData.asSInt,
97 | CtrlFlags.ldType.hu -> halfData.zext,
98 | CtrlFlags.ldType.w -> wordData.zext,
99 | )).asUInt
100 |
101 | val mem_valid = preg.valid && !dcache.io.out.busy // the instruction in MEM completes this cycle
102 |
103 | io.mem2id.bypass_rc := preg.rc
104 | io.mem2id.bypass_en := mem_valid
105 | val wb_data = Mux(Flags.OHis(preg.ld_type, CtrlFlags.ldType.x), preg.alu_out, rdata_mem) // non-loads write back the ALU result, loads the memory data
106 | io.mem2id.bypass_val := wb_data
107 |
108 | // when(nolr){
109 | // io.mem2id.bypass_rc := Mux(preg.valid, preg.rc, 0.U)
110 | // io.mem2id.bypass_val := preg.alu_out
111 | // }.otherwise{
112 | // io.mem2id.bypass_rc := Mux(preg.valid && preg.ld_type === Flags.bp(CtrlFlags.ldType.w), preg.rc, 0.U)
113 | // io.mem2id.bypass_val := dcache.io.out.rdata
114 | // }
115 |
116 | io.mem2id.gpr_wid := preg.rc
117 | io.mem2id.gpr_wdata := wb_data
118 | io.mem2id.gpr_wen := mem_valid
119 |
120 | if(GlobalConfg.SIM){ // simulation only: memory write/read traces, pipeline counter (id 2) and committed-PC update
121 | import nagicore.unit.DPIC_TRACE_MEM
122 | val dpic_trace_mem_w = Module(new DPIC_TRACE_MEM(XLEN, XLEN))
123 | dpic_trace_mem_w.io.clk := clock
124 | dpic_trace_mem_w.io.rst := reset
125 | dpic_trace_mem_w.io.valid := dcache.io.in.req && dcache.io.in.bits.wmask.orR
126 | dpic_trace_mem_w.io.addr := dcache.io.in.bits.addr
127 | dpic_trace_mem_w.io.size := dcache.io.in.bits.size
128 | dpic_trace_mem_w.io.data := dcache.io.in.bits.wdata
129 | dpic_trace_mem_w.io.wmask := dcache.io.in.bits.wmask
130 |
131 | import nagicore.unit.DPIC_PERF_PIPE
132 | val perf_pipe_dcache = Module(new DPIC_PERF_PIPE())
133 | perf_pipe_dcache.io.clk := clock
134 | perf_pipe_dcache.io.rst := reset
135 | perf_pipe_dcache.io.id := 2.U
136 | perf_pipe_dcache.io.invalid := !mem_valid
137 | perf_pipe_dcache.io.stall := io.ex2mem.stall
138 |
139 | import nagicore.unit.DPIC_UPDATE_PC
140 | val dpic_update_pc = Module(new DPIC_UPDATE_PC(XLEN))
141 | dpic_update_pc.io.clk := clock
142 | dpic_update_pc.io.rst := reset
143 | dpic_update_pc.io.pc := preg.pc
144 | dpic_update_pc.io.wen := mem_valid
145 |
146 | import nagicore.unit.DPIC_TRACE_MEM
147 | val dpic_trace_mem_r = Module(new DPIC_TRACE_MEM(XLEN, XLEN))
148 | dpic_trace_mem_r.io.clk := clock
149 | dpic_trace_mem_r.io.rst := reset
150 | dpic_trace_mem_r.io.valid := mem_valid && preg.ld_type =/= Flags.bp(CtrlFlags.ldType.x)
151 | dpic_trace_mem_r.io.addr := preg.alu_out
152 | dpic_trace_mem_r.io.size := Flags.onehotMux(preg.ld_type, Seq(
153 | CtrlFlags.ldType.x -> 0.U,
154 | CtrlFlags.ldType.b -> 0.U,
155 | CtrlFlags.ldType.bu -> 0.U,
156 | CtrlFlags.ldType.h -> 1.U,
157 | CtrlFlags.ldType.hu -> 1.U,
158 | CtrlFlags.ldType.w -> 2.U,
159 | ))
160 | dpic_trace_mem_r.io.data := rdata_mem
161 | dpic_trace_mem_r.io.wmask := 0.U
162 |
163 |
164 | }
165 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024/stages/PREIF.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.unit.BTB
6 | import nagicore.unit.BTBPredOutIO
7 | import nagicore.loongarch.nscscc2024.{Config, CtrlFlags}
8 | import nagicore.GlobalConfg
9 |
10 |
11 | class preif2ifBits extends Bundle with Config{ // payload handed from PREIF to IF
12 | val pc = UInt(XLEN.W) // PC to fetch
13 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) // BTB prediction for this PC
14 |
15 | val valid = Bool() // false marks a bubble
16 | }
17 |
18 | class preif2ifIO extends Bundle{ // PREIF -> IF pipeline link
19 | val bits = Output(new preif2ifBits) // payload travelling downstream
20 | val stall = Input(Bool()) // back-pressure from IF
21 | }
22 |
23 | class PREIF extends Module with Config{ // PC generation: sequential pc+4, BTB prediction, or redirect from EX on a misprediction
24 | val io = IO(new Bundle {
25 | val preif2if = new preif2ifIO
26 | val ex2preif = Flipped(new ex2preifIO)
27 | })
28 |
29 | val nxt_pc = Wire(UInt(XLEN.W))
30 | if(GlobalConfg.SIM){
31 | dontTouch(nxt_pc) // keep the signal in simulation builds
32 | }
33 | val pc = RegEnable(nxt_pc, PC_START, !io.preif2if.stall) // resets to PC_START and only advances when IF is not stalling
34 | val pc4 = pc+4.U
35 | // When the pipeline is stalled and a branch misprediction is reported at the same time, the redirect must be latched and applied to the PC once the stall is released; it cannot simply overwrite the PC, otherwise one bubble cycle would be lost
36 | val bpu_fail_when_stall = RegInit(false.B)
37 | val bpu_fail_pc_when_stall = Reg(UInt(XLEN.W))
38 | when(io.preif2if.stall && io.ex2preif.bpu_fail){
39 | bpu_fail_when_stall := true.B
40 | bpu_fail_pc_when_stall := io.ex2preif.br_real_pc
41 | }
42 | when(!io.preif2if.stall){ // the latched redirect is consumed on the first unstalled cycle
43 | bpu_fail_when_stall := false.B
44 | }
45 |
46 | // val bpu_fail = RegEnable(io.ex2preif.bpu_fail || bpu_fail_when_stall, true.B, !io.preif2if.stall)
47 |
48 | val bpu = Module(new BTB(BTB_ENTRYS, XLEN, XLEN/2))
49 | bpu.io.pred.in.pc := pc
50 | bpu.io.update := io.ex2preif.bpu_update
51 |
52 | nxt_pc := Mux(bpu_fail_when_stall, bpu_fail_pc_when_stall, // priority: latched redirect, live redirect, predicted target, pc+4
53 | Mux(io.ex2preif.bpu_fail, io.ex2preif.br_real_pc,
54 | Mux(bpu.io.pred.out.taken, bpu.io.pred.out.target,
55 | pc4
56 | )
57 | )
58 | )
59 | io.preif2if.bits.pc := pc
60 | io.preif2if.bits.bpu_out := bpu.io.pred.out
61 | io.preif2if.bits.valid := !reset.asBool // valid whenever the module is out of reset
62 | }
63 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/Config.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | trait Config { // elaboration-time parameters mixed into the nscscc2024Dual modules and bundles
7 | def XLEN: Int = 32
8 | def GPR_NUM: Int = 32
9 | def GPR_LEN: Int = log2Up(GPR_NUM)
10 | // program counter value, as an XLEN-bit literal
11 | def PC_START: UInt = "h80000000".U(XLEN.W)
12 | // instruction cache geometry
13 | def ICACHE_WAYS: Int = 2
14 | def ICACHE_LINES: Int = 128
15 | def ICACHE_WORDS: Int = 4
16 | // write buffer length
17 | def WBUFF_LEN: Int = 8
18 | // number of BTB entries
19 | def BTB_ENTRYS: Int = 8
20 | // AXI4 ID width
21 | def AXI4IDBITS: Int = 4
22 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/Core.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.bus.{AXI4SRAM, AXI4IO, Ram, RamType, RamIO}
6 | import nagicore.bus.{AXI4XBar1toN, AXI4XBarNto1, AXI4SRAM_MultiCycs}
7 |
8 | class Core extends Module with Config{ // self-contained top level of the dual variant: six pipeline stages plus AXI fabric and internal memories
9 | val io = IO(new Bundle{}) // no external ports
10 |
11 | val preif_stage = Module(new stages.PREIF)
12 | val if_stage = Module(new stages.IF)
13 | val id_stage = Module(new stages.ID)
14 | val is_stage = Module(new stages.IS)
15 | val ex_stage = Module(new stages.EX)
16 | val mem_stage = Module(new stages.MEM)
17 |
18 | preif_stage.io.preif2if <> if_stage.io.preif2if // forward pipeline: PREIF -> IF -> ID -> IS -> EX -> MEM
19 | if_stage.io.if2id <> id_stage.io.if2id
20 | id_stage.io.id2is <> is_stage.io.id2is
21 | is_stage.io.is2ex <> ex_stage.io.is2ex
22 | ex_stage.io.ex2preif <> preif_stage.io.ex2preif // EX feeds back to PREIF
23 | ex_stage.io.ex2is <> is_stage.io.ex2is // EX feeds back to IS
24 | ex_stage.io.ex2mem <> mem_stage.io.ex2mem
25 | mem_stage.io.mem2is <> is_stage.io.mem2is // MEM feeds back to IS
26 | mem_stage.io.stall_all := false.B // global stall input is tied off here
27 |
28 | val isram_ctrl = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, 1.toLong< dsram_ctrl.io.axi // NOTE(review): this line is garbled in this copy (embedded lines 28-33 are fused and partly missing) - restore from the repository
34 |
35 | val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) // 2 masters (IF stage, MEM stage) -> instruction SRAM controller
36 | xbar_imem.io.in(0) <> if_stage.io.isram
37 | xbar_imem.io.out <> isram_ctrl.io.axi
38 |
39 | val xbar_dmem = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List( // presumably (base, size, flag) per slave, in out(i) order - TODO confirm against AXI4XBar1toN
40 | (0x80000000L, 0x400000L, false),
41 | (0x80400000L, 0x400000L, false),
42 | (0xbfd00000L, 0x400000L, false),
43 | )))
44 |
45 | xbar_dmem.io.in <> mem_stage.io.dmem // MEM-stage data port fans out to the three slaves below
46 | xbar_dmem.io.out(0) <> xbar_imem.io.in(1) // slave 0: instruction SRAM, shared with IF through xbar_imem
47 | xbar_dmem.io.out(1) <> dsram_ctrl.io.axi // slave 1: data SRAM controller
48 | xbar_dmem.io.out(2) <> uart_axi4.io.axi // slave 2: UART
49 |
50 | val isram = Module(new Ram(XLEN, 1.toLong< isram.io // NOTE(review): this line is garbled in this copy (embedded lines 50-53 are fused and partly missing) - restore from the repository
54 | dsram_ctrl.io.sram <> dsram.io
55 | uart_axi4.io.sram <> uart.io
56 | }
57 |
58 | class CoreNSCSCC extends Module with Config{ // Top level that exposes the instruction RAM, data RAM and UART as ports instead of instantiating them
59 | val RAM_DEPTH = 0x400000/4 // 0x400000 divided by 4; used as the depth of the 32-bit-wide RAM ports below
60 | val io = IO(new Bundle{
61 | val isram = Flipped(new RamIO(32, RAM_DEPTH))
62 | val dsram = Flipped(new RamIO(32, RAM_DEPTH))
63 | val uart = new AXI4IO(XLEN, XLEN)
64 | })
65 |
66 | val preif_stage = Module(new stages.PREIF)
67 | val if_stage = Module(new stages.IF)
68 | val id_stage = Module(new stages.ID)
69 | val is_stage = Module(new stages.IS)
70 | val ex_stage = Module(new stages.EX)
71 | val mem_stage = Module(new stages.MEM)
72 | // Stage interconnect: PREIF -> IF -> ID -> IS -> EX -> MEM, plus the EX/MEM feedback paths
73 | preif_stage.io.preif2if <> if_stage.io.preif2if
74 | if_stage.io.if2id <> id_stage.io.if2id
75 | id_stage.io.id2is <> is_stage.io.id2is
76 | is_stage.io.is2ex <> ex_stage.io.is2ex
77 | ex_stage.io.ex2preif <> preif_stage.io.ex2preif // branch redirect and BTB update
78 | ex_stage.io.ex2is <> is_stage.io.ex2is // EX bypass and issue-buffer clear
79 | ex_stage.io.ex2mem <> mem_stage.io.ex2mem
80 | mem_stage.io.mem2is <> is_stage.io.mem2is // MEM bypass and register write-back
81 | mem_stage.io.stall_all := false.B // the global stall input is tied off
82 |
83 | val isram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2)) // AXI4 front end for the instruction RAM port
84 | val dsram_axi4_wrapper = Module(new AXI4SRAM_MultiCycs(XLEN, XLEN, 8, RAM_DEPTH, 32, 3, 2)) // AXI4 front end for the data RAM port
85 |
86 | if_stage.io.isram <> isram_axi4_wrapper.io.axi // NOTE(review): both sides are connected again through xbar_imem below; this direct link looks superseded - confirm
87 |
88 | val xbar_imem = Module(new AXI4XBarNto1(2, XLEN, XLEN, AXI4IDBITS)) // 2-to-1 crossbar in front of the instruction RAM wrapper
89 | xbar_imem.io.in(0) <> if_stage.io.isram // master 0: instruction fetch
90 | xbar_imem.io.out <> isram_axi4_wrapper.io.axi
91 |
92 | val xbar_dmem = Module(new AXI4XBar1toN(XLEN, XLEN, AXI4IDBITS, List( // data-side 1-to-3 crossbar; each tuple is presumably (base, size, flag) and entry i presumably selects out(i) - TODO confirm
93 | (0x80000000L, 0x400000L, false),
94 | (0x80400000L, 0x400000L, false),
95 | (0xbfd00000L, 0x400000L, false),
96 | )))
97 |
98 | xbar_dmem.io.in <> mem_stage.io.dmem
99 | xbar_dmem.io.out(0) <> xbar_imem.io.in(1) // master 1 of the instruction-RAM crossbar: data-side accesses
100 | xbar_dmem.io.out(1) <> dsram_axi4_wrapper.io.axi
101 | xbar_dmem.io.out(2) <> io.uart
102 |
103 | isram_axi4_wrapper.io.sram <> io.isram
104 | dsram_axi4_wrapper.io.sram <> io.dsram
105 | }
106 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/CtrlFlags.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 |
7 | object CtrlFlags{ // Bit-pattern strings for the decoder's control fields; each apply() yields the matching UInt type
8 | // trait FlagsEnum {
9 | // def value: String
10 | // }
11 | // object aluASel{
12 | // sealed trait T extends FlagsEnum
13 | // case object ra extends T{
14 | // def value = "01"
15 | // }
16 | // case object pc extends T{
17 | // def value = "10"
18 | // }
19 | // }
20 | object aluASel{ // ALU operand A source (one bit set per choice)
21 | val ra = "01"
22 | val pc = "10"
23 | def apply(): UInt = UInt(ra.length.W)
24 | }
25 | object aluBSel{ // ALU operand B source (one bit set per choice)
26 | val rb = "001"
27 | val imm = "010"
28 | val num4 = "100"
29 | def apply(): UInt = UInt(rb.length.W)
30 | }
31 | object brpcAddSel{ // base added to the immediate when forming a branch target
32 | val pc = "01"
33 | val ra_val = "10"
34 | def apply(): UInt = UInt(pc.length.W)
35 | }
36 | object ldType{ // load kind; x is the "not a load" pattern
37 | val x = "000001"
38 | val b = "000010"
39 | val h = "000100"
40 | val w = "001000"
41 | val bu = "010000"
42 | val hu = "100000"
43 | def apply(): UInt = UInt(x.length.W)
44 | }
45 | object stType{ // store kind; x is the "not a store" pattern
46 | val x = "0001"
47 | val b = "0010"
48 | val h = "0100"
49 | val w = "1000"
50 | def apply(): UInt = UInt(x.length.W)
51 | }
52 | object InstrType{ // instruction class; unlike the fields above this one is binary-coded, not one-hot
53 | val alu = "00"
54 | val alu2 = "01"
55 | val ls = "10"
56 | val br = "11"
57 | def apply(): UInt = UInt(alu.length.W)
58 | }
59 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/EX.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.utils.Flags
6 | import nagicore.unit.ALU
7 | import nagicore.unit.BRU_SINGLE
8 | import nagicore.GlobalConfg
9 | import nagicore.unit.BTBUpdateIO
10 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags}
11 | import nagicore.unit.BR_TYPE
12 | import nagicore.unit.BP_TYPE
13 | import nagicore.unit.MULU_IMP
14 | import nagicore.unit.DIVU_IMP
15 |
16 | class ex2preifIO extends Bundle with Config{ // EX -> PREIF: BTB training data and misprediction redirect
17 | val bpu_update = new BTBUpdateIO(BTB_ENTRYS, XLEN) // update record forwarded to the BTB
18 | val bpu_fail = Bool() // EX detected a wrong prediction
19 | val br_real_pc = UInt(XLEN.W) // PC that fetch should continue from after a misprediction
20 | }
21 |
22 | class ex2isIO extends Bundle with Config{ // EX -> IS: result bypass for both issue slots plus the issue-buffer clear request
23 | // effective signal
24 | val bypass_rc1 = Output(UInt(GPR_LEN.W)) // slot 1: destination register index
25 | val bypass_val1 = Output(UInt(XLEN.W)) // slot 1: value to forward
26 | val bypass_en1 = Output(Bool()) // slot 1: forward entry is usable
27 |
28 | val bypass_rc2 = Output(UInt(GPR_LEN.W)) // slot 2: destination register index
29 | val bypass_val2 = Output(UInt(XLEN.W)) // slot 2: value to forward
30 | val bypass_en2 = Output(Bool()) // slot 2: forward entry is usable
31 |
32 | val clear_is = Output(Bool()) // drives the clear input of the IS-stage issue buffer
33 | }
34 |
35 | class ex2memBits extends Bundle with Config{ // Payload latched by the MEM stage; suffix 1/2 denotes the issue slot
36 | val instr1 = UInt(XLEN.W)
37 | val instr2 = UInt(XLEN.W)
38 |
39 | val rc1 = UInt(GPR_LEN.W) // destination register of slot 1
40 | val alu1_out = UInt(XLEN.W) // slot-1 ALU result; MEM also uses it as the load/store address
41 | val rc2 = UInt(GPR_LEN.W) // destination register of slot 2
42 | val alu2_out = UInt(XLEN.W) // slot-2 ALU result
43 |
44 | val rb1_val = UInt(XLEN.W) // slot-1 rb operand; MEM uses it as store data
45 |
46 | val ld_type = CtrlFlags.ldType() // load/store kind exists for slot 1 only
47 | val st_type = CtrlFlags.stType()
48 |
49 | val pc1 = UInt(XLEN.W)
50 | val pc2 = UInt(XLEN.W)
51 |
52 | val valid1 = Bool()
53 | val valid2 = Bool()
54 | }
55 |
56 | class ex2memIO extends Bundle{ // EX -> MEM link: payload forward, stall backward
57 | val bits = Output(new ex2memBits)
58 | val stall = Input(Bool()) // asserted by MEM to hold EX
59 | }
60 |
61 | class EX extends Module with Config{ // Execute stage: two ALUs, branch resolution for slot 1, bypass to IS and redirect to PREIF
62 | val io = IO(new Bundle{
63 | val ex2preif = new ex2preifIO
64 | val is2ex = Flipped(new is2exIO)
65 | val ex2mem = new ex2memIO
66 | val ex2is = new ex2isIO
67 | })
68 | // stall signal from next stage
69 | val stall_nxt = io.ex2mem.stall
70 |
71 | val alu1 = Module(new ALU(XLEN, MULU_IMP.synthesizer_DSP, DIVU_IMP.none)) // slot-1 ALU, built with the synthesizer_DSP multiplier option
72 | val alu2 = Module(new ALU(XLEN, MULU_IMP.none, DIVU_IMP.none)) // slot-2 ALU, built without multiplier or divider
73 | val busy = alu1.io.busy // only alu1's busy flag is observed
74 |
75 | // accept instrs from pre stage
76 | val ready_nxt = Wire(Bool())
77 | // pipeline registers
78 | val preg = RegEnable(io.is2ex.bits, ready_nxt)
79 |
80 |
81 | // After a branch misprediction, wait for the refilled instructions
82 | val wait_refill = RegInit(false.B)
83 | val br_killed1 = wait_refill && !preg.pc_refill1 // while waiting, a slot is dropped unless it carries the pc_refill mark
84 | val br_killed2 = wait_refill && !preg.pc_refill2
85 |
86 | // stall pre stages in force
87 | val stall_pre_counter = RegInit(0.U(2.W))
88 |
89 | val valid_instr1 = !br_killed1 && preg.valid1 && !busy && stall_pre_counter === 0.U
90 | val valid_instr1_once = valid_instr1 && !stall_nxt // additionally requires that MEM is not stalling
91 |
92 |
93 | val is_ld : Bool = valid_instr1 && !Flags.OHis(preg.ld_type, CtrlFlags.ldType.x)
94 | ready_nxt := !(stall_nxt || busy)
95 |
96 | // must stall when ld comes immediately unlike kill
97 | io.is2ex.stall := stall_pre_counter(1) =/= 0.U || is_ld || busy || stall_nxt // NOTE(review): the active counter update below only produces 0 or 1, so the bit-1 term looks constant false - confirm intent
98 |
99 | val bru = Module(new BRU_SINGLE(XLEN)) // branch unit; fed from slot 1 only
100 | bru.io.a := preg.ra1_val
101 | bru.io.b := preg.rb1_val
102 | bru.io.br_type := preg.br_type
103 |
104 | val br_pc = preg.imm1 + Mux(Flags.OHis(preg.brpcAdd_sel, CtrlFlags.brpcAddSel.ra_val), preg.ra1_val, preg.pc1) // branch target: imm + (ra value or pc)
105 |
106 | // valid_instr && bru.io.br_take
107 | // Misprediction: predicted taken but not taken or wrong target, or predicted not-taken but taken
108 | val br_pred_fail = Mux(preg.bpu_out.taken, !bru.io.br_take || preg.bpu_out.target =/= br_pc,
109 | bru.io.br_take) && valid_instr1_once
110 |
111 | io.ex2preif.bpu_fail := br_pred_fail
112 | io.ex2preif.br_real_pc := Mux(bru.io.br_take, br_pc, preg.pc1+4.U) // taken target, otherwise the fall-through address
113 |
114 | // Timing optimization: delay by one cycle
115 | io.ex2is.clear_is := RegNext(br_pred_fail)
116 | // All BTB update fields below are registered for one cycle as well
117 | io.ex2preif.bpu_update.bp_type := RegNext(Mux(Flags.OHis(preg.br_type, BR_TYPE.ALWAYS),
118 | Flags.U(BP_TYPE.jump), Flags.U(BP_TYPE.cond)
119 | ))
120 | io.ex2preif.bpu_update.hit := RegNext(preg.bpu_out.hit)
121 | io.ex2preif.bpu_update.index := RegNext(preg.bpu_out.index)
122 | io.ex2preif.bpu_update.pc := RegNext(preg.pc1)
123 | io.ex2preif.bpu_update.target := RegNext(io.ex2preif.br_real_pc)
124 | io.ex2preif.bpu_update.taken := RegNext(bru.io.br_take)
125 | io.ex2preif.bpu_update.valid := RegNext(valid_instr1 && !Flags.OHis(preg.br_type, BR_TYPE.NEVER))
126 | // Slot 2 is additionally squashed when slot 1 turns out to be a mispredicted branch
127 | val valid_instr2 = !br_killed2 && preg.valid2 && !br_pred_fail && !busy && stall_pre_counter === 0.U
128 | val valid_instr2_once = valid_instr2 && !stall_nxt
129 |
130 | if(GlobalConfg.SIM){ // simulation only: branch-prediction performance probe
131 | import nagicore.unit.DPIC_PERF_BRU
132 | import nagicore.unit.BR_TYPE
133 | val dpic_perf_bru = Module(new DPIC_PERF_BRU)
134 | dpic_perf_bru.io.clk := clock
135 | dpic_perf_bru.io.rst := reset
136 | dpic_perf_bru.io.valid := !Flags.OHis(preg.br_type, BR_TYPE.NEVER) && valid_instr1
137 | dpic_perf_bru.io.fail := br_pred_fail
138 | }
139 |
140 | io.ex2mem.bits.valid1 := valid_instr1
141 | io.ex2mem.bits.valid2 := valid_instr2
142 |
143 | io.ex2mem.bits.pc1 := preg.pc1
144 | io.ex2mem.bits.pc2 := preg.pc2
145 | // Enter the wait state on a misprediction; leave it once a valid slot carrying pc_refill arrives
146 | when(br_pred_fail){
147 | wait_refill := true.B
148 | }.elsewhen((preg.pc_refill1 && preg.valid1) || (preg.pc_refill2 && preg.valid2)){
149 | wait_refill := false.B
150 | }
151 |
152 | stall_pre_counter := Mux(!stall_nxt,
153 | /* On a load, request the previous stage to stall for 1 cycle and ignore the instruction in the following 1 cycle (EX) */
154 | Mux(is_ld, 1.U,
155 | Mux(stall_pre_counter===0.U, 0.U,
156 | stall_pre_counter-1.U
157 | )
158 | ), stall_pre_counter)
159 | // stall_pre_counter := Mux(!stall_nxt,
160 | // /* On a load, request the previous stage to stall for 1 cycle and ignore the instructions in the following 2 cycles (EX, DMEM) */
161 | // Mux(is_ld,
162 | // Mux(preg.ld_type === Flags.bp(CtrlFlags.ldType.w),
163 | // 1.U,
164 | // 2.U
165 | // ),
166 | // Mux(stall_pre_counter===0.U, 0.U,
167 | // stall_pre_counter-1.U
168 | // )
169 | // ), stall_pre_counter)
170 |
171 |
172 | io.ex2mem.bits.instr1 := preg.instr1
173 | io.ex2mem.bits.instr2 := preg.instr2
174 | // Operand selection for both ALUs, driven by the one-hot select fields
175 | val alu1_a = Flags.onehotMux(preg.alu1A_sel, Seq(
176 | CtrlFlags.aluASel.ra -> preg.ra1_val,
177 | CtrlFlags.aluASel.pc -> preg.pc1,
178 | ))
179 | val alu1_b = Flags.onehotMux(preg.alu1B_sel, Seq(
180 | CtrlFlags.aluBSel.rb -> preg.rb1_val,
181 | CtrlFlags.aluBSel.imm -> preg.imm1,
182 | CtrlFlags.aluBSel.num4 -> 4.U,
183 | ))
184 | val alu2_a = Flags.onehotMux(preg.alu2A_sel, Seq(
185 | CtrlFlags.aluASel.ra -> preg.ra2_val,
186 | CtrlFlags.aluASel.pc -> preg.pc2,
187 | ))
188 | val alu2_b = Flags.onehotMux(preg.alu2B_sel, Seq(
189 | CtrlFlags.aluBSel.rb -> preg.rb2_val,
190 | CtrlFlags.aluBSel.imm -> preg.imm2,
191 | CtrlFlags.aluBSel.num4 -> 4.U,
192 | ))
193 |
194 | // must assert for only one cycle
195 | // alu.io.valid := kill_nxt === 0.U && preg.valid && RegNext(accp_pre)
196 | alu1.io.valid := !br_killed1 && stall_pre_counter === 0.U && preg.valid1 && RegNext(ready_nxt) // RegNext(ready_nxt) limits the pulse to the cycle after preg was loaded
197 | alu1.io.a := alu1_a
198 | alu1.io.b := alu1_b
199 | alu1.io.op := preg.alu1_op
200 |
201 | alu2.io.valid := preg.valid2
202 | alu2.io.a := alu2_a
203 | alu2.io.b := alu2_b
204 | alu2.io.op := preg.alu2_op
205 |
206 | io.ex2mem.bits.alu1_out := alu1.io.out
207 | io.ex2mem.bits.rb1_val := preg.rb1_val
208 | io.ex2mem.bits.rc1 := preg.rc1
209 |
210 | io.ex2mem.bits.alu2_out := alu2.io.out
211 | io.ex2mem.bits.rc2 := preg.rc2
212 |
213 | io.ex2mem.bits.ld_type := preg.ld_type
214 | io.ex2mem.bits.st_type := preg.st_type
215 | io.ex2mem.bits.pc1 := preg.pc1 // repeats the pc1 assignment made earlier with the same value
216 | // Bypass the ALU results of both slots back to the IS stage
217 | io.ex2is.bypass_rc1 := preg.rc1
218 | io.ex2is.bypass_val1 := alu1.io.out
219 | io.ex2is.bypass_en1 := valid_instr1
220 |
221 | io.ex2is.bypass_rc2 := preg.rc2
222 | io.ex2is.bypass_val2 := alu2.io.out
223 | io.ex2is.bypass_en2 := valid_instr2
224 |
225 | if(GlobalConfg.SIM){ // simulation only: pipeline utilisation probe with id 1
226 | import nagicore.unit.DPIC_PERF_PIPE
227 | val perf_pipe_ex = Module(new DPIC_PERF_PIPE())
228 | perf_pipe_ex.io.clk := clock
229 | perf_pipe_ex.io.rst := reset
230 | perf_pipe_ex.io.id := 1.U
231 | perf_pipe_ex.io.invalid := !io.ex2mem.bits.valid1
232 | perf_pipe_ex.io.stall := io.is2ex.stall
233 | }
234 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/ID.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.unit.GPR
6 | import nagicore.unit.ALU_OP
7 | import nagicore.unit.BR_TYPE
8 | import nagicore.unit.BTBPredOutIO
9 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags, Decoder}
10 | import nagicore.GlobalConfg
11 |
12 |
13 | class id2isBits extends Bundle with Config{ // One decoded instruction as stored in the IS-stage issue buffer
14 | val instr = UInt(XLEN.W)
15 | val pc = UInt(XLEN.W)
16 | val ra = UInt(GPR_LEN.W) // source register index a
17 | val aluA_sel = CtrlFlags.aluASel()
18 | val rb = UInt(GPR_LEN.W) // source register index b
19 | val aluB_sel = CtrlFlags.aluBSel()
20 | val alu_op = ALU_OP()
21 | val rc = UInt(GPR_LEN.W) // destination register index
22 | val imm = UInt(XLEN.W)
23 | val br_type = BR_TYPE()
24 | val brpcAdd_sel = CtrlFlags.brpcAddSel()
25 | val ld_type = CtrlFlags.ldType()
26 | val st_type = CtrlFlags.stType()
27 | val instr_type = CtrlFlags.InstrType() // instruction class, consulted by IS for the dual-issue decision
28 | val pc_refill = Bool() // refill mark generated in PREIF and carried down the pipeline
29 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) // BTB prediction made for this pc
30 |
31 | val valid = Bool()
32 | }
33 |
34 | class id2isIO extends Bundle{ // ID -> IS link: payload forward, stall backward
35 | val bits = Output(new id2isBits)
36 | val stall = Input(Bool()) // asserted by IS to hold ID
37 | }
38 |
39 | class ID extends Module with Config{ // Decode stage: latches the fetched instruction and emits the decoder's control fields
40 | val io = IO(new Bundle{
41 | val if2id = Flipped(new if2idIO)
42 | val id2is = new id2isIO
43 | })
44 |
45 | // pipeline registers
46 | val preg = RegEnable(io.if2id.bits, !io.id2is.stall)
47 |
48 | io.id2is.bits.valid := preg.valid
49 | io.if2id.stall := io.id2is.stall // the stall from IS is passed upstream unchanged
50 |
51 | val decoder = Module(new Decoder(XLEN, GPR_LEN))
52 | decoder.io.instr := preg.instr
53 |
54 | io.id2is.bits.instr := preg.instr
55 | io.id2is.bits.pc := preg.pc
56 | // The commented-out code below is an earlier in-stage register file with bypassing; in this file only register indices leave ID
57 | // val gpr = Module(new GPR(XLEN, GPR_NUM, 2))
58 | // gpr.io.wen := io.mem2id.gpr_wen
59 | // gpr.io.waddr := io.mem2id.gpr_wid
60 | // gpr.io.wdata := io.mem2id.gpr_wdata
61 |
62 | // if(GlobalConfg.SIM){
63 | // import nagicore.unit.DPIC_UPDATE_GPR
64 | // val dpic_update_gpr = Module(new DPIC_UPDATE_GPR(XLEN, GPR_NUM))
65 | // dpic_update_gpr.io.clk := clock
66 | // dpic_update_gpr.io.rst := reset
67 | // dpic_update_gpr.io.id := gpr.io.waddr
68 | // dpic_update_gpr.io.wen := gpr.io.wen
69 | // dpic_update_gpr.io.wdata := gpr.io.wdata
70 | // }
71 |
72 | // def bypass_unit(rx: UInt, gpr_rdata: UInt):UInt = {
73 | // Mux(rx === 0.U, 0.U,
74 | // Mux(io.ex2id.bypass_rc === rx && io.ex2id.bypass_en, io.ex2id.bypass_val,
75 | // Mux(io.mem2id.bypass_rc === rx && io.mem2id.bypass_en, io.mem2id.bypass_val,
76 | // gpr_rdata
77 | // )
78 | // )
79 | // )
80 | // }
81 |
82 | // val ra = decoder.io.ra
83 | // gpr.io.raddr(0) := ra
84 | // // bypass
85 | io.id2is.bits.ra := decoder.io.ra
86 | io.id2is.bits.aluA_sel := decoder.io.aluA_sel
87 |
88 | // val rb = decoder.io.rb
89 | // gpr.io.raddr(1) := rb
90 | // // bypass
91 | // io.id2is.bits.rb_val := bypass_unit(rb, gpr.io.rdata(1))
92 |
93 | io.id2is.bits.rb := decoder.io.rb
94 | io.id2is.bits.aluB_sel := decoder.io.aluB_sel
95 | io.id2is.bits.alu_op := decoder.io.alu_op
96 | io.id2is.bits.rc := decoder.io.rc
97 | io.id2is.bits.imm := decoder.io.imm
98 | io.id2is.bits.br_type := decoder.io.br_type
99 | io.id2is.bits.brpcAdd_sel := decoder.io.brpcAdd_sel
100 | io.id2is.bits.ld_type := decoder.io.ld_type
101 | io.id2is.bits.st_type := decoder.io.st_type
102 | io.id2is.bits.instr_type := decoder.io.instr_type
103 | io.id2is.bits.bpu_out := preg.bpu_out // prediction and refill mark pass through from IF untouched
104 | io.id2is.bits.pc_refill := preg.pc_refill
105 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/IF.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.bus.AXI4IO
6 | //import nagicore.unit.{InstrsBuff, InstrsBuffCacheBundle}
7 | import nagicore.unit.cache.Cache
8 | import nagicore.GlobalConfg
9 | import nagicore.unit.cache.CacheReplaceType
10 | import nagicore.unit.BTBPredOutIO
11 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags}
12 | import nagicore.bus.RamType
13 |
14 |
15 | class if2idBits extends Bundle with Config{ // One fetched instruction together with its fetch-time metadata
16 | val pc = UInt(XLEN.W)
17 | val pc_refill = Bool() // refill mark generated in PREIF
18 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) // BTB prediction made for this pc
19 | val instr = UInt(XLEN.W) // instruction word read from the instruction cache
20 |
21 | val valid = Bool()
22 | }
23 |
24 | class if2idIO extends Bundle{ // IF -> ID link: payload forward, stall backward
25 | val bits = Output(new if2idBits)
26 | val stall = Input(Bool()) // asserted by ID to hold IF
27 | }
28 |
29 | class IF extends Module with Config{ // Fetch stage: reads the instruction at the PREIF pc through the instruction cache
30 | val io = IO(new Bundle {
31 | val preif2if = Flipped(new preif2ifIO)
32 | val if2id = new if2idIO
33 | val isram = new AXI4IO(XLEN, XLEN) // AXI4 master port of the instruction cache
34 | })
35 | // 2-stages 1cyc cache
36 | val icache = Module(new Cache(XLEN, XLEN, ICACHE_WAYS, ICACHE_LINES, ICACHE_WORDS, () => new preif2ifBits(), CacheReplaceType.LRU,
37 | dataRamType = RamType.BRAM_1CYC,
38 | tagVRamType = RamType.BRAM_1CYC,
39 | debug_id = 0))
40 | icache.io.axi <> io.isram
41 | // Read-only request: write mask is zero and write data is left unconnected
42 | icache.io.master.front.bits.addr := io.preif2if.bits.pc
43 | icache.io.master.front.bits.size := 2.U // presumably log2 of the access size in bytes, i.e. one 32-bit word - TODO confirm
44 | icache.io.master.front.bits.uncache := false.B
45 | icache.io.master.front.bits.wmask := 0.U
46 | icache.io.master.front.bits.valid := io.preif2if.bits.valid
47 | icache.io.master.front.bits.wdata := DontCare
48 | icache.io.master.front.bits.pipedata := io.preif2if.bits // the PREIF payload rides along with the request
49 | icache.io.master.back.stall := io.if2id.stall
50 |
51 | // Response side: instruction word plus the payload that accompanied the request
52 | io.if2id.bits.instr := icache.io.master.back.bits.rdata
53 | io.if2id.bits.valid := icache.io.master.back.bits.valid
54 | io.if2id.bits.pc := icache.io.master.back.bits.pipedata.pc
55 | io.if2id.bits.pc_refill := icache.io.master.back.bits.pipedata.pc_refill
56 | io.if2id.bits.bpu_out := icache.io.master.back.bits.pipedata.bpu_out
57 |
58 | io.preif2if.stall := icache.io.master.front.stall // PREIF is held whenever the cache front end stalls
59 |
60 | if(GlobalConfg.SIM){ // simulation only: pipeline utilisation probe with id 0
61 | import nagicore.unit.DPIC_PERF_PIPE
62 | val perf_pipe_if = Module(new DPIC_PERF_PIPE())
63 | perf_pipe_if.io.clk := clock
64 | perf_pipe_if.io.rst := reset
65 | perf_pipe_if.io.id := 0.U
66 | perf_pipe_if.io.invalid := !io.if2id.bits.valid
67 | perf_pipe_if.io.stall := io.preif2if.stall
68 | }
69 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/IS.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.unit.GPR
6 | import nagicore.unit.ALU_OP
7 | import nagicore.unit.BR_TYPE
8 | import nagicore.unit.BTBPredOutIO
9 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags, Decoder}
10 | import nagicore.GlobalConfg
11 | import nagicore.unit.RingBuff
12 | import nagicore.utils.Flags
13 |
14 |
15 | class is2exBits extends Bundle with Config{ // Up to two instructions issued to EX in one cycle; suffix 1/2 denotes the issue slot
16 | val instr1 = UInt(XLEN.W)
17 | val pc1 = UInt(XLEN.W)
18 | val ra1_val = UInt(XLEN.W) // operand values are already read and bypassed in IS
19 | val alu1A_sel = CtrlFlags.aluASel()
20 | val rb1_val = UInt(XLEN.W)
21 | val alu1B_sel = CtrlFlags.aluBSel()
22 | val alu1_op = ALU_OP()
23 | val rc1 = UInt(GPR_LEN.W)
24 | val imm1 = UInt(XLEN.W)
25 | val pc_refill1 = Bool()
26 | // Branch, load/store and prediction fields exist once and are taken from slot 1
27 | val br_type = BR_TYPE()
28 | val brpcAdd_sel = CtrlFlags.brpcAddSel()
29 | val ld_type = CtrlFlags.ldType()
30 | val st_type = CtrlFlags.stType()
31 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN)
32 |
33 | val valid1 = Bool()
34 |
35 | val instr2 = UInt(XLEN.W)
36 | val pc2 = UInt(XLEN.W)
37 | val ra2_val = UInt(XLEN.W)
38 | val alu2A_sel = CtrlFlags.aluASel()
39 | val rb2_val = UInt(XLEN.W)
40 | val alu2B_sel = CtrlFlags.aluBSel()
41 | val alu2_op = ALU_OP()
42 | val rc2 = UInt(GPR_LEN.W)
43 | val imm2 = UInt(XLEN.W)
44 | val pc_refill2 = Bool()
45 |
46 | val valid2 = Bool() // set only when the second instruction is issued together with the first
47 | }
48 |
49 | class is2exIO extends Bundle{ // IS -> EX link: payload forward, stall backward
50 | val bits = Output(new is2exBits)
51 | val stall = Input(Bool()) // asserted by EX to stop IS from popping the issue buffer
52 | }
53 |
54 | class IS extends Module with Config{ // Issue stage: buffers decoded instructions, picks one or two per cycle, reads and bypasses operands
55 | val io = IO(new Bundle{
56 | val id2is = Flipped(new id2isIO)
57 | val is2ex = new is2exIO
58 |
59 | val ex2is = Flipped(new ex2isIO)
60 | val mem2is = Flipped(new mem2isIO)
61 | })
62 |
63 | val issue_buffer = Module(new RingBuff(()=>new id2isBits, 8, rchannel=2, debug_id=0)) // ring buffer with two read channels
64 |
65 | issue_buffer.io.push := io.id2is.bits.valid
66 | issue_buffer.io.wdata := io.id2is.bits
67 | issue_buffer.io.clear := io.ex2is.clear_is // cleared on request from EX
68 |
69 | val is1 = issue_buffer.io.rdatas(0)
70 | val is2 = issue_buffer.io.rdatas(1)
71 | val data_hazard = (is1.rc === is2.ra || is1.rc === is2.rb) && is1.rc =/= 0.U // is2 reads the register is1 writes (register 0 excluded)
72 | // Dual-issue only when is2 is an ALU-class instruction and has no data hazard
73 | val issue_double =
74 | Flags.is(is2.instr_type, CtrlFlags.InstrType.alu) &&
75 | issue_buffer.io.rvalids(1) &&
76 | !data_hazard
77 |
78 | issue_buffer.io.pop := !io.is2ex.stall && !issue_buffer.io.empty
79 | issue_buffer.io.popN := issue_double // presumably selects popping two entries instead of one - TODO confirm against RingBuff
80 |
81 | // io.id2is.stall := io.is2ex.stall
82 | io.id2is.stall := issue_buffer.io.full
83 |
84 | val gpr = Module(new GPR(XLEN, GPR_NUM, 4, 2)) // register file; four read and two write ports are used below
85 | gpr.io.wen(0) := io.mem2is.gpr_wen1 && (!io.mem2is.gpr_wen2 || io.mem2is.gpr_wid2 =/= io.mem2is.gpr_wid1) // slot-1 write is dropped when slot 2 writes the same register
86 | gpr.io.waddr(0) := io.mem2is.gpr_wid1
87 | gpr.io.wdata(0) := io.mem2is.gpr_wdata1
88 |
89 | gpr.io.wen(1) := io.mem2is.gpr_wen2
90 | gpr.io.waddr(1) := io.mem2is.gpr_wid2
91 | gpr.io.wdata(1) := io.mem2is.gpr_wdata2
92 |
93 | if(GlobalConfg.SIM){ // simulation only: report both register write ports
94 | import nagicore.unit.DPIC_UPDATE_GPR2
95 | val dpic_update_gpr = Module(new DPIC_UPDATE_GPR2(XLEN, GPR_NUM))
96 | dpic_update_gpr.io.clk := clock
97 | dpic_update_gpr.io.rst := reset
98 |
99 | dpic_update_gpr.io.id1 := gpr.io.waddr(0)
100 | dpic_update_gpr.io.wen1 := gpr.io.wen(0)
101 | dpic_update_gpr.io.wdata1 := gpr.io.wdata(0)
102 |
103 | dpic_update_gpr.io.id2 := gpr.io.waddr(1)
104 | dpic_update_gpr.io.wen2 := gpr.io.wen(1)
105 | dpic_update_gpr.io.wdata2 := gpr.io.wdata(1)
106 | }
107 | // Operand bypass. Priority: register 0 reads as zero, then EX slot 2, EX slot 1, MEM slot 2, MEM slot 1, finally the register-file read data
108 | def bypass_unit(rx: UInt, gpr_rdata: UInt):UInt = {
109 | Mux(rx === 0.U, 0.U,
110 | Mux(io.ex2is.bypass_rc2 === rx && io.ex2is.bypass_en2, io.ex2is.bypass_val2,
111 | Mux(io.ex2is.bypass_rc1 === rx && io.ex2is.bypass_en1, io.ex2is.bypass_val1,
112 | Mux(io.mem2is.bypass_rc2 === rx && io.mem2is.bypass_en2, io.mem2is.bypass_val2,
113 | Mux(io.mem2is.bypass_rc1 === rx && io.mem2is.bypass_en1, io.mem2is.bypass_val1,
114 | gpr_rdata
115 | )
116 | )
117 | )
118 | )
119 | )
120 | }
121 | // Read ports 0/1 serve is1, ports 2/3 serve is2
122 | gpr.io.raddr(0) := is1.ra
123 | io.is2ex.bits.ra1_val := bypass_unit(is1.ra, gpr.io.rdata(0))
124 | io.is2ex.bits.alu1A_sel := is1.aluA_sel
125 | gpr.io.raddr(1) := is1.rb
126 | io.is2ex.bits.rb1_val := bypass_unit(is1.rb, gpr.io.rdata(1))
127 | io.is2ex.bits.alu1B_sel := is1.aluB_sel
128 |
129 | gpr.io.raddr(2) := is2.ra
130 | io.is2ex.bits.ra2_val := bypass_unit(is2.ra, gpr.io.rdata(2))
131 | io.is2ex.bits.alu2A_sel := is2.aluA_sel
132 | gpr.io.raddr(3) := is2.rb
133 | io.is2ex.bits.rb2_val := bypass_unit(is2.rb, gpr.io.rdata(3))
134 | io.is2ex.bits.alu2B_sel := is2.aluB_sel
135 |
136 | io.is2ex.bits.instr1 := is1.instr
137 | io.is2ex.bits.instr2 := is2.instr
138 |
139 | io.is2ex.bits.pc1 := is1.pc
140 | io.is2ex.bits.pc2 := is2.pc
141 |
142 | io.is2ex.bits.alu1_op := is1.alu_op
143 | io.is2ex.bits.alu2_op := is2.alu_op
144 |
145 | io.is2ex.bits.rc1 := is1.rc
146 | io.is2ex.bits.rc2 := is2.rc
147 |
148 | io.is2ex.bits.imm1 := is1.imm
149 | io.is2ex.bits.imm2 := is2.imm
150 | // Branch, load/store and prediction information is forwarded for slot 1 only
151 | io.is2ex.bits.br_type := is1.br_type
152 | io.is2ex.bits.brpcAdd_sel := is1.brpcAdd_sel
153 | io.is2ex.bits.ld_type := is1.ld_type
154 | io.is2ex.bits.st_type := is1.st_type
155 | io.is2ex.bits.bpu_out := is1.bpu_out
156 |
157 | io.is2ex.bits.pc_refill1 := is1.pc_refill
158 | io.is2ex.bits.pc_refill2 := is2.pc_refill
159 |
160 | io.is2ex.bits.valid1 := issue_buffer.io.rvalids(0)
161 | io.is2ex.bits.valid2 := issue_buffer.io.rvalids(1) && issue_double
162 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/MEM.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.bus.AXI4IO
6 | import nagicore.unit.cache.CacheMini
7 | import nagicore.utils.Flags
8 | import nagicore.GlobalConfg
9 | import nagicore.unit.cache.CacheReplaceType
10 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags}
11 | import nagicore.unit.cache.UnCache
12 |
13 | class mem2isIO extends Bundle with Config{ // MEM -> IS: result bypass and register-file write-back for both issue slots
14 | val bypass_rc1 = Output(UInt(GPR_LEN.W)) // slot 1: destination register index
15 | val bypass_val1 = Output(UInt(XLEN.W)) // slot 1: value to forward
16 | val bypass_en1 = Output(Bool()) // slot 1: forward entry is usable
17 |
18 | val bypass_rc2 = Output(UInt(GPR_LEN.W)) // slot 2: destination register index
19 | val bypass_val2 = Output(UInt(XLEN.W)) // slot 2: value to forward
20 | val bypass_en2 = Output(Bool()) // slot 2: forward entry is usable
21 |
22 | val gpr_wid1 = Output(UInt(GPR_LEN.W)) // slot 1: register to write
23 | val gpr_wdata1 = Output(UInt(XLEN.W)) // slot 1: write data
24 | val gpr_wen1 = Output(Bool()) // slot 1: write enable
25 |
26 | val gpr_wid2 = Output(UInt(GPR_LEN.W)) // slot 2: register to write
27 | val gpr_wdata2 = Output(UInt(XLEN.W)) // slot 2: write data
28 | val gpr_wen2 = Output(Bool()) // slot 2: write enable
29 | }
30 |
31 | class MEM extends Module with Config{ // Memory stage: performs slot-1 loads/stores through UnCache and writes both slots back
32 | val io = IO(new Bundle {
33 | val ex2mem = Flipped(new ex2memIO())
34 | val mem2is = new mem2isIO()
35 | val dmem = new AXI4IO(XLEN, XLEN) // AXI4 master port for data accesses
36 | val stall_all = Input(Bool()) // external stall; freezes this stage and is propagated to EX
37 | })
38 |
39 | class dcachePipeT extends Bundle { // not referenced anywhere else in this class
40 | val instr = UInt(XLEN.W)
41 | val alu_out = UInt(XLEN.W)
42 | val rc = UInt(GPR_LEN.W)
43 | val ld_type = CtrlFlags.ldType()
44 | val pc = UInt(XLEN.W)
45 | val no_ldst = Bool()
46 |
47 | val valid = Bool()
48 | }
49 |
50 | // val dcache = Module(new CacheMini(XLEN, XLEN, 8, 8, 1))
51 | val dcache = Module(new UnCache(XLEN, XLEN, WBUFF_LEN, 1))
52 |
53 | // pipeline registers
54 | val preg = RegEnable(io.ex2mem.bits, !dcache.io.out.busy && !io.stall_all)
55 | io.ex2mem.stall := dcache.io.out.busy || io.stall_all
56 |
57 | dcache.io.axi <> io.dmem
58 |
59 | val addr = preg.alu1_out // the slot-1 ALU result is the load/store address
60 |
61 | dcache.io.in.bits.addr := addr
62 | // dcache.io.in.bits.uncache := addr(31, 28) === "hb".U
63 | dcache.io.in.bits.we := !Flags.OHis(preg.st_type, CtrlFlags.stType.x)
64 | dcache.io.in.bits.wdata := Flags.onehotMux(preg.st_type, Seq( // narrow store data is replicated across the word; wmask picks the lanes
65 | CtrlFlags.stType.x -> 0.U,
66 | CtrlFlags.stType.b -> Fill(XLEN/8, preg.rb1_val(7, 0)),
67 | CtrlFlags.stType.h -> Fill(XLEN/16, preg.rb1_val(15, 0)),
68 | CtrlFlags.stType.w -> preg.rb1_val(31, 0),
69 | ))
70 | dcache.io.in.bits.size := Flags.onehotMux(preg.st_type, Seq( // 0/1/2 for byte/half/word; store and load contributions are OR-ed
71 | CtrlFlags.stType.x -> 0.U,
72 | CtrlFlags.stType.b -> 0.U,
73 | CtrlFlags.stType.h -> 1.U,
74 | CtrlFlags.stType.w -> 2.U,
75 | )) | Flags.onehotMux(preg.ld_type, Seq(
76 | CtrlFlags.ldType.x -> 0.U,
77 | CtrlFlags.ldType.b -> 0.U,
78 | CtrlFlags.ldType.bu -> 0.U,
79 | CtrlFlags.ldType.h -> 1.U,
80 | CtrlFlags.ldType.hu -> 1.U,
81 | CtrlFlags.ldType.w -> 2.U,
82 | ))
83 | dcache.io.in.bits.wmask := Flags.onehotMux(preg.st_type, Seq( // byte-lane write mask within the 32-bit word
84 | CtrlFlags.stType.x -> 0.U,
85 | CtrlFlags.stType.b -> ("b1".U<<addr(1, 0)),
86 | CtrlFlags.stType.h -> ("b11".U<<(addr(1)##0.U(1.W))),
87 | CtrlFlags.stType.w -> "b1111".U,
88 | ))
89 | // Instructions that do not access memory (neither a load nor a store)
90 | val nolr = Flags.OHis(preg.ld_type, CtrlFlags.ldType.x) && Flags.OHis(preg.st_type, CtrlFlags.stType.x)
91 | dcache.io.in.req := preg.valid1 && !nolr && RegNext(!dcache.io.out.busy) && !io.stall_all // request only if the unit was not busy in the previous cycle
92 |
93 | val rdata_raw = dcache.io.out.rdata
94 | val wordData = if(XLEN == 64) Mux(addr(2), rdata_raw(63, 32), rdata_raw(31, 0))
95 | else rdata_raw(31, 0)
96 | val halfData = Mux(addr(1), wordData(31, 16), wordData(15, 0)) // select the addressed half-word
97 | val byteData = Mux(addr(0), halfData(15, 8), halfData(7, 0)) // select the addressed byte
98 |
99 | val rdata_mem = Flags.onehotMux(preg.ld_type, Seq( // sign-extend (b, h) or zero-extend (bu, hu, w) the selected data
100 | CtrlFlags.ldType.x -> (0.U).zext,
101 | CtrlFlags.ldType.b -> byteData.asSInt,
102 | CtrlFlags.ldType.bu -> byteData.zext,
103 | CtrlFlags.ldType.h -> halfData.asSInt,
104 | CtrlFlags.ldType.hu -> halfData.zext,
105 | CtrlFlags.ldType.w -> wordData.zext,
106 | )).asUInt
107 |
108 | val valid1 = preg.valid1 && !dcache.io.out.busy // results count only once the memory unit is idle
109 | val valid2 = preg.valid2 && !dcache.io.out.busy
110 |
111 | io.mem2is.bypass_rc1 := preg.rc1
112 | io.mem2is.bypass_en1 := valid1
113 | val wb_data = Mux(Flags.OHis(preg.ld_type, CtrlFlags.ldType.x), preg.alu1_out, rdata_mem) // ALU result for non-loads, load data otherwise
114 | io.mem2is.bypass_val1 := wb_data
115 |
116 | io.mem2is.bypass_rc2 := preg.rc2
117 | io.mem2is.bypass_en2 := preg.valid2 // slot 2 forwards its latched ALU result; the busy flag is not part of this enable
118 | io.mem2is.bypass_val2 := preg.alu2_out
119 |
120 | // when(nolr){
121 | // io.mem2id.bypass_rc := Mux(preg.valid, preg.rc, 0.U)
122 | // io.mem2id.bypass_val := preg.alu_out
123 | // }.otherwise{
124 | // io.mem2id.bypass_rc := Mux(preg.valid && preg.ld_type === Flags.bp(CtrlFlags.ldType.w), preg.rc, 0.U)
125 | // io.mem2id.bypass_val := dcache.io.out.rdata
126 | // }
127 |
128 | io.mem2is.gpr_wid1 := preg.rc1
129 | io.mem2is.gpr_wdata1 := wb_data
130 | io.mem2is.gpr_wen1 := valid1
131 |
132 | io.mem2is.gpr_wid2 := preg.rc2
133 | io.mem2is.gpr_wdata2 := preg.alu2_out
134 | io.mem2is.gpr_wen2 := valid2
135 |
136 | if(GlobalConfg.SIM){ // simulation only: memory write/read traces, pipeline probe with id 2, committed-pc report
137 | import nagicore.unit.DPIC_TRACE_MEM
138 | val dpic_trace_mem_w = Module(new DPIC_TRACE_MEM(XLEN, XLEN))
139 | dpic_trace_mem_w.io.clk := clock
140 | dpic_trace_mem_w.io.rst := reset
141 | dpic_trace_mem_w.io.valid := dcache.io.in.req && dcache.io.in.bits.wmask.orR
142 | dpic_trace_mem_w.io.addr := dcache.io.in.bits.addr
143 | dpic_trace_mem_w.io.size := dcache.io.in.bits.size
144 | dpic_trace_mem_w.io.data := dcache.io.in.bits.wdata
145 | dpic_trace_mem_w.io.wmask := dcache.io.in.bits.wmask
146 |
147 | import nagicore.unit.DPIC_PERF_PIPE
148 | val perf_pipe_dcache = Module(new DPIC_PERF_PIPE())
149 | perf_pipe_dcache.io.clk := clock
150 | perf_pipe_dcache.io.rst := reset
151 | perf_pipe_dcache.io.id := 2.U
152 | perf_pipe_dcache.io.invalid := !valid1
153 | perf_pipe_dcache.io.stall := io.ex2mem.stall
154 |
155 | import nagicore.unit.DPIC_UPDATE_PC2
156 | val dpic_update_pc = Module(new DPIC_UPDATE_PC2(XLEN))
157 | dpic_update_pc.io.clk := clock
158 | dpic_update_pc.io.rst := reset
159 | dpic_update_pc.io.pc1 := preg.pc1
160 | dpic_update_pc.io.pc2 := preg.pc2
161 | dpic_update_pc.io.wen1:= valid1
162 | dpic_update_pc.io.wen2:= valid2
163 |
164 | import nagicore.unit.DPIC_TRACE_MEM
165 | val dpic_trace_mem_r = Module(new DPIC_TRACE_MEM(XLEN, XLEN))
166 | dpic_trace_mem_r.io.clk := clock
167 | dpic_trace_mem_r.io.rst := reset
168 | dpic_trace_mem_r.io.valid := valid1 && preg.ld_type =/= Flags.bp(CtrlFlags.ldType.x)
169 | dpic_trace_mem_r.io.addr := addr
170 | dpic_trace_mem_r.io.size := Flags.onehotMux(preg.ld_type, Seq(
171 | CtrlFlags.ldType.x -> 0.U,
172 | CtrlFlags.ldType.b -> 0.U,
173 | CtrlFlags.ldType.bu -> 0.U,
174 | CtrlFlags.ldType.h -> 1.U,
175 | CtrlFlags.ldType.hu -> 1.U,
176 | CtrlFlags.ldType.w -> 2.U,
177 | ))
178 | dpic_trace_mem_r.io.data := rdata_mem
179 | dpic_trace_mem_r.io.wmask := 0.U
180 |
181 |
182 | }
183 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/loongarch/nscscc2024Dual/stages/PREIF.scala:
--------------------------------------------------------------------------------
1 | package nagicore.loongarch.nscscc2024Dual.stages
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.unit.BTB
6 | import nagicore.unit.BTBPredOutIO
7 | import nagicore.loongarch.nscscc2024Dual.{Config, CtrlFlags}
8 | import nagicore.GlobalConfg
9 |
10 |
11 | class preif2ifBits extends Bundle with Config{ // Payload handed from the PREIF stage to the IF stage.
12 | val pc = UInt(XLEN.W) // current value of PREIF's PC register
13 | val pc_refill = Bool() // registered copy of ex2preif.bpu_fail (see PREIF)
14 | val bpu_out = new BTBPredOutIO(BTB_ENTRYS, XLEN) // BTB prediction looked up with `pc`
15 |
16 | val valid = Bool() // PREIF drives this as !reset
17 | }
18 |
19 | class preif2ifIO extends Bundle{ // PREIF -> IF link as seen from the PREIF side.
20 | val bits = Output(new preif2ifBits) // payload produced by PREIF
21 | val stall = Input(Bool()) // back-pressure from IF: while high, PREIF holds its PC unless ex2preif.bpu_fail is set
22 | }
23 |
24 | /** Pre-fetch stage: owns the PC register and asks the BTB where to fetch next. */
25 | class PREIF extends Module with Config{
26 |   val io = IO(new Bundle {
27 |     val preif2if = new preif2ifIO
28 |     val ex2preif = Flipped(new ex2preifIO)
29 |   })
30 |   // A misprediction redirect from EX is taken even while IF is stalling this stage.
31 |   val redirect = io.ex2preif.bpu_fail
32 |   val advance  = !io.preif2if.stall || redirect
33 |
34 |   val nxt_pc = Wire(UInt(XLEN.W))
35 |   if(GlobalConfg.SIM) dontTouch(nxt_pc)
36 |   val pc  = RegEnable(nxt_pc, PC_START, advance)
37 |   val pc4 = pc + 4.U
38 |
39 |   val bpu = Module(new BTB(BTB_ENTRYS, XLEN, XLEN/2))
40 |   bpu.io.update     := io.ex2preif.bpu_update
41 |   bpu.io.pred.in.pc := pc
42 |   val pred = bpu.io.pred.out
43 |
44 |   // Priority: EX redirect, then predicted-taken target, then the sequential PC.
45 |   nxt_pc := MuxCase(pc4, Seq(redirect -> io.ex2preif.br_real_pc, pred.taken -> pred.target))
46 |   io.preif2if.bits.pc        := pc
47 |   io.preif2if.bits.bpu_out   := pred
48 |   io.preif2if.bits.pc_refill := RegEnable(redirect, advance)
49 |   io.preif2if.bits.valid     := !reset.asBool
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/ALU.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | import nagicore.utils.Flags
7 | import nagicore.GlobalConfg
8 |
9 | object ALU_OP{ // 5-bit operation codes understood by ALU, written as bit-pattern strings.
10 | // ALU matches these strings against ALUIO.op with Flags.CasesMux.
11 | //
12 | // Two properties of the encoding are relied upon elsewhere in nagicore.unit and
13 | // must be preserved when codes are added or renumbered:
14 | //   * ALU forwards op(1, 0) to MULU, and MULU_OP is built with takeRight(2) from
15 | //     MUL / MULH / MULHU, so these three codes need distinct low two bits.
16 | //   * ALU drives divu.io.signed from op(0): the signed codes DIV / MOD have bit 0
17 | //     set, the unsigned codes DIVU / MODU have bit 0 clear.
18 | //
19 | // Superseded alternatives (this used to be commented-out code):
20 | //   - explicit one-hot values: X=0, ADD=1, SUB=2, AND=4, OR=8, XOR=16, LT=32,
21 | //     LTU=64, SL=128, SR=256, SRA=512, COPY_A=1024, COPY_B=2048, NOR=4096,
22 | //     MUL=8192;
23 | //   - an auto-numbered enumeration:
24 | //     X, ADD, SUB, AND, OR, XOR, LT, LTU, SL, SR, SRA, COPY_A, COPY_B, NOR,
25 | //     MUL, MULH, MULHU, DIV, DIVU, MOD, MODU = Value
26 | //
27 | val X = "00000"
28 | val ADD = "00001"
29 | val SUB = "00010"
30 | val AND = "00011"
31 | val OR = "00100"
32 | val XOR = "00101"
33 | val LT = "00110"
34 | val LTU = "00111"
35 | val SL = "01000"
36 | val SR = "01001"
37 | val SRA = "01010"
38 | val COPY_A = "01011"
39 | val COPY_B = "01100"
40 | val NOR = "01101"
41 | val MUL = "01110"
42 | val MULH = "01111"
43 | val MULHU = "10000"
44 | val DIV = "10001"
45 | val DIVU = "10010"
46 | val MOD = "10011"
47 | val MODU = "10100"
48 | def apply() = UInt(X.length().W) // hardware type as wide as the code strings (5 bits)
49 | }
50 |
51 | class ALUIO(dataBits: Int) extends Bundle{ // Port bundle of ALU; dataBits is the operand and result width.
52 | val a = Input(UInt(dataBits.W)) // first operand
53 | val b = Input(UInt(dataBits.W)) // second operand; its low bits are the shift amount for SL / SR / SRA
54 | val op = Input(ALU_OP()) // operation code, one of the ALU_OP patterns
55 | val sum = Output(UInt(dataBits.W)) // a + b, driven regardless of op
56 | val out = Output(UInt(dataBits.W)) // result selected by op (the CasesMux default is 0.U)
57 | val valid = Input(Bool()) // qualifies the start of a multiply / divide
58 | val busy = Output(Bool()) // high while a multiply / divide is being requested or is in flight
59 | }
60 |
61 | /** Integer ALU: single-cycle logic/arithmetic plus a multiplier (MULU) and a divider (DIVU).
62 |   *
63 |   * @param dataBits operand and result width
64 |   * @param mulu_imp multiplier implementation handed to MULU
65 |   * @param divu_imp divider implementation handed to DIVU
66 |   */
67 | class ALU(dataBits: Int, mulu_imp: MULU_IMP.MULU_IMP, divu_imp: DIVU_IMP.DIVU_IMP) extends Module {
68 |   val io = IO(new ALUIO(dataBits))
69 |   import ALU_OP._
70 |
71 |   // Shift amount = low log2(dataBits) bits of b. This used to be the literal slice
72 |   // b(4, 0), which is only right for 32-bit data; for dataBits = 32 nothing changes.
73 |   val shamt = io.b(log2Ceil(dataBits) - 1, 0)
74 |   val sum = io.a + io.b
75 |   // a - b evaluated with one extra bit; bit dataBits is set exactly when a < b (unsigned).
76 |   val mins = (0.U ## io.a) + (1.U ## ~io.b) + 1.U
77 |   // Signed compare: if the signs differ the negative operand is the smaller one,
78 |   // otherwise the unsigned borrow gives the answer.
79 |   val isLT = Mux(io.a(dataBits-1) ^ io.b(dataBits-1), io.a(dataBits-1), mins(dataBits))
80 |   val isLTU = mins(dataBits)
81 |   val or = io.a | io.b
82 |
83 |   io.sum := sum
84 |
85 |   // Multiplier: started only for MUL / MULH / MULHU. Its 2-bit opcode is the low two
86 |   // bits of the ALU opcode (see MULU_OP).
87 |   val mulu = Module(new MULU(dataBits, mulu_imp))
88 |   mulu.io.a := io.a
89 |   mulu.io.b := io.b
90 |   mulu.io.op := io.op(1, 0)
91 |   mulu.io.vaild := io.valid && Flags.CasesMux(io.op, Seq(
92 |     MUL   -> true.B,
93 |     MULH  -> true.B,
94 |     MULHU -> true.B,
95 |   ), false.B)
96 |
97 |   // Divider: started only for DIV / DIVU / MOD / MODU. Bit 0 of the ALU opcode
98 |   // distinguishes signed (1) from unsigned (0) in the ALU_OP encoding.
99 |   val divu = Module(new DIVU(dataBits, divu_imp))
100 |   divu.io.a := io.a
101 |   divu.io.b := io.b
102 |   divu.io.signed := io.op(0)
103 |   divu.io.valid := io.valid && Flags.CasesMux(io.op, Seq(
104 |     DIV  -> true.B,
105 |     DIVU -> true.B,
106 |     MOD  -> true.B,
107 |     MODU -> true.B,
108 |   ), false.B)
109 |
110 |   // must assert when mul or div type comes immediately or can not stall instrs from pre stage
111 |   io.busy := mulu.io.busy || divu.io.busy || mulu.io.vaild || divu.io.valid
112 |
113 |   io.out := Flags.CasesMux(io.op, Seq(
114 |     ADD    -> sum,
115 |     SUB    -> mins(dataBits-1, 0),
116 |     SL     -> (io.a << shamt),
117 |     SR     -> (io.a >> shamt),
118 |     SRA    -> (io.a.asSInt >> shamt).asUInt,
119 |     AND    -> (io.a & io.b),
120 |     OR     -> or,
121 |     XOR    -> (io.a ^ io.b),
122 |     LT     -> isLT,
123 |     LTU    -> isLTU,
124 |     COPY_A -> io.a,
125 |     COPY_B -> io.b,
126 |     NOR    -> (~or),
127 |
128 |     MUL    -> mulu.io.out,
129 |     MULH   -> mulu.io.out,
130 |     MULHU  -> mulu.io.out,
131 |     DIV    -> divu.io.quo,
132 |     DIVU   -> divu.io.quo,
133 |     MOD    -> divu.io.rem,
134 |     MODU   -> divu.io.rem,
135 |   ), 0.U)
136 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/BPU.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.utils.Flags
6 | import nagicore.utils.isPowerOf2
7 |
8 | object BP_TYPE{ // 2-bit branch kind reported to the BTB on update, written as bit-pattern strings.
9 | val dontcare = "??" // pattern with both bits left as '?'
10 | val jump = "01" // unconditional jump
11 | val cond = "10" // conditional jump
12 | def apply() = UInt(2.W) // hardware type carrying a BP_TYPE code
13 | }
14 |
15 | class BTBPredInIO(pcBits: Int) extends Bundle { // Lookup request to the branch predictor.
16 | val pc = UInt(pcBits.W) // PC whose tag is compared against the table
17 | }
18 |
19 | class BTBPredOutIO(entryNum: Int, pcBits: Int) extends Bundle { // Lookup result of the branch predictor.
20 | val taken = Bool() // BTB: a valid entry matched and the upper bit of its 2-bit counter is set
21 | val target = UInt(pcBits.W) // target stored in the selected entry; only meaningful when `taken`
22 | val hit = Bool() // BTB: some valid entry matched the tag
23 | val index = UInt(log2Ceil(entryNum).W) // index of the selected entry; BTBUpdateIO has matching hit/index fields for the later update
24 | }
25 |
26 | class BTBUpdateIO(entryNum: Int, pcBits: Int) extends Bundle { // Resolved-branch feedback used to train the BTB.
27 | val bp_type = BP_TYPE() // kind of the resolved branch (jump / cond)
28 | val taken = Bool() // cond jump taken or not
29 | val pc = UInt(pcBits.W) // PC of the branch; supplies the tag when a new entry is allocated
30 | val target = UInt(pcBits.W) // resolved target, written into the entry
31 | val hit = Bool() // true: update entry `index` in place; false: allocate a new entry
32 | val index = UInt(log2Ceil(entryNum).W) // hit btb index
33 | val valid = Bool() // the update is ignored unless this is high
34 | }
35 |
36 | class NoBTB(entryNum: Int, pcBits: Int, tagBits: Int, scInit: Int=0, instrBytes: Int=4) extends Module { // Stub with the same ports as BTB: never predicts taken and ignores updates.
37 | require(pcBits >= tagBits && tagBits > 0 && instrBytes > 0)
38 | val io = IO(new Bundle {
39 | val pred = new Bundle {
40 | val in = Input(new BTBPredInIO(pcBits))
41 | val out = Output(new BTBPredOutIO(entryNum, pcBits))
42 | }
43 | val update = Input(new BTBUpdateIO(entryNum, pcBits)) // unused here
44 | })
45 | io.pred.out := DontCare // target / hit / index carry no meaning
46 | io.pred.out.taken := false.B // always predict not-taken (overrides the DontCare above)
47 | }
48 |
49 | /** Branch target buffer: every entry is tag-compared in parallel, each with a 2-bit saturating counter.
50 |   *
51 |   * The lookup is combinational on io.pred.in.pc. Training happens through io.update,
52 |   * which carries a hit flag and entry index (BTBPredOutIO provides the matching
53 |   * fields at lookup time). On an update without a hit, a new entry is written at
54 |   * `entry_p`, which then advances by one (wrapping).
55 |   *
56 |   * @param entryNum   number of entries, must be a power of two
57 |   * @param pcBits     width of PCs and targets
58 |   * @param tagBits    number of PC bits, taken just above the instruction-offset bits, kept as tag
59 |   * @param scInit     reset value of every entry's counter
60 |   * @param instrBytes instruction size in bytes; log2 of it low PC bits are excluded from the tag
61 |   */
62 | class BTB(entryNum: Int, pcBits: Int, tagBits: Int, scInit: Int=0, instrBytes: Int=4) extends Module {
63 |   require(pcBits >= tagBits && tagBits > 0 && instrBytes > 0 && isPowerOf2(entryNum))
64 |   val io = IO(new Bundle {
65 |     val pred = new Bundle {
66 |       val in = Input(new BTBPredInIO(pcBits))
67 |       val out = Output(new BTBPredOutIO(entryNum, pcBits))
68 |     }
69 |     val update = Input(new BTBUpdateIO(entryNum, pcBits))
70 |   })
71 |   class BTBTableEntry extends Bundle {
72 |     // Two-bit saturating counter
73 |     // 00: strongly not taken, 01: not taken, 10: taken, 11: strongly taken
74 |     val sc = UInt(2.W)
75 |     val tag = UInt(tagBits.W)
76 |     val target = UInt(pcBits.W)
77 |     val valid = Bool()
78 |   }
79 |   // Tag = tagBits bits of the PC starting right above the byte-in-instruction offset.
80 |   def get_tag(pc: UInt): UInt = {
81 |     pc(log2Ceil(instrBytes)+tagBits-1, log2Ceil(instrBytes))
82 |   }
83 |   val table = RegInit(VecInit(Seq.fill(entryNum)({
84 |     val bundle = Wire(new BTBTableEntry())
85 |     bundle.sc := scInit.U(2.W)
86 |     bundle.tag := 0.U
87 |     bundle.target := 0.U
88 |     bundle.valid := false.B
89 |     bundle
90 |   })))
91 |
92 |   // Next entry to overwrite when an update has to allocate.
93 |   val entry_p = RegInit(0.U(log2Ceil(entryNum).W))
94 |
95 |   val pred_tag = get_tag(io.pred.in.pc)
96 |   val pred_hits = VecInit.tabulate(entryNum){
97 |     i => pred_tag === table(i).tag && table(i).valid
98 |   }
99 |   val pred_hit = pred_hits.reduceTree(_ || _)
100 |   // pred_hits is not guaranteed to be one-hot: allocation never checks whether the
101 |   // tag is already present, so two updates for the same branch that both carry
102 |   // hit = false leave two valid entries with equal tags. OHToUInt would OR the
103 |   // matching indices and select an unrelated entry (for the prediction and, via
104 |   // `index`, for the later update). Take the lowest matching index instead; for a
105 |   // one-hot vector the result is the same as OHToUInt. Without any match the
106 |   // index/target are meaningless and `taken`/`hit` are low.
107 |   val pred_hit_index = PriorityEncoder(pred_hits)
108 |   val pred_hit_entry = table(pred_hit_index)
109 |   io.pred.out.taken := pred_hit && pred_hit_entry.sc(1)
110 |   io.pred.out.target := pred_hit_entry.target
111 |   io.pred.out.hit := pred_hit
112 |   io.pred.out.index := pred_hit_index
113 |
114 |   when(io.update.valid){
115 |     when(Flags.OHis(io.update.bp_type, BP_TYPE.jump)){
116 |       // Unconditional jumps are always strongly taken.
117 |       when(io.update.hit){
118 |         table(io.update.index).sc := 3.U
119 |         table(io.update.index).target := io.update.target
120 |       }.otherwise{
121 |         table(entry_p).sc := 3.U
122 |         table(entry_p).target := io.update.target
123 |         table(entry_p).tag := get_tag(io.update.pc)
124 |         table(entry_p).valid := true.B
125 |         entry_p := entry_p + 1.U
126 |       }
127 |     }.elsewhen(Flags.OHis(io.update.bp_type, BP_TYPE.cond)){
128 |       when(io.update.hit){
129 |         val sc = table(io.update.index).sc
130 |         table(io.update.index).sc := Mux(io.update.taken,
131 |           // 11 -> 11, 00 -> 01, 01 -> 10, 10 -> 11
132 |           Mux(sc===3.U, 3.U, Cat(sc(1)|sc(0), ~sc(0))),
133 |           // 00 -> 00, 01 -> 00, 10 -> 01, 11 -> 10
134 |           Mux(sc===0.U, 0.U, Cat(sc(1)&sc(0), ~sc(0)))
135 |         )
136 |         table(io.update.index).target := io.update.target
137 |       }.otherwise{
138 |         table(entry_p).sc := 2.U // weakly taken
139 |         table(entry_p).target := io.update.target
140 |         table(entry_p).tag := get_tag(io.update.pc)
141 |         table(entry_p).valid := true.B
142 |         entry_p := entry_p + 1.U
143 |       }
144 |     }
145 |   }
146 |
147 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/BRU.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | import nagicore.utils.Flags
7 |
8 | object BR_TYPE{ // Branch condition codes as 8-bit patterns with exactly one bit set each (decoded with Flags.onehotMux).
9 | val NEVER = "00000001" // never taken
10 | val EQ = "00000010" // taken when equal
11 | val NE = "00000100" // taken when not equal
12 | val LT = "00001000" // taken when less than (signed)
13 | val LTU = "00010000" // taken when less than (unsigned)
14 | val GE = "00100000" // taken when not less than (signed)
15 | val GEU = "01000000" // taken when not less than (unsigned)
16 | val ALWAYS = "10000000" // always taken
17 | def apply() = UInt(NEVER.length.W) // hardware type as wide as the code strings (8 bits)
18 | }
19 |
20 | class BRU_WITH_ALU_IO(dataBits: Int) extends Bundle{ // Ports of BRU_WITH_ALU.
21 | val alu_out = Input(UInt(dataBits.W)) // ALU result the branch decision is derived from
22 | val br_type = Input(BR_TYPE()) // branch condition, one of the BR_TYPE codes
23 | val br_take = Output(Bool()) // high when the branch condition holds
24 | }
25 |
26 | /** Branch resolver that reuses the result of a comparison already computed by the ALU.
27 |   *
28 |   * EQ / NE test whether alu_out is zero; LT / LTU / GE / GEU look only at bit 0 of
29 |   * alu_out, so presumably the ALU is issued the matching SUB / LT / LTU operation —
30 |   * confirm at the call site. br_type is decoded as a one-hot code (see BR_TYPE).
31 |   *
32 |   * @param dataBits width of alu_out
33 |   */
34 | class BRU_WITH_ALU(dataBits: Int) extends Module{
35 |   val io = IO(new BRU_WITH_ALU_IO(dataBits))
36 |   import BR_TYPE._
37 |
38 |   // The two facts about alu_out from which every branch kind is decided.
39 |   val aluIsZero = !io.alu_out.orR
40 |   val aluBit0   = io.alu_out(0)
41 |
42 |   // One outcome per branch kind.
43 |   io.br_take := Flags.onehotMux(io.br_type, Seq(
44 |     NEVER  -> false.B,
45 |     EQ     -> aluIsZero,
46 |     NE     -> !aluIsZero,
47 |     LT     -> aluBit0,
48 |     LTU    -> aluBit0,
49 |     GE     -> !aluBit0,
50 |     GEU    -> !aluBit0,
51 |     ALWAYS -> true.B,
52 |   ))
53 | }
54 |
55 | /** Stand-alone branch resolver: compares a and b itself and applies the branch condition.
56 |   *
57 |   * @param dataBits width of both operands
58 |   */
59 | class BRU_SINGLE(dataBits: Int) extends Module{
60 |   val io = IO(new Bundle{
61 |     val a = Input(UInt(dataBits.W))
62 |     val b = Input(UInt(dataBits.W))
63 |     val br_type = Input(BR_TYPE())
64 |     val br_take = Output(Bool())
65 |   })
66 |   import BR_TYPE._
67 |
68 |   val signA = io.a(dataBits-1)
69 |   val signB = io.b(dataBits-1)
70 |   // a - b with one extra bit; the extra (top) bit is set exactly when a < b unsigned.
71 |   val diff = Cat(0.U, io.a) + Cat(1.U, ~io.b) + 1.U
72 |   val unsignedLess = diff(dataBits)
73 |   // Equal signs: same answer as the unsigned compare. Different signs: a is smaller iff it is negative.
74 |   val signedLess = Mux(signA === signB, unsignedLess, signA)
75 |   val same = diff === 0.U
76 |
77 |   io.br_take := Flags.onehotMux(io.br_type, Seq(
78 |     NEVER  -> false.B,
79 |     EQ     -> same,
80 |     NE     -> !same,
81 |     LT     -> signedLess,
82 |     LTU    -> unsignedLess,
83 |     GE     -> !signedLess,
84 |     GEU    -> !unsignedLess,
85 |     ALWAYS -> true.B,
86 |   ))
87 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/DIVU.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.utils.Flags
6 |
7 | object DIVU_IMP extends Enumeration { // Elaboration-time choice of the divider implementation used by DIVU.
8 | type DIVU_IMP = Value
9 | val none, radix2 = Value // none: no divider logic (outputs DontCare, never busy); radix2: iterative divider, one quotient bit per cycle
10 | }
11 |
12 | class DIVU(dataBits: Int, imp_way: DIVU_IMP.DIVU_IMP = DIVU_IMP.radix2) extends Module{ // Multi-cycle integer divider producing quotient and remainder; implementation chosen by imp_way.
13 |   val io = IO(new Bundle{
14 |     val a = Input(UInt(dataBits.W))      // dividend
15 |     val b = Input(UInt(dataBits.W))      // divisor
16 |     val signed = Input(Bool())           // treat a and b as two's-complement numbers
17 |     val quo = Output(UInt(dataBits.W))   // quotient; sign-corrected view of the working register
18 |     val rem = Output(UInt(dataBits.W))   // remainder; takes the dividend's sign in signed mode
19 |     val valid = Input(Bool())            // request; sampled only while the iteration counter is zero
20 |     val busy = Output(Bool())            // high while a sampled request is pending or iterating
21 |   })
22 |
23 |   imp_way match {
24 |     case DIVU_IMP.radix2 => {
25 |       /* ref: https://github.com/MaZirui2001/LA32R-pipeline-scala */
26 |
27 |       /* stage1: solve sign */
28 |       val sign_s = Mux(io.signed, io.a(dataBits-1) ^ io.b(dataBits-1), false.B) // quotient is negative
29 |       val sign_r = Mux(io.signed, io.a(dataBits-1), false.B)                    // remainder is negative
30 |       val src1 = Mux(io.signed && io.a(dataBits-1), ~io.a + 1.U, io.a)          // |a|
31 |       val src2 = Mux(io.signed && io.b(dataBits-1), ~io.b + 1.U, io.b)          // |b|
32 |
33 |       // get highest 1 in src1 (expressed as its number of leading zeros)
34 |       // TODO use log2
35 |       val high_rev = PriorityEncoder(Reverse(src1))
36 |       // cnt holds up to dataBits+1, so size it from dataBits (6 bits for 32) instead of a fixed 6.
37 |       val cnt = RegInit(0.U(log2Ceil(dataBits + 2).W))
38 |       val stage1_fire = cnt === 0.U
39 |
40 |       val src1_reg1 = RegEnable(src1, stage1_fire)
41 |       val src2_reg1 = RegEnable(src2, stage1_fire)
42 |       val signed_reg1 = RegEnable(io.signed, stage1_fire)
43 |       val sign_s_reg1 = RegEnable(sign_s, stage1_fire)
44 |       val sign_r_reg1 = RegEnable(sign_r, stage1_fire)
45 |       val en_reg1 = RegEnable(io.valid, stage1_fire)
46 |       val high_rev_reg1 = RegEnable(high_rev, stage1_fire)
47 |
48 |       /* stage2+: div */
49 |       val stage2_init = en_reg1 && cnt === 0.U
50 |
51 |       val src2_reg2 = RegEnable(src2_reg1, stage2_init)
52 |       val signed_reg2 = RegEnable(signed_reg1, stage2_init)
53 |       val sign_s_reg2 = RegEnable(sign_s_reg1, stage2_init)
54 |       val sign_r_reg2 = RegEnable(sign_r_reg1, stage2_init)
55 |
56 |       when(cnt =/= 0.U){
57 |         cnt := cnt - 1.U
58 |       }.elsewhen(en_reg1){
59 |         cnt := (dataBits+1).U - high_rev_reg1 // leading zeros of the dividend need no iteration
60 |       }
61 |
62 |       val quo_rem_reg = RegInit(0.U((dataBits*2+1).W)) // working register: remainder in the upper part, quotient bits shifted in at bit 0
63 |       val quo = quo_rem_reg(dataBits-1, 0)
64 |       val rem = quo_rem_reg(dataBits*2-1, dataBits)
65 |       when(cnt =/= 0.U){
66 |         val mins = rem - src2_reg2
67 |         when(rem >= src2_reg2){
68 |           quo_rem_reg := mins(dataBits-1, 0) ## quo ## 1.U(1.W) // subtract, shift left, quotient bit 1
69 |         }.otherwise{
70 |           quo_rem_reg := quo_rem_reg(dataBits*2-1, 0) ## 0.U(1.W) // shift left, quotient bit 0
71 |         }
72 |       }.elsewhen(en_reg1){
73 |         quo_rem_reg := (0.U((dataBits+1).W) ## src1_reg1) << high_rev_reg1 // load |a| pre-shifted past its leading zeros
74 |       }
75 |
76 |       io.busy := cnt =/= 0.U || en_reg1
77 |
78 |       io.quo := Mux(signed_reg2,
79 |         Mux(sign_s_reg2, ~quo + 1.U, quo),
80 |         quo
81 |       )
82 |       // The remainder is read one bit higher than `rem` (bits 2*dataBits .. dataBits+1).
83 |       val rem_res = quo_rem_reg(dataBits*2, dataBits+1)
84 |       io.rem := Mux(signed_reg2,
85 |         Mux(sign_r_reg2, ~rem_res + 1.U, rem_res),
86 |         rem_res
87 |       )
88 |     }
89 |     case DIVU_IMP.none => {
90 |       io.busy := false.B
91 |       io.quo := DontCare
92 |       io.rem := DontCare
93 |     }
94 |   }
95 | }
96 |
97 | /*
98 | class DIVU(dataBits: Int) extends Module{
99 | val io = IO(new Bundle{
100 | val a = Input(UInt(dataBits.W))
101 | val b = Input(UInt(dataBits.W))
102 | val signed = Input(Bool())
103 | val quo = Output(UInt(dataBits.W))
104 | val rem = Output(UInt(dataBits.W))
105 | val valid = Input(Bool())
106 | val busy = Output(Bool())
107 | })
108 |
109 | /* ref: https://github.com/MaZirui2001/LA32R-pipeline-scala */
110 |
111 | /* stage1: solve sign */
112 | val sign_s = Mux(io.signed, io.a(dataBits-1) ^ io.b(dataBits-1), false.B)
113 | val sign_r = Mux(io.signed, io.a(dataBits-1), false.B)
114 | val src1 = Mux(io.signed && io.a(dataBits-1), ~io.a + 1.U, io.a)
115 | val src2 = Mux(io.signed && io.b(dataBits-1), ~io.b + 1.U, io.b)
116 |
117 | // get highest 1 in src1
118 | val high_rev = PriorityEncoder(Reverse(src1))
119 |
120 | val src1_reg1 = ShiftRegister(src1, 1, !io.busy)
121 | val src2_reg1 = ShiftRegister(src2, 1, !io.busy)
122 | val signed_reg1 = ShiftRegister(io.signed, 1, !io.busy)
123 | val sign_s_reg1 = ShiftRegister(sign_s, 1, !io.busy)
124 | val sign_r_reg1 = ShiftRegister(sign_r, 1, !io.busy)
125 | val en_reg1 = ShiftRegister(io.valid, 1, !io.busy)
126 | val high_rev_reg1 = ShiftRegister(high_rev, 1, !io.busy)
127 |
128 | /* stage2+: div */
129 | val cnt = RegInit(0.U(6.W))
130 | val stage2_init = en_reg1 && cnt === 0.U
131 |
132 | val src2_reg2 = RegEnable(src2_reg1, stage2_init)
133 | val signed_reg2 = RegEnable(signed_reg1, stage2_init)
134 | val sign_s_reg2 = RegEnable(sign_s_reg1, stage2_init)
135 | val sign_r_reg2 = RegEnable(sign_r_reg1, stage2_init)
136 |
137 | when(cnt =/= 0.U){
138 | cnt := cnt - 1.U
139 | }.elsewhen(en_reg1){
140 | cnt := (dataBits+1).U - high_rev_reg1
141 | }
142 |
143 | val quo_rem_reg = RegInit(0.U((dataBits*2+1).W))
144 | val quo = quo_rem_reg(dataBits-1, 0)
145 | val rem = quo_rem_reg(dataBits*2-1, dataBits)
146 | when(cnt =/= 0.U){
147 | val mins = rem - src2_reg2
148 | when(rem >= src2_reg2){
149 | quo_rem_reg := mins ## quo ## 1.U(1.W)
150 | }.otherwise{
151 | quo_rem_reg := quo_rem_reg(dataBits*2-1, 0) ## 0.U(1.W)
152 | }
153 | }.elsewhen(en_reg1){
154 | quo_rem_reg := (0.U((dataBits+1).W) ## src1_reg1) << high_rev_reg1
155 | }
156 |
157 | !io.busy := cnt === 0.U
158 |
159 | io.quo := Mux(signed_reg2,
160 | Mux(sign_s_reg2, ~quo + 1.U, quo),
161 | quo
162 | )
163 |
164 | io.rem := Mux(signed_reg2,
165 | Mux(sign_r_reg2, ~quo_rem_reg(dataBits*2, dataBits+1) + 1.U, quo_rem_reg(dataBits*2, dataBits+1)),
166 | quo_rem_reg(dataBits*2, dataBits+1)
167 | )
168 | }
169 | */
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/DPIC.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 | import chisel3._
3 | import chisel3.util._
4 | import nagicore.bus.RamIO
5 |
6 |
7 | class DPIC_RAM_1CYC(addr_width: Int, data_width: Int) extends BlackBox(Map("ADDR_WIDTH" -> addr_width, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Black-box RAM model implemented in /sv/DPIC_RAM_1CYC.sv; presumably 1-cycle access going by the name — confirm in the .sv file.
8 | val io = IO(new Bundle {
9 | val clk = Input(Clock())
10 | val rst = Input(Bool())
11 | val en = Input(Bool())
12 | val addr = Input(UInt(addr_width.W))
13 | val re = Input(Bool())
14 | val we = Input(Bool())
15 | val wmask = Input(UInt((data_width/8).W)) // one mask bit per byte of wdata
16 | val size = Input(UInt(2.W)) // NOTE(review): encoding is defined by the .sv side
17 | val wdata = Input(UInt(data_width.W))
18 | val rdata = Output(UInt(data_width.W))
19 | })
20 | addResource("/sv/DPIC_RAM_1CYC.sv") // implementation of this black box
21 | addResource("/sv/DPIC_TYPES_DEFINE.sv") // shared definitions used by the DPIC_* sources
22 | }
23 |
24 | class DPIC_RAM_2CYC(addr_width: Int, data_width: Int) extends BlackBox(Map("ADDR_WIDTH" -> addr_width, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Black-box RAM model implemented in /sv/DPIC_RAM_2CYC.sv; same ports as DPIC_RAM_1CYC, presumably 2-cycle access — confirm in the .sv file.
25 | val io = IO(new Bundle {
26 | val clk = Input(Clock())
27 | val rst = Input(Bool())
28 | val en = Input(Bool())
29 | val addr = Input(UInt(addr_width.W))
30 | val re = Input(Bool())
31 | val we = Input(Bool())
32 | val wmask = Input(UInt((data_width/8).W)) // one mask bit per byte of wdata
33 | val size = Input(UInt(2.W)) // NOTE(review): encoding is defined by the .sv side
34 | val wdata = Input(UInt(data_width.W))
35 | val rdata = Output(UInt(data_width.W))
36 | })
37 | addResource("/sv/DPIC_RAM_2CYC.sv") // implementation of this black box
38 | addResource("/sv/DPIC_TYPES_DEFINE.sv") // shared definitions used by the DPIC_* sources
39 | }
40 |
41 | class DPIC_UPDATE_GPR(gpr_num: Int, data_width: Int) extends BlackBox(Map("GPR_NUM" -> gpr_num, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Black box (/sv/DPIC_UPDATE_GPR.sv) that is told about one register write per cycle; presumably mirrors the GPR state into the simulator.
42 | val io = IO(new Bundle{
43 | val clk = Input(Clock())
44 | val rst = Input(Bool())
45 | val id = Input(UInt(log2Ceil(gpr_num).W)) // register number
46 | val wen = Input(Bool()) // qualifies id / wdata
47 | val wdata = Input(UInt(data_width.W)) // value written
48 | })
49 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
50 | addResource("/sv/DPIC_UPDATE_GPR.sv")
51 | }
52 |
53 | class DPIC_UPDATE_GPR2(gpr_num: Int, data_width: Int) extends BlackBox(Map("GPR_NUM" -> gpr_num, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Two-port variant of DPIC_UPDATE_GPR (/sv/DPIC_UPDATE_GPR2.sv): reports up to two register writes per cycle.
54 | val io = IO(new Bundle{
55 | val clk = Input(Clock())
56 | val rst = Input(Bool())
57 | val id1 = Input(UInt(log2Ceil(gpr_num).W)) // port 1: register number
58 | val wen1 = Input(Bool()) // port 1: qualifies id1 / wdata1
59 | val wdata1 = Input(UInt(data_width.W)) // port 1: value written
60 | val id2 = Input(UInt(log2Ceil(gpr_num).W)) // port 2: register number
61 | val wen2 = Input(Bool()) // port 2: qualifies id2 / wdata2
62 | val wdata2 = Input(UInt(data_width.W)) // port 2: value written
63 | })
64 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
65 | addResource("/sv/DPIC_UPDATE_GPR2.sv")
66 | }
67 |
68 | class DPIC_UPDATE_PC(data_width: Int) extends BlackBox(Map("DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Black box (/sv/DPIC_UPDATE_PC.sv) that is told one PC per cycle; presumably reports retired PCs to the simulator.
69 | val io = IO(new Bundle{
70 | val clk = Input(Clock())
71 | val rst = Input(Bool())
72 | val wen = Input(Bool()) // qualifies pc
73 | val pc = Input(UInt(data_width.W))
74 | })
75 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
76 | addResource("/sv/DPIC_UPDATE_PC.sv")
77 | }
78 |
79 | class DPIC_UPDATE_PC2(data_width: Int) extends BlackBox(Map("DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Two-port variant of DPIC_UPDATE_PC (/sv/DPIC_UPDATE_PC2.sv): up to two PCs per cycle.
80 | val io = IO(new Bundle{
81 | val clk = Input(Clock())
82 | val rst = Input(Bool())
83 | val wen1 = Input(Bool()) // qualifies pc1
84 | val wen2 = Input(Bool()) // qualifies pc2
85 | val pc1 = Input(UInt(data_width.W))
86 | val pc2 = Input(UInt(data_width.W))
87 | })
88 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
89 | addResource("/sv/DPIC_UPDATE_PC2.sv")
90 | }
91 |
92 | class DPIC_TRACE_MEM(addr_width: Int, data_width: Int) extends BlackBox(Map("ADDR_WIDTH" -> addr_width, "DATA_WIDTH" -> data_width)) with HasBlackBoxResource{ // Black box (/sv/DPIC_TRACE_MEM.sv) that is handed one memory access per cycle for tracing.
93 | val io = IO(new Bundle{
94 | val clk = Input(Clock())
95 | val rst = Input(Bool())
96 | val valid = Input(Bool()) // qualifies the access described by the other fields
97 | val addr = Input(UInt(addr_width.W))
98 | val wmask = Input(UInt((data_width/8).W)) // one bit per data byte; presumably all-zero marks a read — confirm in the .sv file
99 | val size = Input(UInt(2.W)) // NOTE(review): encoding is defined by the .sv side
100 | val data = Input(UInt(data_width.W)) // data read or written
101 | })
102 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
103 | addResource("/sv/DPIC_TRACE_MEM.sv")
104 | }
105 |
106 | class DPIC_PERF_CACHE extends BlackBox with HasBlackBoxResource{ // Black box (/sv/DPIC_PERF_CACHE.sv); presumably a cache performance counter hook — confirm in the .sv file.
107 | val io = IO(new Bundle{
108 | val clk = Input(Clock())
109 | val rst = Input(Bool())
110 | val valid = Input(Bool()) // qualifies id / access_type
111 | val id = Input(UInt(8.W)) // presumably identifies which cache is reporting — TODO confirm
112 | val access_type = Input(UInt(8.W)) // NOTE(review): encoding is defined by the .sv side
113 | })
114 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
115 | addResource("/sv/DPIC_PERF_CACHE.sv")
116 | }
117 |
118 | class DPIC_PERF_BRU extends BlackBox with HasBlackBoxResource{ // Black box (/sv/DPIC_PERF_BRU.sv); presumably a branch-resolution performance counter hook — confirm in the .sv file.
119 | val io = IO(new Bundle{
120 | val clk = Input(Clock())
121 | val rst = Input(Bool())
122 | val valid = Input(Bool()) // qualifies fail
123 | val fail = Input(UInt(8.W)) // presumably non-zero for a mispredicted branch — TODO confirm; 8 bits wide although used like a flag
124 | })
125 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
126 | addResource("/sv/DPIC_PERF_BRU.sv")
127 | }
128 |
129 | class DPIC_PERF_PIPE extends BlackBox with HasBlackBoxResource{ // Black box (/sv/DPIC_PERF_PIPE.sv) fed with per-stage pipeline bubble and stall flags.
130 | val io = IO(new Bundle{
131 | val clk = Input(Clock())
132 | val rst = Input(Bool())
133 | val id = Input(UInt(8.W)) // identifies the reporting pipeline stage
134 | // Used to count valid instructions handed to the next pipeline stage
135 | val invalid = Input(Bool())
136 | // Used to count stalls imposed on the previous pipeline stage
137 | val stall = Input(Bool())
138 | })
139 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
140 | addResource("/sv/DPIC_PERF_PIPE.sv")
141 | }
142 |
143 | class DPIC_PERF_BUFF extends BlackBox with HasBlackBoxResource{ // Black box (/sv/DPIC_PERF_BUFF.sv) fed with the state of a buffer; InstrsBuff instantiates it when GlobalConfg.SIM is set.
144 | val io = IO(new Bundle{
145 | val clk = Input(Clock())
146 | val rst = Input(Bool())
147 | val id = Input(UInt(8.W)) // identifies the reporting buffer
148 | val head = Input(UInt(8.W)) // read pointer of the buffer
149 | val tail = Input(UInt(8.W)) // write pointer of the buffer
150 | val full = Input(UInt(8.W)) // buffer-full flag (driven from a Bool by InstrsBuff)
151 | val reload = Input(UInt(8.W)) // flush/restart flag (InstrsBuff drives it with in.new_trans)
152 | })
153 | addResource("/sv/DPIC_TYPES_DEFINE.sv")
154 | addResource("/sv/DPIC_PERF_BUFF.sv")
155 | }
156 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/GPR.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | class GPRIO(dataBits: Int, addrBits: Int, rchannel: Int, wchannel: Int) extends Bundle { // Ports of the register file: rchannel read ports and wchannel write ports.
7 | val raddr = Input(Vec(rchannel, UInt(addrBits.W))) // register number per read port
8 | val rdata = Output(Vec(rchannel, UInt(dataBits.W))) // value read per read port
9 | val wen = Input(Vec(wchannel, Bool())) // write enable per write port
10 | val waddr = Input(Vec(wchannel, UInt(addrBits.W))) // register number per write port
11 | val wdata = Input(Vec(wchannel, UInt(dataBits.W))) // value to write per write port
12 | }
13 |
14 | /** Register file: register 0 reads as zero and ignores writes; reads see the stored value (no write bypass). */
15 | class GPR(dataBits: Int, regNum: Int, rchannel: Int, wchannel: Int) extends Module {
16 |   val io = IO(new GPRIO(dataBits, log2Up(regNum), rchannel, wchannel))
17 |   // Plain registers, declared without a reset value.
18 |   val regs = Reg(Vec(regNum, UInt(dataBits.W)))
19 |   // Read ports are combinational.
20 |   io.rdata.zip(io.raddr).foreach { case (data, addr) =>
21 |     data := Mux(addr === 0.U, 0.U, regs(addr))
22 |   }
23 |   // Write ports are connected in ascending order, so on an address clash the highest channel wins.
24 |   io.wen.zip(io.waddr).zip(io.wdata).foreach { case ((en, addr), data) =>
25 |     when(en && addr =/= 0.U){ regs(addr) := data }
26 |   }
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/InstrsBuff.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.loongarch.nscscc2024.CtrlFlags.ldType.bu
6 | import nagicore.GlobalConfg
7 | import cache.CachePipedIO
8 |
9 | class InstrsBuffCacheBundle extends Bundle{ // Side-band data InstrsBuff sends through the piped cache along with each request.
10 | val new_trans = Bool() // marks the first request of a new prefetch stream; InstrsBuff waits for it to come back
11 | }
12 |
13 | class InstrsBuffIO(addrBits:Int, dataBits: Int, cacheBlockWords: Int) extends Bundle{ // Ports of InstrsBuff: control inputs, the cache link, and the instruction output.
14 | val in = Input(new Bundle {
15 | // Whether to flush the buffer and start prefetching from a new address
16 | val new_trans = Bool()
17 | // The new prefetch start address
18 | val trans_addr = UInt(addrBits.W)
19 | // Consume one entry from the buffer
20 | val fetch = Bool()
21 | })
22 | val cache = Flipped(new CachePipedIO(addrBits, dataBits, cacheBlockWords, ()=>new InstrsBuffCacheBundle)) // link to the piped instruction cache
23 | val out = Output(new Bundle {
24 | val busy = Output(Bool()) // no instruction can be delivered this cycle
25 | val instr = Output(UInt(dataBits.W)) // entry at the head of the buffer
26 | })
27 | }
28 |
29 | /**
30 | * Instruction prefetch buffer; a prefetched instruction is read out in the next cycle
31 | *
32 | * @param addrBits
33 | * @param dataBits
34 | * @param cacheBlockWords number of dataBits-wide words per cache block
35 | * @param blockLen number of cache blocks buffered; must be a power of two
36 | */
37 | class InstrsBuff(addrBits:Int, dataBits: Int, cacheBlockWords: Int, blockLen: Int) extends Module{
38 | require((blockLen&(blockLen-1))==0)
39 | val io = IO(new InstrsBuffIO(addrBits, dataBits, cacheBlockWords))
40 | val buff = RegInit(VecInit(Seq.fill(blockLen*cacheBlockWords)(0.U(dataBits.W)))) // storage, one slot per word
41 | val buff_head = RegInit(0.U(log2Up(blockLen*cacheBlockWords).W)) // slot read next
42 | val buff_tail = RegInit(0.U(log2Up(blockLen*cacheBlockWords).W)) // first slot of the next block to be written
43 | if(GlobalConfg.SIM){
44 | dontTouch(buff_head)
45 | dontTouch(buff_tail)
46 | }
47 | val buff_valid = RegInit(VecInit.fill(blockLen*cacheBlockWords)(false.B)) // per-slot valid flag
48 | val empty = !buff_valid(buff_head) // nothing to read at the head
49 | val full = buff_valid(buff_tail + (cacheBlockWords-1).U) // last slot of the block at the tail is still occupied
50 |
51 | val cache_addr = RegInit(0.U(addrBits.W)) // address of the request currently presented to the cache
52 | io.cache.front.bits.addr := cache_addr
53 |
54 | object State extends ChiselEnum {
55 | // 0 1 2 3
56 | val idle, wait_cache, wait_new_trans, continue_read = Value
57 | }
58 |
59 | val state = RegInit(State.idle)
60 |
61 |
62 | io.out.busy := io.in.new_trans || (state === State.wait_new_trans) || (state === State.wait_cache) || (state === State.continue_read && empty) // note: not asserted in idle
63 | io.out.instr := buff(buff_head)
64 |
65 |
66 | io.cache.front.bits.pipedata.new_trans := false.B // defaults; overridden below by cache_new_trans() and the idle state
67 | io.cache.front.bits.valid := true.B
68 |
69 | when(!io.cache.front.stall && state =/= State.wait_cache){ // default: step to the next cache block whenever the cache is not stalling
70 | cache_addr := cache_addr + (cacheBlockWords*dataBits/8).U
71 | }
72 |
73 | val new_trans_offset = RegInit(0.U(log2Up(cacheBlockWords).W)) // word offset of the start address inside its block; cleared after the first block is stored
74 | val word_len = log2Ceil(cacheBlockWords)
75 | val byte_len = log2Ceil(dataBits/8)
76 |
77 | def cache_new_trans()={ // issue the first request of a new stream at the block-aligned start address
78 | val addr = io.in.trans_addr(addrBits-1, word_len+byte_len) ## 0.U((word_len+byte_len).W)
79 | cache_addr := addr // later connection, so it wins over the default increment above
80 | io.cache.front.bits.addr := addr
81 | io.cache.front.bits.pipedata.new_trans := true.B
82 | io.cache.front.bits.valid := true.B
83 | state := State.wait_new_trans
84 | new_trans_offset := io.in.trans_addr(word_len+byte_len-1, byte_len)
85 | }
86 |
87 | switch(state){
88 | is(State.idle){
89 | io.cache.front.bits.valid := false.B // no request unless cache_new_trans() below re-asserts valid
90 | when(io.in.new_trans){
91 | cache_new_trans()
92 | }
93 | }
94 | is(State.wait_cache){
95 | when(!io.cache.front.stall){ // NOTE(review): uses the current io.in.trans_addr, so the requester presumably keeps it stable while busy — confirm
96 | cache_new_trans()
97 | }
98 | }
99 | is(State.wait_new_trans){
100 | when(io.cache.back.bits.pipedata_s2.new_trans && io.cache.back.bits.valid){ // the tagged first request reached the cache output
101 | state := State.continue_read
102 | }
103 | }
104 | is(State.continue_read){
105 | when(io.in.new_trans){ // flush the buffer and restart the stream
106 | buff_head := 0.U
107 | buff_tail := 0.U
108 | buff_valid := VecInit.fill(blockLen*cacheBlockWords)(false.B)
109 | when(!io.cache.front.stall){
110 | cache_new_trans()
111 | }otherwise{
112 | // If the cache is stalled, wait for it to become free first
113 | state := State.wait_cache
114 | }
115 | }.otherwise{
116 | when(io.cache.back.bits.valid && !full){ // store one returned cache line at the tail
117 | for(i <- 0 until cacheBlockWords){
118 | buff(buff_tail+i.U) := io.cache.back.bits.rline(i)
119 | buff_valid(buff_tail+i.U) := (i.U >= new_trans_offset) // words before the start offset are not valid
120 | }
121 | buff_tail := buff_tail + cacheBlockWords.U
122 | buff_head := buff_head + new_trans_offset // skip the words before the start offset
123 | new_trans_offset := 0.U
124 | }
125 | when(io.in.fetch && !io.out.busy){ // pop one entry; connected last, so it overrides the buff_head update above
126 | buff_head := buff_head + 1.U
127 | buff_valid(buff_head) := false.B
128 | }
129 | }
130 | }
131 | }
132 |
133 | io.cache.front.bits.size := log2Up(dataBits/8).U
134 | io.cache.front.bits.uncache := false.B
135 | io.cache.front.bits.wdata := DontCare // read-only client: never writes
136 | io.cache.front.bits.wmask := 0.U
137 | io.cache.back.stall := full // hold the cache output while there is no room for another line
138 |
139 | if(GlobalConfg.SIM){ // simulation-only performance probe
140 | val dpic_perf_instrs_buff = Module(new DPIC_PERF_BUFF)
141 | dpic_perf_instrs_buff.io.clk := clock
142 | dpic_perf_instrs_buff.io.rst := reset
143 | dpic_perf_instrs_buff.io.id := 0.U
144 | dpic_perf_instrs_buff.io.head := buff_head
145 | dpic_perf_instrs_buff.io.tail := buff_tail
146 | dpic_perf_instrs_buff.io.full := full
147 | dpic_perf_instrs_buff.io.reload := io.in.new_trans
148 | }
149 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/MIAU.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.bus._
6 |
7 | /**
8 | * Max In Array Unit
9 | * Finds the maximum value in an array; designed for the final of the Loongson Cup individual contest
10 | *
11 | * @param addrBits
12 | * @param dataBits
13 | */
14 | class MIAU(addrBits:Int, dataBits: Int, idBits: Int) extends Module{
15 | val io = IO(new Bundle{
16 | val cmd = Flipped(new AXI4IO(addrBits, dataBits, idBits)) // slave port: reads return FIR, write channels are not served
17 | val mem = new AXI4IO(addrBits, dataBits, idBits) // master port used to scan memory and store the result
18 | })
19 |
20 | val MXR = RegInit(0.U(dataBits.W)) // running maximum (unsigned compare)
21 | val CMPR = RegInit(0.U(dataBits.W)) // last word read from memory
22 | val FIR = RegInit(0.U(dataBits.W)) // 0 until the result write has completed, then 233
23 |
24 | val raddr = Reg(UInt(addrBits.W)) // bookkeeping for the current cmd read burst; raddr does not influence the returned data
25 | val rid = Reg(UInt(idBits.W))
26 | val rlen = Reg(UInt(8.W))
27 |
28 | val rs_idle :: rs_r :: Nil = Enum(2)
29 | val rs = RegInit(rs_idle)
30 |
31 | when(io.cmd.ar.fire){ // accept a read burst on the cmd port
32 | raddr := io.cmd.ar.bits.addr
33 | rid := io.cmd.ar.bits.id
34 | rlen := io.cmd.ar.bits.len
35 | rs := rs_r
36 | }
37 |
38 | when(io.cmd.r.fire){ // one beat delivered
39 | raddr := raddr + (dataBits/8).U
40 | when(rlen === 0.U){
41 | rs := rs_idle
42 | }otherwise{
43 | rlen := rlen - 1.U
44 | }
45 | }
46 | io.cmd.ar.ready := rs === rs_idle
47 | io.cmd.r.valid := rs === rs_r
48 | io.cmd.r.bits.id := rid
49 | io.cmd.r.bits.last := rlen === 0.U
50 | io.cmd.r.bits.resp := 0.U
51 | io.cmd.r.bits.data := FIR // every beat returns FIR, whatever the address
52 |
53 | io.cmd.aw <> DontCare // NOTE(review): this also leaves the aw/w ready and b valid outputs as DontCare — confirm nobody writes to this port
54 | io.cmd.w <> DontCare
55 | io.cmd.b <> DontCare
56 |
57 | val axi_w_agent = Module(new AXI4WriteAgent(addrBits, dataBits, 1))
58 | axi_w_agent.io.axi.aw <> io.mem.aw
59 | axi_w_agent.io.axi.w <> io.mem.w
60 | axi_w_agent.io.axi.b <> io.mem.b
61 | axi_w_agent.io.cmd.in <> DontCare
62 | axi_w_agent.io.cmd.in.req := false.B // default: no write request
63 |
64 | object State extends ChiselEnum {
65 | val idle = Value(1.U)
66 | val read = Value(2.U)
67 | val cmp = Value(4.U)
68 | val write = Value(8.U)
69 | val end = Value(16.U)
70 | }
71 |
72 | val state = RegInit(State.idle)
73 |
74 | val mem_addr = RegInit("h80400000".U(dataBits.W)) // scan pointer; NOTE(review): sized with dataBits although it is an address — confirm addrBits == dataBits
75 |
76 | val axi_r_agent = Module(new AXI4ReadAgent(addrBits, dataBits, 1))
77 | axi_r_agent.io.axi.ar <> io.mem.ar
78 | axi_r_agent.io.axi.r <> io.mem.r
79 | axi_r_agent.io.cmd.in.addr := mem_addr
80 | axi_r_agent.io.cmd.in.len := 0.U
81 | axi_r_agent.io.cmd.in.size := log2Up(dataBits).U // NOTE(review): log2 of the bit width; InstrsBuff passes log2Up(dataBits/8) for its size field — confirm what the agent expects
82 | axi_r_agent.io.cmd.in.req := false.B // default: no read request
83 |
84 | when(state === State.idle){ // starts scanning right after reset, without any command
85 | state := State.read
86 | axi_r_agent.io.cmd.in.req := true.B
87 | }
88 |
89 | when(state === State.read && axi_r_agent.io.cmd.out.ready){ // read data available
90 | // printf(cf"state read at ${mem_addr}\n")
91 | state := State.cmp
92 | CMPR := axi_r_agent.io.cmd.out.rdata
93 | mem_addr := mem_addr + (dataBits/8).U
94 | }
95 | when(state === State.cmp){
96 | when(CMPR > MXR){
97 | MXR := CMPR
98 | }
99 | when(mem_addr === "h80700000".U){ // scanned 0x80400000 up to (not including) 0x80700000
100 | state := State.write
101 | }.otherwise{
102 | state := State.read
103 | axi_r_agent.io.cmd.in.req := true.B
104 | }
105 | }
106 | when(state === State.write){ // NOTE(review): MXR is updated in the preceding cmp cycle, so the last element is included
107 | printf(cf"mia finish\n")
108 | axi_w_agent.io.cmd.in.req := true.B
109 | axi_w_agent.io.cmd.in.addr := "h80700000".U
110 | axi_w_agent.io.cmd.in.len := 0.U
111 | axi_w_agent.io.cmd.in.size := log2Up(dataBits).U // NOTE(review): same size question as for the read agent
112 | axi_w_agent.io.cmd.in.wdata(0) := MXR
113 | axi_w_agent.io.cmd.in.wmask(0) := Fill(dataBits/4, "b1".U) // NOTE(review): dataBits/4 ones; byte masks elsewhere are dataBits/8 wide — confirm the agent's wmask width
114 | state := State.end // request is asserted for this single cycle only
115 | }
116 | when(state === State.end && axi_w_agent.io.cmd.out.ready){ // write finished: publish the completion value
117 | FIR := 233.U
118 | }
119 |
120 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/MULU.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.utils._
6 | import nagicore.GlobalConfg
7 |
8 | object MULU_IMP extends Enumeration { // implementation choices accepted by MULU's imp_way parameter
9 | type MULU_IMP = Value
10 | val none, synthesizer_1cyc, oneBitShift, xsArrayMul, MultiplierIP, synthesizer_DSP = Value
11 | }
12 |
13 | object MULU_OP{ // multiplier opcodes: the last two characters of the matching ALU_OP encodings
14 | val MUL = ALU_OP.MUL.takeRight(2)
15 | val MULH = ALU_OP.MULH.takeRight(2)
16 | val MULHU = ALU_OP.MULHU.takeRight(2)
17 | }
18 |
19 | /**
20 | * Multiplier unit
21 | *
22 | * @param dataBits operand and result width
23 | * @param imp_way implementation selector (see MULU_IMP). With GlobalConfg.SIM set, every choice uses the `*` operator directly; otherwise:
24 | * xsArrayMul: XiangShan ArrayMulDataModule; synthesizer_DSP: `*` wrapped in input/output registers; MultiplierIP: Xilinx multiplier black box
25 | * none: no multiplier, io.out is DontCare; anything else: `*` with no registers, left to the synthesizer
26 | * @note io.vaild only needs to be high for one cycle; how long io.busy stays high afterwards depends on imp_way
27 | * @note result slices are derived from dataBits: the low half of the product for MUL, the high half for MULH/MULHU
28 | */
29 | class MULU(dataBits: Int, imp_way: MULU_IMP.MULU_IMP = MULU_IMP.synthesizer_1cyc) extends Module{
30 | val io = IO(new Bundle{
31 | val a = Input(UInt(dataBits.W))
32 | val b = Input(UInt(dataBits.W))
33 | val op = Input(UInt(2.W))
34 | val out = Output(UInt(dataBits.W))
35 | val vaild = Input(Bool())
36 | val busy = Output(Bool())
37 | })
38 | if(GlobalConfg.SIM){
39 | imp_way match {
40 | case MULU_IMP.xsArrayMul | MULU_IMP.MultiplierIP | MULU_IMP.synthesizer_DSP => {
41 | io.busy := io.vaild || RegNext(io.vaild) // busy in the request cycle and the one after it
42 | }
43 | case _ => {
44 | io.busy := false.B
45 | }
46 | }
47 | io.out := Flags.CasesMux(io.op, Seq(
48 | MULU_OP.MUL -> (io.a.asSInt * io.b.asSInt)(dataBits-1, 0).asUInt,
49 | MULU_OP.MULH -> (io.a.asSInt * io.b.asSInt)(2*dataBits-1, dataBits).asUInt,
50 | MULU_OP.MULHU -> (io.a * io.b)(2*dataBits-1, dataBits),
51 | ), 0.U)
52 | }else{
53 | imp_way match {
54 | case MULU_IMP.xsArrayMul => {
55 | import nagicore.unit.ip.Xiangshan.ArrayMulDataModule
56 | val arrayMul = Module(new ArrayMulDataModule(dataBits+1))
57 | arrayMul.io.a := Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), io.a(dataBits-1)) ## io.a // extra top bit: 0 for MULHU, sign bit otherwise
58 | arrayMul.io.b := Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), io.b(dataBits-1)) ## io.b
59 | val valid_reg1 = RegNext(io.vaild)
60 | arrayMul.io.regEnables(0) := io.vaild
61 | arrayMul.io.regEnables(1) := valid_reg1
62 | // val res = arrayMul.io.result
63 | val res = RegNext(arrayMul.io.result)
64 | io.out := Flags.CasesMux(io.op, Seq(
65 | MULU_OP.MUL -> res(dataBits-1, 0),
66 | MULU_OP.MULH -> SignExt(res(2*dataBits-1, dataBits), dataBits),
67 | MULU_OP.MULHU -> res(2*dataBits-1, dataBits),
68 | ), 0.U)
69 | io.busy := io.vaild || valid_reg1
70 | }
71 | case MULU_IMP.synthesizer_DSP => {
72 | def DSPInPipe[T <: Data](a: T) = RegNext(a)
73 | def DSPOutPipe[T <: Data](a: T) = RegNext(a)
74 | val a = Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), io.a(dataBits-1)) ## io.a
75 | val b = Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), io.b(dataBits-1)) ## io.b
76 | val res = DSPOutPipe(DSPInPipe(a) * DSPInPipe(b))
77 | io.out := Flags.CasesMux(io.op, Seq(
78 | MULU_OP.MUL -> res(dataBits-1, 0),
79 | MULU_OP.MULH -> SignExt(res(2*dataBits-1, dataBits), dataBits),
80 | MULU_OP.MULHU -> res(2*dataBits-1, dataBits),
81 | ), 0.U)
82 | val busy = RegInit(false.B)
83 | when(io.vaild && !busy){ busy := true.B }
84 | val ready = DSPOutPipe(DSPInPipe(io.vaild)) // vaild delayed through the same two register stages as the data
85 | when(ready){ busy := false.B }
86 | io.busy := busy
87 | }
88 | case MULU_IMP.MultiplierIP => {
89 | Predef.println(s"Xilinx Multiplier IP mult_${dataBits+1}_unsigned_2stages needed")
90 | class MultiplierIP extends BlackBox{
91 | override val desiredName = s"mult_${dataBits+1}_unsigned_2stages"
92 | val io = IO(new Bundle {
93 | val CLK = Input(Clock())
94 | val A = Input(UInt((dataBits+1).W))
95 | val B = Input(UInt((dataBits+1).W))
96 | val P = Output(UInt(((dataBits+1)*2).W))
97 | })
98 | }
99 | val ip = Module(new MultiplierIP)
100 | ip.io.CLK := clock
101 | ip.io.A := Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), io.a(dataBits-1)) ## io.a
102 | ip.io.B := Flags.ifEqu(io.op, MULU_OP.MULHU, 0.U(1.W), io.b(dataBits-1)) ## io.b
103 | val res = ip.io.P
104 | io.out := Flags.CasesMux(io.op, Seq(
105 | MULU_OP.MUL -> res(dataBits-1, 0),
106 | MULU_OP.MULH -> SignExt(res(2*dataBits-1, dataBits), dataBits),
107 | MULU_OP.MULHU -> res(2*dataBits-1, dataBits),
108 | ), 0.U)
109 | io.busy := io.vaild || RegNext(io.vaild) || RegNext(RegNext(io.vaild))
110 | }
111 | case MULU_IMP.none => {
112 | io.busy := false.B
113 | io.out := DontCare
114 | }
115 | case _ => {
116 | io.busy := false.B
117 | io.out := Flags.CasesMux(io.op, Seq(
118 | MULU_OP.MUL -> (io.a.asSInt * io.b.asSInt)(dataBits-1, 0).asUInt,
119 | MULU_OP.MULH -> (io.a.asSInt * io.b.asSInt)(2*dataBits-1, dataBits).asUInt,
120 | MULU_OP.MULHU -> (io.a * io.b)(2*dataBits-1, dataBits),
121 | ), 0.U)
122 | }
123 | }
124 | }
125 |
126 | // if(imp_way == MULU_IMP.synthesizer){
127 |
128 | // }else{
129 | // // TODO
130 | // /*
131 | // Principle:
132 | // for an n-bit by n-bit multiply, Booth encoding reduces it to the sum of n/2 partial products of 2*n bits each,
133 | // a Wallace tree then turns that into 2*n Wallace trees of n/2 bits, ending in one addition of two 2*n-bit numbers,
134 | // where each n/2-bit Wallace tree adds n/2 one-bit values,
135 | // */
136 | // // x * y
137 | // def booth2(x: UInt, y: UInt, n: Int, yi: Int) = {
138 | // assert(yi>=1&&yi<=y.getWidth-1)
139 | // val t = WireDefault(x)
140 | // switch(y(yi+1,yi-1)){
141 | // is(0.U){ t := 0.U }
142 | // is(3.U){ t := x(n-2, 0) ## 0.U(1.W) }
143 | // is(4.U){ t := x(n-2, 0) ## 0.U(1.W) }
144 | // is(7.U){ t := 0.U }
145 | // }
146 | // Mux(y(yi+1), ~t + 1.U, t)
147 | // }
148 | // // Carry-Save Adder
149 | // def CSA(a: UInt, b: UInt, cin: UInt) = {
150 | // assert(a.getWidth==b.getWidth&&b.getWidth==cin.getWidth)
151 | // val res = Vec(2, UInt(a.getWidth.W))
152 | // val a_xor_b = a ^ b
153 | // val a_and_b = a & b
154 | // val sum = a_xor_b ^ cin
155 | // val cout = a_and_b | (a_xor_b & cin)
156 | // res(0) := sum
157 | // res(1) := cout
158 | // res
159 | // }
160 | // }
161 |
162 | }
163 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/RingBuff.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import nagicore.GlobalConfg
6 |
7 | class RingBuffIO[T <: Bundle](dataT: ()=> T, rchannel: Int) extends Bundle{ // port bundle of RingBuff with rchannel read ports
8 | val full = Output(Bool())
9 | val empty = Output(Bool())
10 |
11 | val push = Input(Bool()) // enqueue wdata (RingBuff ignores it while full)
12 | val wdata = Input(dataT())
13 | val pop = Input(Bool()) // dequeue popN+1 entries
14 | val popN = Input(UInt(log2Up(rchannel).W)) // number of entries to pop, minus one
15 | val rdatas = Output(Vec(rchannel, dataT())) // the rchannel entries starting at the head
16 | val rvalids = Output(Vec(rchannel, Bool())) // valid bit of each entry in rdatas
17 | val clear = Input(Bool()) // reset both pointers and drop every entry
18 | }
19 |
20 | /**
21 | * Ring queue with several read ports and one write port. To read several entries at once, raise pop and pass popN to select how many entries are read and removed
22 | * Note: the number of entries read (popN+1) must be chosen from rvalids and must not exceed the number of valid entries; the module does not check this
23 | *
24 | * @param dataT factory for the element type
25 | * @param len number of slots; the require below rejects values that are not a power of two
26 | * @param rchannel number of read ports
27 | * @param debug_id id reported to the SIM-only DPIC_PERF_BUFF probe
28 | * @note clear takes priority over push and pop in the same cycle
29 | */
30 | class RingBuff[T <: Bundle](dataT: ()=> T, len: Int, rchannel: Int, debug_id: Int) extends Module{
31 | require((len&(len-1))==0)
32 | val io = IO(new RingBuffIO(dataT, rchannel))
33 | val buff = Reg(Vec(len, dataT()))
34 | val buff_head = RegInit(0.U(log2Up(len).W)) // index of the oldest entry (read side)
35 | val buff_tail = RegInit(0.U(log2Up(len).W)) // index of the next free slot (write side)
36 | val buff_valid = RegInit(VecInit.fill(len)(false.B)) // per-slot occupancy bits
37 | val empty = !buff_valid(buff_head) // empty when the head slot holds nothing
38 | val full = buff_valid(buff_tail) // full when the slot to be written is still occupied
39 |
40 | io.empty := empty
41 | io.full := full
42 | for(i <- 0 until rchannel){ // read port i exposes slot head+i together with its valid bit
43 | io.rdatas(i) := buff(buff_head+i.U)
44 | io.rvalids(i) := buff_valid(buff_head+i.U)
45 | }
46 |
47 | when(io.clear){
48 | buff_head := 0.U
49 | buff_tail := 0.U
50 | for(i <- 0 until len)
51 | buff_valid(i) := false.B
52 | }.otherwise{
53 | when(io.push && !full){ // a push while full is silently dropped
54 | buff_tail := buff_tail + 1.U
55 | buff(buff_tail) := io.wdata
56 | buff_valid(buff_tail) := true.B
57 | }
58 |
59 | when(io.pop){ // written after the push block, so on a shared slot the pop's invalidate wins
60 | for(i <- 0 until rchannel){
61 | when(io.popN === i.U){
62 | buff_head := buff_head + (i+1).U
63 | for(j <- 0 to i){
64 | buff_valid(buff_head + j.U) := false.B
65 | }
66 | }
67 | }
68 | }
69 | }
70 |
71 |
72 |
73 | if(GlobalConfg.SIM){ // simulation-only performance probe
74 | val dpic_perf_instrs_buff = Module(new DPIC_PERF_BUFF)
75 | dpic_perf_instrs_buff.io.clk := clock
76 | dpic_perf_instrs_buff.io.rst := reset
77 | dpic_perf_instrs_buff.io.id := debug_id.U
78 | dpic_perf_instrs_buff.io.head := buff_head
79 | dpic_perf_instrs_buff.io.tail := buff_tail
80 | dpic_perf_instrs_buff.io.full := full
81 | dpic_perf_instrs_buff.io.reload := io.clear
82 | }
83 |
84 | }
85 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/cache/CacheMini.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit.cache
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | import nagicore.bus._
7 | import chisel3.util.random.LFSR
8 | import nagicore.utils.isPowerOf2
9 | import nagicore.GlobalConfg
10 | import nagicore.unit.RingBuff
11 | /**
12 | * Small cache front-end with a write buffer queue: a write is normally pushed into the queue in one cycle without stalling the upstream pipeline, and is issued over AXI4 in the background
13 | * @note busy high means the request stalls the upstream pipeline
14 | * @param addrBits address width
15 | * @param dataBits data width
16 | * @param writeBuffLen size of the write buffer queue (must be a power of two)
17 | * @param L0Size number of entries in the register-based L0 store that serves read hits in the same cycle
18 | * @param debug_id id forwarded to the write buffer and to the SIM-only perf probe
19 | */
20 | class CacheMini(addrBits:Int, dataBits: Int, writeBuffLen: Int, L0Size: Int, debug_id: Int) extends Module{
21 | require(isPowerOf2(writeBuffLen))
22 | val io = IO(new Bundle{
23 | val axi = new AXI4IO(addrBits, dataBits)
24 | val in = Input(new Bundle {
25 | val req = Bool()
26 | val bits = new Bundle {
27 | val addr = UInt(addrBits.W)
28 | val size = UInt(2.W)
29 | val we = Bool()
30 | val wmask = UInt((dataBits/8).W)
31 | val wdata = UInt(dataBits.W)
32 | val uncache = Bool()
33 | }
34 | })
35 | val out = Output(new Bundle {
36 | val busy = Bool()
37 | val rdata = UInt(dataBits.W)
38 | })
39 | })
40 | class WriteInfo extends Bundle{ // one pending write held in the write buffer
41 | val addr = UInt(addrBits.W)
42 | val size = UInt(2.W)
43 | val wmask = UInt((dataBits/8).W)
44 | val wdata = UInt(dataBits.W)
45 | }
46 | val write_buff = Module(new RingBuff(()=>new WriteInfo, writeBuffLen, 1, debug_id))
47 | write_buff.io.push := false.B
48 | write_buff.io.pop := false.B
49 | write_buff.io.wdata := DontCare
50 | write_buff.io.clear := false.B
51 | write_buff.io.popN := 0.U // single read channel: always pop exactly one entry (this input was previously left undriven)
52 | val axi_w_agent = Module(new AXI4WriteAgent(addrBits, dataBits, 1))
53 | axi_w_agent.io.axi.aw <> io.axi.aw
54 | axi_w_agent.io.axi.w <> io.axi.w
55 | axi_w_agent.io.axi.b <> io.axi.b
56 | axi_w_agent.io.cmd.in <> DontCare
57 | axi_w_agent.io.cmd.in.req := false.B
58 |
59 | val axi_r_agent = Module(new AXI4ReadAgent(addrBits, dataBits, 1))
60 | axi_r_agent.io.axi.ar <> io.axi.ar
61 | axi_r_agent.io.axi.r <> io.axi.r
62 | axi_r_agent.io.cmd.in <> DontCare
63 | axi_r_agent.io.cmd.in.req := false.B
64 |
65 | val cmd_reg = Reg(io.in.bits.cloneType) // request saved while it waits for the write buffer or the read path
66 |
67 | val rdata_reg = Reg(UInt(dataBits.W))
68 | io.out.rdata := rdata_reg // default read data; an L0 hit overrides it combinationally
69 |
70 |
71 | object State extends ChiselEnum { // one-hot encoded request FSM
72 | val idle = Value(1.U)
73 | val waitWriteBuff = Value(2.U)
74 | val waitReadReady = Value(4.U)
75 | val waitRead = Value(8.U)
76 | }
77 | val state = RegInit(State.idle)
78 |
79 | io.out.busy := state =/= State.idle // default; individual states override it below
80 |
81 | class L0Data extends Bundle{
82 | val addr = UInt(addrBits.W)
83 | val data = UInt(dataBits.W)
84 | val valid = Bool()
85 | }
86 | val L0 = RegInit(VecInit(Seq.fill(L0Size){
87 | val bundle = Wire(new L0Data)
88 | bundle := DontCare
89 | bundle.valid := false.B
90 | bundle
91 | }))
92 | // val L0 = Vec(L0Size, RegInit({
93 | // val bundle = Wire(new L0Data)
94 | // bundle := DontCare
95 | // bundle.valid := false.B
96 | // bundle
97 | // }))
98 | val hits = VecInit.tabulate(L0Size)(i =>{
99 | L0(i).addr === io.in.bits.addr && L0(i).valid
100 | })
101 | val hit = hits.reduceTree(_||_) && !io.in.bits.uncache
102 | val hit_data = L0(PriorityEncoder(hits)).data // lowest index wins, i.e. the most recently inserted match
103 | // Insert an entry at the front of L0, shifting the others back and dropping the last; both call sites do this only for non-uncache accesses
104 | def updateL0(addr: UInt, data: UInt) = {
105 | for(i <- 0 until (L0Size-1)){
106 | L0(i+1) := L0(i)
107 | }
108 | L0(0).addr := addr
109 | L0(0).data := data
110 | L0(0).valid := true.B
111 | }
112 |
113 | val ready_read = axi_r_agent.io.cmd.out.ready && write_buff.io.empty && axi_w_agent.io.cmd.out.ready // a read is issued only after all buffered writes have been handed off
114 |
115 | if(GlobalConfg.SIM){
116 | import nagicore.unit.DPIC_PERF_CACHE
117 | val dpic_perf_cache = Module(new DPIC_PERF_CACHE)
118 | dpic_perf_cache.io.clk := clock
119 | dpic_perf_cache.io.rst := reset
120 | dpic_perf_cache.io.valid := io.in.req
121 | dpic_perf_cache.io.id := debug_id.U
122 | dpic_perf_cache.io.access_type := Cat(0.U, !io.out.busy)
123 | }
124 |
125 | switch(state){
126 | is(State.idle){
127 | when(io.in.req){
128 | when(io.in.bits.we){
129 | // Write
130 | when(write_buff.io.full){
131 | state := State.waitWriteBuff
132 | io.out.busy := true.B
133 |
134 | cmd_reg := io.in.bits
135 | }.otherwise{
136 | write_buff.io.push := true.B
137 | write_buff.io.wdata := io.in.bits
138 | }
139 |
140 | when(!io.in.bits.uncache){
141 | updateL0(io.in.bits.addr, io.in.bits.wdata) // NOTE(review): wmask is ignored here, so a partial write caches the whole wdata; confirm cacheable writes are always full-word
142 | }
143 | }.otherwise{
144 | // Read
145 | when(hit){
146 | io.out.rdata := hit_data
147 | io.out.busy := false.B
148 | }.elsewhen(ready_read){
149 | axi_r_agent.io.cmd.in.req := true.B
150 | axi_r_agent.io.cmd.in.addr := io.in.bits.addr
151 | axi_r_agent.io.cmd.in.len := 0.U
152 | axi_r_agent.io.cmd.in.size := log2Up(dataBits).U
153 |
154 | state := State.waitRead
155 | io.out.busy := true.B
156 | }.otherwise{
157 | cmd_reg := io.in.bits
158 |
159 | state := State.waitReadReady
160 | io.out.busy := true.B
161 | }
162 |
163 | }
164 | }
165 | }
166 | is(State.waitWriteBuff){
167 | when(!write_buff.io.full){
168 | write_buff.io.push := true.B
169 | write_buff.io.wdata := cmd_reg
170 |
171 | io.out.busy := false.B
172 | state := State.idle
173 | }
174 | }
175 | is(State.waitReadReady){
176 | when(ready_read){
177 | axi_r_agent.io.cmd.in.req := true.B
178 | axi_r_agent.io.cmd.in.addr := cmd_reg.addr
179 | axi_r_agent.io.cmd.in.len := 0.U
180 | axi_r_agent.io.cmd.in.size := log2Up(dataBits).U
181 |
182 | state := State.waitRead
183 | }
184 | }
185 | is(State.waitRead){
186 | when(axi_r_agent.io.cmd.out.ready){
187 | rdata_reg := axi_r_agent.io.cmd.out.rdata
188 | assert(axi_r_agent.io.cmd.out.resp === 0.U)
189 | when(!io.in.bits.uncache){ // reads the live input rather than cmd_reg; presumably the stalled requester keeps it stable
190 | updateL0(io.in.bits.addr, axi_r_agent.io.cmd.out.rdata)
191 | }
192 | state := State.idle
193 | }
194 | }
195 | }
196 |
197 | when(!write_buff.io.empty){ // background drain: hand the oldest buffered write to the AXI write agent
198 | when(axi_w_agent.io.cmd.out.ready){
199 | axi_w_agent.io.cmd.in.req := true.B
200 | axi_w_agent.io.cmd.in.addr := write_buff.io.rdatas(0).addr
201 | axi_w_agent.io.cmd.in.len := 0.U
202 | axi_w_agent.io.cmd.in.size := write_buff.io.rdatas(0).size
203 | axi_w_agent.io.cmd.in.wdata(0) := write_buff.io.rdatas(0).wdata
204 | axi_w_agent.io.cmd.in.wmask(0) := write_buff.io.rdatas(0).wmask
205 |
206 | write_buff.io.pop := true.B
207 | }
208 | }
209 |
210 | }
211 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/cache/CacheType.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit.cache
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | import nagicore.bus.{Ram, RamIO}
7 |
8 | object CacheMemType extends Enumeration { // memory implementation selector accepted by CacheMem
9 | type CacheMemType = Value
10 | val RAM_2cyc, BRAM_1cyc, RAM_1cyc = Value
11 | }
12 |
13 | object CacheReplaceType extends Enumeration { // presumably the cache replacement policy selector; not used in this file
14 | type CacheReplaceType = Value
15 | val Random, LRU = Value
16 | }
17 |
18 | /**
19 | * CacheMem: synchronous RAM for cache storage whose read data is returned in the second cycle
20 | *
21 | * @param width forwarded to RamIO and Ram
22 | * @param depth forwarded to RamIO and Ram
23 | * @param imp requested memory type; every value currently builds the same Ram
24 | */
25 | class CacheMem(width: Int, depth: Int, imp: CacheMemType.CacheMemType=CacheMemType.RAM_2cyc) extends Module{
26 | val io = IO(new RamIO(width, depth))
27 |
28 | // No CacheMemType has a dedicated backend yet, so imp does not influence
29 | // what is built: the wrapper always instantiates the generic Ram and
30 | // connects its port bundle straight to this module's io.
31 | val sram = Module(new Ram(width, depth))
32 | sram.io <> io
33 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/cache/UnCache.scala:
--------------------------------------------------------------------------------
1 | package nagicore.unit.cache
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | import nagicore.bus._
7 | import chisel3.util.random.LFSR
8 | import nagicore.utils.isPowerOf2
9 | import nagicore.GlobalConfg
10 | import nagicore.unit.RingBuff
11 | /**
12 | * A pass-through "cache" for uncached accesses (it caches nothing): reads and writes go straight to the bus
13 | * It has a write buffer queue: a write is normally accepted in one cycle without stalling the upstream pipeline, and is issued over AXI4 in the background
14 | * @note busy high means the request stalls the upstream pipeline
15 | *
16 | * @param addrBits address width
17 | * @param dataBits data width
18 | * @param writeBuffLen size of the write buffer queue (must be a power of two)
19 | * @param debug_id id forwarded to the write buffer and to the SIM-only perf probe
20 | */
21 | class UnCache(addrBits:Int, dataBits: Int, writeBuffLen: Int, debug_id: Int=0) extends Module{
22 | require(isPowerOf2(writeBuffLen))
23 | val io = IO(new Bundle{
24 | val axi = new AXI4IO(addrBits, dataBits)
25 | val in = Input(new Bundle {
26 | val req = Bool()
27 | val bits = new Bundle {
28 | val addr = UInt(addrBits.W)
29 | val we = Bool()
30 | val wmask = UInt((dataBits/8).W)
31 | val size = UInt(2.W)
32 | val wdata = UInt(dataBits.W)
33 | }
34 | })
35 | val out = Output(new Bundle {
36 | val busy = Bool()
37 | val rdata = UInt(dataBits.W)
38 | })
39 | })
40 | class WriteInfo extends Bundle{ // one pending write held in the write buffer
41 | val addr = UInt(addrBits.W)
42 | val size = UInt(2.W)
43 | val wmask = UInt((dataBits/8).W)
44 | val wdata = UInt(dataBits.W)
45 | }
46 | val write_buff = Module(new RingBuff(()=>new WriteInfo, writeBuffLen, rchannel=1, debug_id=debug_id))
47 | write_buff.io.push := false.B
48 | write_buff.io.pop := false.B
49 | write_buff.io.wdata := DontCare
50 | write_buff.io.clear := false.B
51 | write_buff.io.popN := 0.U // single read channel: always pop exactly one entry
52 |
53 | val axi_w_agent = Module(new AXI4WriteAgent(addrBits, dataBits, 1))
54 | axi_w_agent.io.axi.aw <> io.axi.aw
55 | axi_w_agent.io.axi.w <> io.axi.w
56 | axi_w_agent.io.axi.b <> io.axi.b
57 | axi_w_agent.io.cmd.in <> DontCare
58 | axi_w_agent.io.cmd.in.req := false.B
59 |
60 | val axi_r_agent = Module(new AXI4ReadAgent(addrBits, dataBits, 1))
61 | axi_r_agent.io.axi.ar <> io.axi.ar
62 | axi_r_agent.io.axi.r <> io.axi.r
63 | axi_r_agent.io.cmd.in <> DontCare
64 | axi_r_agent.io.cmd.in.req := false.B
65 |
66 | val cmd_reg = Reg(io.in.bits.cloneType) // request saved while it waits for the write buffer or the read path
67 |
68 | val rdata_reg = Reg(UInt(dataBits.W))
69 | io.out.rdata := rdata_reg
70 |
71 |
72 | object State extends ChiselEnum { // one-hot encoded request FSM
73 | val idle = Value(1.U)
74 | val waitWriteBuff = Value(2.U)
75 | val waitReadReady = Value(4.U)
76 | val waitRead = Value(8.U)
77 | }
78 | val state = RegInit(State.idle)
79 |
80 | io.out.busy := state =/= State.idle // default; individual states override it below
81 |
82 | val ready_read = axi_r_agent.io.cmd.out.ready&&write_buff.io.empty&&axi_w_agent.io.cmd.out.ready // a read is issued only after all buffered writes have been handed off
83 |
84 | switch(state){
85 | is(State.idle){
86 | when(io.in.req){
87 | when(io.in.bits.we){
88 | // Write
89 | when(write_buff.io.empty && axi_w_agent.io.cmd.out.ready){ // nothing queued and the agent is free: bypass the buffer
90 | axi_w_agent.io.cmd.in.req := true.B
91 | axi_w_agent.io.cmd.in.addr := io.in.bits.addr
92 | axi_w_agent.io.cmd.in.len := 0.U
93 | axi_w_agent.io.cmd.in.size := io.in.bits.size
94 | axi_w_agent.io.cmd.in.wdata(0) := io.in.bits.wdata
95 | axi_w_agent.io.cmd.in.wmask(0) := io.in.bits.wmask
96 | }.elsewhen(write_buff.io.full){ // no room: stall and retry from cmd_reg
97 | state := State.waitWriteBuff
98 | io.out.busy := true.B
99 |
100 | cmd_reg := io.in.bits
101 | }.otherwise{
102 | write_buff.io.push := true.B
103 | write_buff.io.wdata := io.in.bits
104 | }
105 | }.otherwise{
106 | // Read
107 | when(ready_read){
108 | axi_r_agent.io.cmd.in.req := true.B
109 | axi_r_agent.io.cmd.in.addr := io.in.bits.addr
110 | axi_r_agent.io.cmd.in.len := 0.U
111 | axi_r_agent.io.cmd.in.size := log2Up(dataBits).U
112 |
113 | state := State.waitRead
114 | }.otherwise{
115 | cmd_reg := io.in.bits
116 |
117 | state := State.waitReadReady
118 | }
119 | io.out.busy := true.B // a read always stalls the requester
120 | }
121 | }
122 | }
123 | is(State.waitWriteBuff){
124 | when(!write_buff.io.full){
125 | write_buff.io.push := true.B
126 | write_buff.io.wdata := cmd_reg
127 |
128 | io.out.busy := false.B
129 | state := State.idle
130 | }
131 | }
132 | is(State.waitReadReady){
133 | when(ready_read){
134 | axi_r_agent.io.cmd.in.req := true.B
135 | axi_r_agent.io.cmd.in.addr := cmd_reg.addr
136 | axi_r_agent.io.cmd.in.len := 0.U
137 | axi_r_agent.io.cmd.in.size := log2Up(dataBits).U
138 |
139 | state := State.waitRead
140 | }
141 | }
142 | is(State.waitRead){
143 | when(axi_r_agent.io.cmd.out.ready){
144 | rdata_reg := axi_r_agent.io.cmd.out.rdata
145 | assert(axi_r_agent.io.cmd.out.resp === 0.U)
146 | state := State.idle
147 | }
148 | }
149 | }
150 |
151 | when(!write_buff.io.empty){ // background drain: hand the oldest buffered write to the AXI write agent
152 | when(axi_w_agent.io.cmd.out.ready){
153 | axi_w_agent.io.cmd.in.req := true.B
154 | axi_w_agent.io.cmd.in.addr := write_buff.io.rdatas(0).addr
155 | axi_w_agent.io.cmd.in.len := 0.U
156 | axi_w_agent.io.cmd.in.size := write_buff.io.rdatas(0).size
157 | axi_w_agent.io.cmd.in.wdata(0) := write_buff.io.rdatas(0).wdata
158 | axi_w_agent.io.cmd.in.wmask(0) := write_buff.io.rdatas(0).wmask
159 |
160 | write_buff.io.pop := true.B
161 | }
162 | }
163 |
164 | if(GlobalConfg.SIM){ // simulation-only performance probe
165 | import nagicore.unit.DPIC_PERF_CACHE
166 | val dpic_perf_cache = Module(new DPIC_PERF_CACHE)
167 | dpic_perf_cache.io.clk := clock
168 | dpic_perf_cache.io.rst := reset
169 | dpic_perf_cache.io.valid := io.in.req
170 | dpic_perf_cache.io.id := debug_id.U
171 | dpic_perf_cache.io.access_type := Cat(0.U, !io.out.busy)
172 | }
173 |
174 | }
175 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/ip/Xiangshan/ArrayMulDataModule.scala:
--------------------------------------------------------------------------------
1 | /***************************************************************************************
2 | * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3 | * Copyright (c) 2020-2021 Peng Cheng Laboratory
4 | *
5 | * XiangShan is licensed under Mulan PSL v2.
6 | * You can use this software according to the terms and conditions of the Mulan PSL v2.
7 | * You may obtain a copy of Mulan PSL v2 at:
8 | * http://license.coscl.org.cn/MulanPSL2
9 | *
10 | * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 | * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 | * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 | *
14 | * See the Mulan PSL v2 for more details.
15 | ***************************************************************************************/
16 |
17 | package nagicore.unit.ip.Xiangshan
18 |
19 | import chisel3._
20 | import chisel3.util._
21 | import nagicore.utils.SignExt
22 |
23 | class ArrayMulDataModule(len: Int) extends Module { // pipelined len x len multiplier: Booth partial products, column compression, final add
24 | val io = IO(new Bundle() {
25 | val a, b = Input(UInt(len.W))
26 | val regEnables = Input(Vec(2, Bool())) // (0) enables the partial-product registers, (1) the mid-tree registers
27 | val result = Output(UInt((2 * len).W))
28 | })
29 | val (a, b) = (io.a, io.b)
30 |
31 | val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
32 | b_sext := SignExt(b, len+1)
33 | bx2 := b_sext << 1
34 | neg_b := (~b_sext).asUInt // bitwise inverse only; the matching +1/+2 correction is the `t` term appended to the next partial product
35 | neg_bx2 := neg_b << 1
36 |
37 | val columns: Array[Seq[Bool]] = Array.fill(2*len)(Seq()) // columns(j) collects every partial-product bit of weight 2^j
38 |
39 | var last_x = WireInit(0.U(3.W))
40 | for(i <- Range(0, len, 2)){ // radix-4 Booth: one partial product per pair of multiplier bits
41 | val x = if(i==0) Cat(a(1,0), 0.U(1.W)) else if(i+1==len) SignExt(a(i, i-1), 3) else a(i+1, i-1)
42 | val pp_temp = MuxLookup(x, 0.U)(Seq(
43 | 1.U -> b_sext,
44 | 2.U -> b_sext,
45 | 3.U -> bx2,
46 | 4.U -> neg_bx2,
47 | 5.U -> neg_b,
48 | 6.U -> neg_b
49 | ))
50 | val s = pp_temp(len)
51 | val t = MuxLookup(last_x, 0.U(2.W))(Seq( // correction for the previous group's inverted operand
52 | 4.U -> 2.U(2.W),
53 | 5.U -> 1.U(2.W),
54 | 6.U -> 1.U(2.W)
55 | ))
56 | last_x = x
57 | val (pp, weight) = i match { // pp is the bit pattern to place, weight the column of its least significant bit
58 | case 0 =>
59 | (Cat(~s, s, s, pp_temp), 0)
60 | case n if (n==len-1) || (n==len-2) =>
61 | (Cat(~s, pp_temp, t), i-2)
62 | case _ =>
63 | (Cat(1.U(1.W), ~s, pp_temp, t), i-2)
64 | }
65 | for(j <- columns.indices){ // bits that fall outside the 2*len columns are dropped
66 | if(j >= weight && j < (weight + pp.getWidth)){
67 | columns(j) = columns(j) :+ pp(j-weight)
68 | }
69 | }
70 | }
71 |
72 | def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = { // compress one column; returns (bits kept in this column, cout1, cout2)
73 | var sum = Seq[Bool]()
74 | var cout1 = Seq[Bool]()
75 | var cout2 = Seq[Bool]()
76 | col.size match {
77 | case 1 => // do nothing
78 | sum = col ++ cin
79 | case 2 =>
80 | val c22 = Module(new C22)
81 | c22.io.in := col
82 | sum = c22.io.out(0).asBool +: cin
83 | cout2 = Seq(c22.io.out(1).asBool)
84 | case 3 =>
85 | val c32 = Module(new C32)
86 | c32.io.in := col
87 | sum = c32.io.out(0).asBool +: cin
88 | cout2 = Seq(c32.io.out(1).asBool)
89 | case 4 =>
90 | val c53 = Module(new C53)
91 | for((x, y) <- c53.io.in.take(4) zip col){
92 | x := y
93 | }
94 | c53.io.in.last := (if(cin.nonEmpty) cin.head else 0.U) // the fifth compressor input takes the first carry-in, if there is one
95 | sum = Seq(c53.io.out(0).asBool) ++ (if(cin.nonEmpty) cin.drop(1) else Nil)
96 | cout1 = Seq(c53.io.out(1).asBool)
97 | cout2 = Seq(c53.io.out(2).asBool)
98 | case n => // more than four bits: split into a group of four and the rest, recurse on both
99 | val cin_1 = if(cin.nonEmpty) Seq(cin.head) else Nil
100 | val cin_2 = if(cin.nonEmpty) cin.drop(1) else Nil
101 | val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
102 | val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
103 | sum = s_1 ++ s_2
104 | cout1 = c_1_1 ++ c_2_1
105 | cout2 = c_1_2 ++ c_2_2
106 | }
107 | (sum, cout1, cout2)
108 | }
109 |
110 | def max(in: Iterable[Int]): Int = in.reduce((a, b) => if(a>b) a else b)
111 | def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = { // compress layer by layer until every column holds at most two bits
112 | if(max(cols.map(_.size)) <= 2){
113 | val sum = Cat(cols.map(_(0)).reverse)
114 | var k = 0
115 | while(cols(k).size == 1) k = k+1 // skip the low columns that carry no second bit
116 | val carry = Cat(cols.drop(k).map(_(1)).reverse)
117 | (sum, Cat(carry, 0.U(k.W)))
118 | } else {
119 | val columns_next = Array.fill(2*len)(Seq[Bool]())
120 | var cout1, cout2 = Seq[Bool]()
121 | for( i <- cols.indices){ // cout1 feeds the next column inside this layer, cout2 is deferred to the next layer
122 | val (s, c1, c2) = addOneColumn(cols(i), cout1)
123 | columns_next(i) = s ++ cout2
124 | cout1 = c1
125 | cout2 = c2
126 | }
127 |
128 | val needReg = depth == 4 // a pipeline register is inserted after the layer at depth 4
129 | val toNextLayer = if(needReg)
130 | columns_next.map(_.map(x => RegEnable(x, io.regEnables(1))))
131 | else
132 | columns_next
133 |
134 | addAll(toNextLayer, depth+1)
135 | }
136 | }
137 |
138 | val columns_reg = columns.map(col => col.map(b => RegEnable(b, io.regEnables(0)))) // first pipeline stage: registered partial-product bits
139 | val (sum, carry) = addAll(cols = columns_reg, depth = 0)
140 |
141 | io.result := sum + carry // final addition of the two remaining rows
142 | }
--------------------------------------------------------------------------------
/src/main/scala/nagicore/unit/ip/Xiangshan/CSA.scala:
--------------------------------------------------------------------------------
1 | /***************************************************************************************
2 | * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3 | * Copyright (c) 2020-2021 Peng Cheng Laboratory
4 | *
5 | * XiangShan is licensed under Mulan PSL v2.
6 | * You can use this software according to the terms and conditions of the Mulan PSL v2.
7 | * You may obtain a copy of Mulan PSL v2 at:
8 | * http://license.coscl.org.cn/MulanPSL2
9 | *
10 | * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 | * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 | * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 | *
14 | * See the Mulan PSL v2 for more details.
15 | ***************************************************************************************/
16 |
17 | package nagicore.unit.ip.Xiangshan
18 |
19 | import chisel3._
20 | import chisel3.util._
21 |
22 | abstract class CarrySaveAdderMToN(m: Int, n: Int)(len: Int) extends Module{ // common IO for adders taking m len-bit inputs and producing n len-bit outputs
23 | val io = IO(new Bundle() {
24 | val in = Input(Vec(m, UInt(len.W)))
25 | val out = Output(Vec(n, UInt(len.W)))
26 | })
27 | }
28 |
29 | class CSA2_2(len: Int) extends CarrySaveAdderMToN(2, 2)(len) {
30 | // temp(k) holds {carry, sum} of the half adder at bit position k
31 | val temp = Wire(Vec(len, UInt(2.W)))
32 | for(k <- 0 until len){
33 | val lhs = io.in(0)(k)
34 | val rhs = io.in(1)(k)
35 | temp(k) := (lhs & rhs) ## (lhs ^ rhs)
36 | }
37 | for(n <- io.out.indices) io.out(n) := Cat(temp.map(_(n)).reverse) // out(0) gathers the sum bits, out(1) the carry bits
38 | }
39 |
40 | class CSA3_2(len: Int) extends CarrySaveAdderMToN(3, 2)(len){
41 | // temp(k) holds {carry, sum} of the full adder at bit position k
42 | val temp = Wire(Vec(len, UInt(2.W)))
43 | for(k <- 0 until len){
44 | val x = io.in(0)(k)
45 | val y = io.in(1)(k)
46 | val carryIn = io.in(2)(k)
47 | val halfSum = x ^ y
48 | temp(k) := ((x & y) | (halfSum & carryIn)) ## (halfSum ^ carryIn)
49 | }
50 | for(n <- io.out.indices) io.out(n) := Cat(temp.map(_(n)).reverse) // out(0) gathers the sum bits, out(1) the carry bits
51 | }
52 |
53 | class CSA5_3(len: Int)extends CarrySaveAdderMToN(5, 3)(len){ // 5-to-3 compressor built from two chained CSA3_2 stages
54 | val FAs = Array.fill(2)(Module(new CSA3_2(len)))
55 | FAs(0).io.in := io.in.take(3) // first stage adds inputs 0..2
56 | FAs(1).io.in := VecInit(FAs(0).io.out(0), io.in(3), io.in(4)) // second stage adds the first sum to inputs 3 and 4
57 | io.out := VecInit(FAs(1).io.out(0), FAs(0).io.out(1), FAs(1).io.out(1)) // (final sum, first-stage carry, second-stage carry)
58 | }
59 |
60 | class C22 extends CSA2_2(1) // one-bit wide CSA2_2
61 | class C32 extends CSA3_2(1) // one-bit wide CSA3_2
62 | class C53 extends CSA5_3(1) // one-bit wide CSA5_3
63 |
64 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/utils/Flags.scala:
--------------------------------------------------------------------------------
1 | package nagicore.utils
2 |
3 | import chisel3._
4 | import chisel3.util._
5 | import chisel3.util.experimental.decode._
6 |
7 | object Flags{
8 | def bp(x : String) : BitPat = BitPat("b" + x) // binary pattern string -> BitPat
9 | def U(x: String):UInt = ("b" + x).U // binary digit string -> UInt literal
10 | def castFlags2Bitpat(x : Iterable[String]) : BitPat = bp(x.reduce(_ ++ _)) // concatenate the flag strings into one pattern
11 | def onehotMux[T <: Data](input: UInt, cases: Iterable[(String, T)]) = {
12 | // every key must contain exactly one '1'
13 | assert(cases.map{ case (key, _) => key.count(_ == '1') == 1 }.reduce(_ && _))
14 | // keys must be pairwise distinct
15 | assert(cases.map(_._1).toSet.size == cases.size)
16 | chisel3.util.Mux1H(cases.map{ case (key, value) => input(key.length-1 - findFirstOne(key).get) -> value })
17 | }
18 | /**
19 | * Tests a single bit of a one-hot flag vector
20 | *
21 | * @param input flag vector to inspect
22 | * @param expect one-hot pattern string; the position of its '1' selects the bit of input
23 | * @return the selected bit as a Bool
24 | */
25 | def OHis[T <: Data](input: UInt, expect: String): Bool = {
26 | // expect must contain exactly one '1'
27 | assert(expect.count(c => c == '1') == 1)
28 | input(expect.length - findFirstOne(expect).get - 1).asBool
29 | }
30 | def is[T <: Data](input: UInt, expect: String): Bool = {
31 | assert(expect.length == input.getWidth)
32 | input === bp(expect)
33 | }
34 | def CasesMux[T <: Data](input: UInt, cases: Iterable[(String, T)], default: T) : T = {
35 | // keys must be pairwise distinct
36 | assert(cases.map(_._1).toSet.size == cases.size)
37 | // chisel3.util.Mux1H(cases.map(x => (input === BitPat(s"b${x._1}")) -> x._2))
38 | // decoder(EspressoMinimizer, input, TruthTable(
39 | // cases.map(x => bp(x._1) -> BitPat(x._2.asUInt)),
40 | // BitPat(s"b0")
41 | // ))
42 | MuxCase(default, cases.map{ case (key, value) => (input === bp(key)) -> value }.toSeq)
43 | }
44 | def ifEqu[T <: Data](input: UInt, target: String, true_res: T, false_res: T) : T = {
45 | Mux(input === bp(target), true_res, false_res)
46 | }
47 | /**
48 | * Decoder that lets `decoder` minimise the truth table
49 | *
50 | * @param flag_name name of the control signal to decode
51 | * @param input input signal
52 | * @param decode_map decode table, as (BitPat, Map[control signal name, control signal value])
53 | * @param default_map default decode table, as Map[control signal name, control signal value]
54 | * @return the value of control signal flag_name for the given input
55 | */
56 | def decode_flag(flag_name: String, input: UInt, decode_map: Seq[(BitPat, Map[String, String])], default_map: Map[String, String]) = {
57 | // pick flag_name out of every row first, then out of the defaults
58 | val rows = decode_map.map{ case (pattern, flags) => pattern -> bp(flags.get(flag_name).get) }
59 | val fallback = bp(default_map.get(flag_name).get)
60 | decoder(EspressoMinimizer, input, TruthTable(rows, fallback))
61 | }
62 | private def findFirstOne(str: String): Option[Int] = {
63 | // position of the leftmost '1', if the string has one
64 | val pos = str.indexOf("1")
65 | if (pos == -1) None
66 | else Some(pos)
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/scala/nagicore/utils/utils.scala:
--------------------------------------------------------------------------------
1 | package nagicore.utils
2 |
3 | import chisel3._
4 | import chisel3.util._
5 |
6 | object SignExt {
7 | def apply(a: UInt, len: Int): UInt = {
8 | val width = a.getWidth // narrowing keeps the low len bits, widening replicates the top bit
9 | if (len <= width) a(len-1, 0)
10 | else Fill(len - width, a(width-1)) ## a
11 | }
12 | }
13 |
14 | object isPowerOf2{
15 | // true exactly when x is positive and has a single bit set
16 | def apply(x: Int): Boolean =
17 | if (x <= 0) false else Integer.bitCount(x) == 1
18 | }
19 |
--------------------------------------------------------------------------------