├── README.md
└── rtl
    └── cpu.v


/README.md:
--------------------------------------------------------------------------------
This repository gives you a very mean (a.k.a ba pi) RISC-V RV32I implementation for RISC-V Shanghai Day 2018.

The synthesis result on Anlogic EG4-20 is about 110 MHz fmax and 500 LUTs + 4/2 M32Ks. I am now improving timing and instruction set support. It is not usable yet, but I will make it fully work before Jun. 28, 2018.

Currently it can execute jump, auipc, op-imm, op-r2r and 32-bit load-store. Compare and branch instructions are not supported yet: I'm working on it.

I won't implement these features:

1. Byte/half-word load-store, because it may cost about 100 LUTs. I prefer that users implement it in software.

2. Unaligned load-store, same reason as 1.

3. [Maybe] Interrupt controller.

4. [Maybe] Reset logic.

5. [Maybe] RVC decoder.


Jun.19 2018 Zhiyuan Wan

--------------------------------------------------------------------------------
/rtl/cpu.v:
--------------------------------------------------------------------------------
/*
 * cpu -- minimal multi-cycle RV32I core.
 *
 * One instruction passes through six one-hot FSM states:
 *   FETCH -> DECODE -> REGR -> EXEC -> MEM -> REGW
 *
 * The register file, program and data all live in one dual-port block RAM
 * (regfile_internal_ram, declared outside this file):
 *   port A: instruction fetch, rs1 read, load/store data access
 *   port B: rs2 read (REGR) and rd write-back (REGW)
 *
 * Ports:
 *   clk_i        clock; all state updates on the rising edge
 *   rstn_i       currently unused -- there is no reset logic; start-up state
 *                comes from the register initial values below
 *   icb_addr_o   bus address: the load/store address during MEM, otherwise pc
 *   icb_wdata_o  store data (value of rs2)
 *   icb_wmask_o  byte write mask; non-zero only for a store during MEM
 *   icb_en_o     high during MEM when a load/store targets the external bus
 *   icb_rdata_i  external read data, consumed during REGW for external loads
 *   icb_ready_i  currently unused -- the FSM never stalls, so an external
 *                slave is assumed to answer without wait states
 *                (NOTE(review): confirm against the intended bus protocol)
 *
 * Implemented: LUI, AUIPC, JAL, JALR, OP-IMM/OP ADD, SUB, XOR, OR, AND and
 * word load/store. Shifts, SLT/SLTU and conditional branches are not
 * implemented: shift/compare funct3 values drive the ALU result to X in
 * simulation, and a branch executes as a no-op (pc + 4, no register write).
 */
module cpu(clk_i, rstn_i, icb_addr_o, icb_wdata_o, icb_wmask_o, icb_en_o, icb_rdata_i, icb_ready_i);
    /* Clock and reset */
    input clk_i;
    input rstn_i;

    /* External bus */
    output [31:0] icb_addr_o;
    output [31:0] icb_wdata_o;
    output [3:0] icb_wmask_o;
    output icb_en_o;

    input [31:0] icb_rdata_i;
    input icb_ready_i;

    /* Port A of the internal RAM: instruction fetch, rs1 read, load/store */
    wire [11:0] rf_int_addr_a;
    wire [31:0] rf_int_di_a;
    wire [31:0] rf_int_do_a;
    wire [3:0] rf_int_we_a;
    wire rf_int_ce_a;

    /* Port B of the internal RAM: rs2 read in REGR, rd write-back in REGW */
    wire [11:0] rf_int_addr_b;
    wire [31:0] rf_int_di_b;
    wire [31:0] rf_int_do_b;
    wire [3:0] rf_int_we_b;
    wire rf_int_ce_b;

    regfile_internal_ram rf_int_ram(
        .clka(clk_i),
        .addra(rf_int_addr_a),
        .dia(rf_int_di_a),
        .doa(rf_int_do_a),
        .wea(rf_int_we_a),
        .cea(rf_int_ce_a),
        .rsta(1'b0),

        .clkb(clk_i),
        .addrb(rf_int_addr_b),
        .dib(rf_int_di_b),
        .dob(rf_int_do_b),
        .web(rf_int_we_b),
        .ceb(rf_int_ce_b),
        .rstb(1'b0)
    );

    /*
        Address map
        0x0000_0000 ~ 0x0000_007F  registers (x0..x31, one word each)
        0x0000_0080 ~ 0x0000_2000  program/code space
        Anything beyond the internal RAM goes to the external bus.
        Note: the select logic below treats every address with bits [31:14]
        equal to zero (i.e. below 0x0000_4000) as internal RAM.
    */

    /* One-hot state encodings */
    localparam FSM_FETCH    = 6'b000001;
    localparam FSM_DECODE   = 6'b000010;
    localparam FSM_REGR     = 6'b000100;
    localparam FSM_EXEC     = 6'b001000;
    localparam FSM_MEM      = 6'b010000;
    localparam FSM_REGW     = 6'b100000;

    /* Bit index of each state inside fsm_reg */
    localparam FSM_FETCH_I  = 0;
    localparam FSM_DECODE_I = 1;
    localparam FSM_REGR_I   = 2;
    localparam FSM_EXEC_I   = 3;
    localparam FSM_MEM_I    = 4;
    localparam FSM_REGW_I   = 5;

// CASE_FSM: enabling it improves timing by about 10 MHz but costs 6 more LUTs
// (reason not yet understood).
`define CASE_FSM 1
// CASE_FSM_NOSTART: enabling it does not change the LUT count but lowers the
// clock frequency by about 2 MHz (reason not yet understood).
//`define CASE_FSM_NOSTART 1
// RFRD_IF: latches the register addresses one cycle earlier; saves one LUT
// but costs about 10 MHz (reason not yet understood).
// NOTE(review): with RFRD_IF defined, LUI no longer forces reg_rs1 to x0, so
// LUI would add the register selected by instr[19:15] to the immediate.
//`define RFRD_IF 1

    /* Processor state machine */
    reg [5:0] fsm_reg = FSM_REGW;
    reg [5:0] fsm_next;

    wire [31:0] lsu_addr;
    reg [3:0] lsu_mem_w;

    /* First instruction is fetched from 0x80, right after the register area */
    reg [31:0] pc = 32'h0000_0080;

    reg fetch_enable;   // high in FETCH and REGW; currently not consumed anywhere

    reg rs1_enable;
    reg rs2_enable;

`ifndef CASE_FSM
    reg fsm_wait; // FSM stall request (never asserted at the moment)
`endif

    /* State register */
    always @(posedge clk_i)
    begin
`ifndef CASE_FSM
        // Shift-register FSM: rotate the single hot bit, wrapping REGW -> FETCH.
        if(!fsm_wait)
            fsm_reg <= fsm_reg[FSM_REGW_I]? FSM_FETCH: {fsm_reg[4:0], 1'b0};
`else
        fsm_reg <= fsm_next;
`endif
    end

    /* Next-state and per-state enable logic */
    always @*
    begin
        fetch_enable = 0;
        rs1_enable = 0;
        rs2_enable = 0;
`ifndef CASE_FSM
        fsm_wait = 0;

        if(fsm_reg[FSM_FETCH_I] || fsm_reg[FSM_REGW_I])
            fetch_enable = 1;

        if(fsm_reg[FSM_REGR_I])
        begin
            rs1_enable = 1;
            rs2_enable = 1;
        end
`endif
`ifdef CASE_FSM
    `ifdef CASE_FSM_NOSTART
        // Don't-care default; sized to fsm_next (the original used 32'bx).
        fsm_next = 6'bx;
    `endif
        case(fsm_reg) //synopsys parallel_case
            FSM_FETCH: // instruction fetch
            begin
                fetch_enable = 1;
                fsm_next = FSM_DECODE;
            end
            FSM_DECODE:
            begin
                fsm_next = FSM_REGR;
            end
            FSM_REGR:
            begin
                rs1_enable = 1;
                rs2_enable = 1;
                fsm_next = FSM_EXEC;
            end
            FSM_EXEC:
            begin
                fsm_next = FSM_MEM;
            end
            FSM_MEM: // multi-cycle operations are meant to stall here as well
            begin
                fsm_next = FSM_REGW;
            end
            FSM_REGW:
            begin
                fetch_enable = 1;
                fsm_next = FSM_FETCH;
            end
    `ifndef CASE_FSM_NOSTART
            // Any non-one-hot state recovers through REGW.
            default:
                fsm_next = FSM_REGW;
    `endif
        endcase
`endif
    end


    /* Instruction decode */
    /* Opcode constants */
    localparam OP_LUI    = 7'b0110111;
    localparam OP_AUIPC  = 7'b0010111;
    localparam OP_JAL    = 7'b1101111;
    localparam OP_JALR   = 7'b1100111;
    localparam OP_BRANCH = 7'b1100011;
    localparam OP_LOAD   = 7'b0000011;
    localparam OP_STORE  = 7'b0100011;
    localparam OP_IMM    = 7'b0010011;
    localparam OP_R2R    = 7'b0110011;
    // CSR instructions are not supported yet
    /* ALU functions (same encoding as the instruction's funct3 field) */
    localparam ALU_ADD = 3'b000;
    localparam ALU_OR  = 3'b110;
    localparam ALU_AND = 3'b111;
    localparam ALU_XOR = 3'b100;
    // Unused: 3'b001 is the funct3 of SLL, so SUB cannot be selected through
    // alu_opcode; subtraction is selected by alu_sub instead.
    localparam ALU_SUB = 3'b001;

    /* Port A output doubles as the fetched instruction during DECODE */
    wire [31:0] instr = rf_int_do_a;
    reg [6:0] opcode = 0;
    reg [2:0] alu_opcode = 0;
    reg alu_sub = 0;            // 1: ALU_ADD performs op0 - op1 (R-type SUB)

    reg [4:0] reg_rd = 0;
    reg [4:0] reg_rs1 = 0;
    reg [4:0] reg_rs2 = 0;
    reg [2:0] funct3 = 0;

    reg [31:0] dec_imm = 0;

    wire [6:0] opcode_comb = instr[6:0];

    /* Immediate format selection */
    wire imm_i = (opcode_comb == OP_IMM || opcode_comb == OP_LOAD || opcode_comb == OP_JALR);
    wire imm_s = (opcode_comb == OP_STORE);
    wire imm_b = (opcode_comb == OP_BRANCH);
    wire imm_u = (opcode_comb == OP_LUI || opcode_comb == OP_AUIPC);
    wire imm_j = (opcode_comb == OP_JAL);

    reg alu_op_0_is_reg = 0;    // 1: ALU operand 0 is rs1, 0: pc
    reg alu_op_1_is_imm = 0;    // 1: ALU operand 1 is the immediate, 0: rs2
    reg need_wb = 0;            // instruction writes rd
    reg need_store = 0;
    reg need_load = 0;
    reg jump = 0;               // JAL/JALR: pc is loaded from the ALU result
    reg branch = 0;             // set for OP_BRANCH; not consumed yet
    reg rd_nonzero = 0;         // rd != x0, so the write-back may proceed

    always @(posedge clk_i)
    begin
        if(fsm_reg[FSM_DECODE_I])
        begin
            /* Defaults: rd <= rs1 + imm, no memory access, no jump */
            opcode <= opcode_comb;
            need_wb <= 1;
            need_store <= 0;
            need_load <= 0;
            jump <= 0;
            branch <= 0;
            funct3 <= instr[14:12];
            alu_op_1_is_imm <= 1;
            alu_op_0_is_reg <= 1;
            alu_opcode <= ALU_ADD;
            alu_sub <= 0;
            rd_nonzero <= (instr[11:7] != 5'd0);

            /* Per-opcode overrides (later non-blocking assignments win) */
            if(opcode_comb == OP_IMM)
            begin
                alu_opcode <= instr[14:12];
            end
            if(opcode_comb == OP_R2R)
            begin
                alu_op_1_is_imm <= 0;
                // The ALU function comes from funct3, exactly as for OP_IMM;
                // without this every register-register operation was an ADD.
                alu_opcode <= instr[14:12];
                // funct7 bit 5 (instr[30]) turns ADD into SUB.
                alu_sub <= (instr[14:12] == ALU_ADD) && instr[30];
            end
            if(opcode_comb == OP_AUIPC)
            begin
                alu_op_0_is_reg <= 0;
            end
            if(opcode_comb == OP_JAL)
            begin
                jump <= 1;
                alu_op_0_is_reg <= 0;   // target = pc + imm
            end
            if(opcode_comb == OP_JALR)
            begin
                jump <= 1;              // target = rs1 + imm (the defaults)
            end
            if(opcode_comb == OP_STORE)
            begin
                need_store <= 1;
                need_wb <= 0;
            end
            if(opcode_comb == OP_BRANCH)
            begin
                // Branches have no rd: bits [11:7] are immediate bits, so a
                // write-back would corrupt an arbitrary register. The branch
                // itself is still not taken (not implemented yet).
                branch <= 1;
                need_wb <= 0;
            end
            if(opcode_comb == OP_LOAD)
            begin
                need_load <= 1;
            end

`ifndef RFRD_IF
            reg_rd <= instr[11:7];
            if(opcode_comb == OP_LUI)
            begin
                // LUI is computed as x0 + imm: read register 0 as operand 0.
                alu_op_0_is_reg <= 1;
                reg_rs1 <= 0;
            end
            else
                reg_rs1 <= instr[19:15];
            reg_rs2 <= instr[24:20];
`endif

            /* Immediate decoder */
            if(imm_i)
                dec_imm <= {{21{instr[31]}}, instr[30:20]};
            else if(imm_s)
                dec_imm <= {{21{instr[31]}}, instr[30:25], instr[11:7]};
            else if(imm_b)
                dec_imm <= {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
            else if(imm_u)
                dec_imm <= {instr[31:12], 12'b0};
            else if(imm_j)
                dec_imm <= {{12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0};
        end
    end
`ifdef RFRD_IF
    /* Optional: latch the register addresses during FETCH instead of DECODE */
    always @(posedge clk_i)
    begin
        if(fsm_reg[FSM_FETCH_I])
        begin
            reg_rd <= instr[11:7];
            reg_rs1 <= instr[19:15];
            reg_rs2 <= instr[24:20];
        end
    end
`endif
    /* Bus address: load/store address during MEM, program counter otherwise */
    assign icb_addr_o = (fsm_reg[FSM_MEM_I])? lsu_addr: pc;
    /* Port A address: register index of rs1 during REGR, bus word address otherwise */
    assign rf_int_addr_a = (fsm_reg[FSM_REGR_I])? {7'b0, reg_rs1}:
                            icb_addr_o[13:2];

    wire internal_ram_select = (icb_addr_o[31:14] == 0); // address hits the internal RAM

    /*
     * External bus request. The only external data this core ever consumes
     * or produces is load/store data during MEM (instructions always come
     * from port A of the internal RAM), so the request is raised only then.
     * This output was left undriven before.
     */
    assign icb_en_o = fsm_reg[FSM_MEM_I] && (need_load || need_store) && !internal_ram_select;

    assign rf_int_we_a = icb_wmask_o;
    assign rf_int_ce_a = rs1_enable | internal_ram_select;
    /* Port B address: rs2 during REGR, rd otherwise (written in REGW) */
    assign rf_int_addr_b = (fsm_reg[FSM_REGR_I])? {7'b0, reg_rs2}: {7'b0, reg_rd};


    /* Instruction execution */
    wire [31:0] rf_reg1 = rf_int_do_a;
    wire [31:0] rf_reg2 = rf_int_do_b;
    /* ALU */
    wire [31:0] alu_op_0 = alu_op_0_is_reg? rf_reg1: pc;
    wire [31:0] alu_op_1 = alu_op_1_is_imm? dec_imm: rf_reg2;
    reg [31:0] alu_result_comb;
    reg [31:0] ex_result = 0;
    always @*
    begin
        // Unimplemented functions (shifts, SLT/SLTU) yield X in simulation.
        alu_result_comb = 32'bx;
        case(alu_opcode) //synopsys parallel_case
            ALU_ADD: alu_result_comb = alu_sub? (alu_op_0 - alu_op_1): (alu_op_0 + alu_op_1);
            ALU_OR:  alu_result_comb = alu_op_0 | alu_op_1;
            ALU_AND: alu_result_comb = alu_op_0 & alu_op_1;
            ALU_XOR: alu_result_comb = alu_op_0 ^ alu_op_1;
        endcase
    end

    // Comparator idea: evaluate the branch condition in EX and reuse the ALU
    // in MEM to compute the PC offset?

    wire [31:0] pc_plus_4 = pc + 32'd4;

    /*
     * EXEC: capture the ALU result.
     * MEM:  update pc. Memory access and second-stage multi-cycle
     *       instructions (e.g. shifts) are meant to live in this state too.
     *       For a jump the ALU result (the target) moves into pc and
     *       ex_result is reloaded with the return address pc + 4, so that
     *       REGW writes the link value into rd instead of the jump target.
     * The states are one-hot, so the two branches never fire together.
     */
    always @(posedge clk_i)
    begin
        if(fsm_reg[FSM_EXEC_I])
        begin
            ex_result <= alu_result_comb;
        end
        if(fsm_reg[FSM_MEM_I])
        begin
            if(jump)
            begin
                pc <= {ex_result[31:2], 2'b00};
                ex_result <= pc_plus_4;
            end
            else
                pc <= pc_plus_4;
        end
    end
    /* LSU */
    reg [31:0] icb_wdata;

    /* Store data and byte mask; the mask ignores the low address bits */
    always @*
    begin
        icb_wdata = rf_reg2;
        lsu_mem_w = 4'b0000;
        if(funct3 == 3'b000)
        begin
            lsu_mem_w = 4'b0001;
        end
        if(funct3 == 3'b001)
        begin
            lsu_mem_w = 4'b0011;
        end
        if(funct3 == 3'b010)
            lsu_mem_w = 4'b1111;
        /*
        if(funct3 == 3'b000)
        begin // address decoding in the LSU has poor timing and performance...
            lsu_mem_w[0] = (lsu_addr[1:0] == 2'b00);
            lsu_mem_w[1] = (lsu_addr[1:0] == 2'b01);
            lsu_mem_w[2] = (lsu_addr[1:0] == 2'b10);
            lsu_mem_w[3] = (lsu_addr[1:0] == 2'b11);
            icb_wdata = {rf_reg2[7:0], rf_reg2[7:0], rf_reg2[7:0], rf_reg2[7:0]};
        end
        if(funct3 == 3'b001)
        begin
            lsu_mem_w[0] = (lsu_addr[1] == 1'b0);
            lsu_mem_w[1] = (lsu_addr[1] == 1'b0);
            lsu_mem_w[2] = (lsu_addr[1] == 1'b1);
            lsu_mem_w[3] = (lsu_addr[1] == 1'b1);
            icb_wdata = {rf_reg2[15:0], rf_reg2[15:0]};
        end
        if(funct3 == 3'b010)
            lsu_mem_w = 4'b1111;
        */
    end

    assign lsu_addr = ex_result;
    assign icb_wdata_o = icb_wdata;
    assign rf_int_di_a = icb_wdata_o;
    assign icb_wmask_o = (fsm_reg[FSM_MEM_I] && need_store)? lsu_mem_w: 4'b0;

    /*
     * Load data is consumed in REGW, when icb_addr_o already shows pc again.
     * The source is therefore chosen from the load address itself (still
     * held in ex_result for a load) rather than from internal_ram_select,
     * which would follow pc and always pick the internal RAM.
     */
    wire load_from_internal = (lsu_addr[31:14] == 0);
    wire [31:0] icb_rdata = load_from_internal? rf_int_do_a: icb_rdata_i;
    reg [31:0] data_load;

    always @* // sign extension (only word loads are implemented)
    begin
        data_load = icb_rdata;
        /*
        if(funct3[1:0] == 2'b00)
        begin
            if(lsu_addr[1:0] == 2'b00)
                data_load = {{24{funct3[2]? 1'b0:icb_rdata[7]}}, icb_rdata[7:0]};
            if(lsu_addr[1:0] == 2'b01)
                data_load = {{24{funct3[2]? 1'b0:icb_rdata[15]}}, icb_rdata[15:8]};
            if(lsu_addr[1:0] == 2'b10)
                data_load = {{24{funct3[2]? 1'b0:icb_rdata[23]}}, icb_rdata[23:16]};
            if(lsu_addr[1:0] == 2'b11)
                data_load = {{24{funct3[2]? 1'b0:icb_rdata[31]}}, icb_rdata[31:24]};
        end
        if(funct3[1:0] == 2'b01) // uses too many resources; to be revisited later
        begin
            if(lsu_addr[1] == 1'b0)
                data_load = {{16{funct3[2]? 1'b0:icb_rdata[15]}}, icb_rdata[15:0]};
            if(lsu_addr[1] == 1'b1)
                data_load = {{16{funct3[2]? 1'b0:icb_rdata[31]}}, icb_rdata[31:16]};
        end
        */
    end

    /* Write-back: port B writes rd during REGW unless rd is x0 */
    assign rf_int_ce_b = rs2_enable | (fsm_reg[FSM_REGW_I]);
    assign rf_int_we_b = (fsm_reg[FSM_REGW_I] && need_wb && rd_nonzero)? 4'b1111: 4'b0;
    assign rf_int_di_b = (need_load)? data_load: ex_result;
endmodule

--------------------------------------------------------------------------------