├── project └── plugins.sbt ├── fst ├── int8_mul.fst ├── int12_xadd.fst ├── int16_dotp.fst ├── int16_mul.fst ├── int24_acc.fst ├── int8_dotp.fst ├── uint4_mul.fst ├── int12_xdotp.fst ├── int16_os_B_P.fst ├── int16_ws_B_P.fst ├── int8_ws_AD_B.fst ├── int8_ws_B_P.fst ├── int12_ws_AB_C_P.fst ├── int16_dotp_ddr.fst └── int24_acc_scale.fst ├── data ├── xdc │ ├── singleClockConstraints.xdc │ └── dualClockConstraints.xdc └── oocFlow.tcl ├── src ├── main │ └── scala │ │ ├── systolic │ │ ├── DDRGearBox.scala │ │ └── GlobalSignalTree.scala │ │ ├── util │ │ ├── StridedAddrGen.scala │ │ ├── PairBundle.scala │ │ ├── AxiStreamSpecRenamer.scala │ │ ├── BarrelShifter.scala │ │ └── LoopsCntGen.scala │ │ ├── xilinx │ │ ├── FloatingIP │ │ │ ├── float16.scala │ │ │ ├── float32.scala │ │ │ └── floating_ip_stub.scala │ │ ├── IP │ │ │ ├── AxiDataMoverCmdGen.scala │ │ │ └── ZynqPsDDRBench.scala │ │ ├── DSP48E2 │ │ │ ├── DSP48E2ConfigMode.scala │ │ │ ├── DSP48E2ConfigWXYZ.scala │ │ │ ├── DSP48E2.scala │ │ │ └── DSP48E2ConfigABCD.scala │ │ └── DSP48E2IntArithmetic │ │ │ ├── standalone │ │ │ ├── int16_mul.scala │ │ │ ├── int12_xadd.scala │ │ │ ├── int24_acc.scala │ │ │ ├── int8_mul.scala │ │ │ ├── uint4_mul.scala │ │ │ ├── int24_acc_scale.scala │ │ │ ├── dpuczdx8g_acc.scala │ │ │ └── ring_acc.scala │ │ │ ├── cascade │ │ │ ├── int8_dotp.scala │ │ │ ├── int16_dotp.scala │ │ │ ├── int16_dotp_ddr.scala │ │ │ ├── int12_xdotp.scala │ │ │ └── int8_dotp_ddr.scala │ │ │ └── dualCascade │ │ │ ├── int16_ws_B_P.scala │ │ │ ├── int16_os_B_P.scala │ │ │ ├── int8_ws_B_P.scala │ │ │ ├── int12_ws_AB_C_P.scala │ │ │ └── int8_ws_AD_B.scala │ │ ├── example │ │ ├── tpu14_sa.scala │ │ ├── firefly16_sa.scala │ │ ├── ehb1024_sa.scala │ │ ├── firefly16_pe.scala │ │ ├── b1024_sa.scala │ │ ├── ehb1024_pe.scala │ │ ├── tpu14_pe.scala │ │ ├── int8_ws_b_p_clb.scala │ │ ├── b1024_pe.scala │ │ └── int12_ws_ab_c_p_clb.scala │ │ └── rearrange │ │ └── SerialController.scala └── test │ └── scala │ ├── verilog_gen.scala │ ├── 
eval_int16_mul.scala │ ├── eval_int8_mul.scala │ ├── eval_int16_dotp_ddr.scala │ ├── eval_int8_dotp.scala │ ├── eval_uint4_mul.scala │ ├── eval_int24_acc_scale.scala │ ├── eval_int16_os_B_P.scala │ ├── eval_int24_acc.scala │ ├── eval_int12_xadd.scala │ ├── eval_ring_acc.scala │ ├── eval_int16_dotp.scala │ ├── eval_int8_dotp_ddr.scala │ ├── eval_int16_ws_B_P.scala │ ├── int16_ws_b_p_simple.scala │ ├── eval_int8_ws_B_P.scala │ ├── eval_int12_xdotp.scala │ ├── eval_int12_ws_AB_C_P.scala │ └── eval_int8_ws_AD_B.scala ├── .gitignore ├── README.md ├── verilog └── xilinx │ └── DSP48E2Arithmetic │ ├── int16_mul.v │ ├── int24_acc.v │ ├── uint4_mul.v │ ├── int12_xadd.v │ ├── int8_mul.v │ └── int24_acc_scale.v └── LICENSE /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.2") -------------------------------------------------------------------------------- /fst/int8_mul.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int8_mul.fst -------------------------------------------------------------------------------- /fst/int12_xadd.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int12_xadd.fst -------------------------------------------------------------------------------- /fst/int16_dotp.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int16_dotp.fst -------------------------------------------------------------------------------- /fst/int16_mul.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int16_mul.fst 
-------------------------------------------------------------------------------- /fst/int24_acc.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int24_acc.fst -------------------------------------------------------------------------------- /fst/int8_dotp.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int8_dotp.fst -------------------------------------------------------------------------------- /fst/uint4_mul.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/uint4_mul.fst -------------------------------------------------------------------------------- /fst/int12_xdotp.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int12_xdotp.fst -------------------------------------------------------------------------------- /fst/int16_os_B_P.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int16_os_B_P.fst -------------------------------------------------------------------------------- /fst/int16_ws_B_P.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int16_ws_B_P.fst -------------------------------------------------------------------------------- /fst/int8_ws_AD_B.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int8_ws_AD_B.fst -------------------------------------------------------------------------------- /fst/int8_ws_B_P.fst: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int8_ws_B_P.fst -------------------------------------------------------------------------------- /data/xdc/singleClockConstraints.xdc: -------------------------------------------------------------------------------- 1 | create_clock -period 1.500 -waveform {0.000 0.750} [get_ports clk] -------------------------------------------------------------------------------- /fst/int12_ws_AB_C_P.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int12_ws_AB_C_P.fst -------------------------------------------------------------------------------- /fst/int16_dotp_ddr.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int16_dotp_ddr.fst -------------------------------------------------------------------------------- /fst/int24_acc_scale.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adamgallas/SpinalDLA/HEAD/fst/int24_acc_scale.fst -------------------------------------------------------------------------------- /src/main/scala/systolic/DDRGearBox.scala: -------------------------------------------------------------------------------- 1 | package systolic 2 | 3 | class DDRGearBox { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/util/StridedAddrGen.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | class StridedAddrGen { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/FloatingIP/float16.scala: -------------------------------------------------------------------------------- 1 | 
package xilinx.FloatingIP 2 | 3 | class float16 { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/FloatingIP/float32.scala: -------------------------------------------------------------------------------- 1 | package xilinx.FloatingIP 2 | 3 | class float32 { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Project exclude paths 2 | /project/target/ 3 | /target/ 4 | /.bsp/ 5 | /simWorkspace/ 6 | /tmp/ 7 | /.idea/ 8 | -------------------------------------------------------------------------------- /data/oocFlow.tcl: -------------------------------------------------------------------------------- 1 | synth_design -mode out_of_context -flatten_hierarchy none 2 | opt_design 3 | place_design 4 | phys_opt_design 5 | route_design -------------------------------------------------------------------------------- /src/main/scala/util/PairBundle.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | 6 | import scala.language.postfixOps 7 | 8 | case class PairBundle[T <: Data, T2 <: Data](valueType: HardType[T], linkedType: HardType[T2]) extends Bundle { 9 | val A = valueType() 10 | val B = linkedType() 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/systolic/GlobalSignalTree.scala: -------------------------------------------------------------------------------- 1 | package systolic 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | 6 | import scala.language.postfixOps 7 | 8 | class GlobalSignalTree[T <: Data](dataType: HardType[T], fanout: Int, leaf: Int) extends Component { 9 | val io = new Bundle { 10 | val input = in(dataType()) 11 | val outputs = out(Vec(dataType(), leaf)) 12 | } 13 | 14 | } 
15 | -------------------------------------------------------------------------------- /data/xdc/dualClockConstraints.xdc: -------------------------------------------------------------------------------- 1 | create_clock -period 1.500 -waveform {0.000 0.750} [get_ports fast_clk] 2 | create_clock -period 3.000 -waveform {0.000 1.500} [get_ports clk] 3 | 4 | set_property HD.CLK_SRC BUFGCTRL_X0Y0 [get_ports fast_clk] 5 | set_property HD.CLK_SRC BUFGCTRL_X0Y0 [get_ports clk] 6 | 7 | # slow 2 fast 8 | set_multicycle_path -setup -end -from clk -to fast_clk 2 9 | set_multicycle_path -hold -end -from clk -to fast_clk 1 10 | 11 | # fast 2 slow 12 | set_multicycle_path -setup -start -from fast_clk -to clk 2 13 | set_multicycle_path -hold -start -from fast_clk -to clk 1 -------------------------------------------------------------------------------- /src/main/scala/example/tpu14_sa.scala: -------------------------------------------------------------------------------- 1 | package example 2 | 3 | import spinal.core._ 4 | 5 | import scala.language.postfixOps 6 | 7 | class tpu14_sa() extends Component { 8 | 9 | val length = 14 10 | 11 | val pe = for (i <- 0 until length) yield { 12 | new tpu14_pe(isLastPEHorizontal = i == length - 1) 13 | } 14 | 15 | val inp = pe.map(_.inp.toIo()) 16 | val out = pe.map(_.out.toIo()) 17 | val srcH = pe(0).srcH.toIo() 18 | 19 | for (i <- 1 until length) { 20 | pe(i).srcH := pe(i - 1).dstH 21 | } 22 | } 23 | 24 | object tpu extends App { 25 | SpinalVerilog(new tpu14_sa()) 26 | } -------------------------------------------------------------------------------- /src/main/scala/example/firefly16_sa.scala: -------------------------------------------------------------------------------- 1 | package example 2 | 3 | import spinal.core._ 4 | 5 | import scala.language.postfixOps 6 | 7 | class firefly16_sa() extends Component { 8 | 9 | val length = 4 10 | 11 | val pe = for (i <- 0 until length) yield { 12 | new firefly16_pe(isLastPEHorizontal = i == length - 
1)
13 |   }
14 |
15 |   val inp = pe.map(_.inp.toIo())
16 |   val out = pe.map(_.out.toIo())
17 |   val srcH = pe(0).srcH.toIo()
18 |
19 |   for (i <- 1 until length) {
20 |     pe(i).srcH := pe(i - 1).dstH
21 |   }
22 | }
23 |
24 | object firefly extends App {
25 |   SpinalVerilog(new firefly16_sa())
26 | }
--------------------------------------------------------------------------------
/src/main/scala/xilinx/IP/AxiDataMoverCmdGen.scala:
--------------------------------------------------------------------------------
1 | package xilinx.IP
2 |
3 | import spinal.core._
4 | import spinal.lib._
5 |
6 | import scala.language.postfixOps
7 |
8 | object AxiDataMoverCmdGen {
9 |   def apply(stream: Stream[util.PairBundle[UInt, UInt]], baseAddr: UInt, inc: Bool = True, eof: Bool = False) = {
10 |     val cmd = Stream(Bits(32 + 8 + baseAddr.getWidth bits))
11 |     cmd.arbitrationFrom(stream)
12 |     cmd.payload := B"00000000" ##
13 |       (stream.A + baseAddr).resize(baseAddr.getWidth) ##
14 |       B"0" ##
15 |       eof ##
16 |       B"000000" ##
17 |       inc ##
18 |       stream.B.resize(23)
19 |     cmd
20 |   }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/scala/xilinx/DSP48E2/DSP48E2ConfigMode.scala:
--------------------------------------------------------------------------------
1 | package xilinx.DSP48E2
2 |
3 | import spinal.core._
4 | import scala.language.postfixOps
5 |
6 | /** Attribute presets and M/P register control helpers for a DSP48E2 instance. */
7 | object DSP48E2ConfigMode {
8 |
9 |   /**
10 |    * Configure `attr` for multiplier-less (ALU) operation.
11 |    *
12 |    * Sets MREG to 0, USE_MULT to "NONE" and USE_SIMD according to `simd`.
13 |    *
14 |    * @param attr attribute set that is modified in place
15 |    * @param simd number of SIMD lanes: 1 ("ONE48"), 2 ("TWO24") or 4 ("FOUR12")
16 |    * @throws IllegalArgumentException if `simd` is not 1, 2 or 4
17 |    */
18 |   def set_alu_attr(attr: DSP48E2Attributes, simd: Int = 1) = {
19 |     // Resolve the mode before touching `attr`, so an unsupported lane count
20 |     // fails with a clear message (instead of a bare scala.MatchError) and
21 |     // does not leave the attribute set half-configured.
22 |     val simdMode = simd match {
23 |       case 1 => "ONE48"
24 |       case 2 => "TWO24"
25 |       case 4 => "FOUR12"
26 |       case _ => throw new IllegalArgumentException(
27 |         s"set_alu_attr: unsupported simd = $simd (expected 1, 2 or 4)")
28 |     }
29 |     attr.MREG = 0
30 |     attr.USE_MULT = "NONE"
31 |     attr.USE_SIMD = simdMode
32 |   }
33 |
34 |   /** Configure `attr` for multiplier operation: MREG = 1, USE_MULT = "MULTIPLY", USE_SIMD = "ONE48". */
35 |   def set_mul_attr(attr: DSP48E2Attributes) = {
36 |     attr.MREG = 1
37 |     attr.USE_MULT = "MULTIPLY"
38 |     attr.USE_SIMD = "ONE48"
39 |   }
40 |
41 |   /** Drive the M register clock enable and reset of `dsp` (both default to False). */
42 |   def assign_m_ctrl(dsp: DSP48E2, ce: Bool = False, rst: Bool = False): Unit = {
43 |     dsp.CEs.M := ce
44 |     dsp.RSTs.M := rst
45 |   }
46 |
47 |   /** Drive the P register clock enable and reset of `dsp` (both default to False). */
48 |   def assign_p_ctrl(dsp: DSP48E2, ce: Bool = False, rst: Bool = False): Unit = {
49 |     dsp.CEs.P := ce
50 |     dsp.RSTs.P := rst
51 |   }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/scala/rearrange/SerialController.scala:
--------------------------------------------------------------------------------
1 | package rearrange
2 |
3 | import spinal.core._
4 | import spinal.lib._
5 |
6 | import scala.language.postfixOps
7 |
8 | class SerialController[T <: Data](
9 |   dataType: HardType[T], mem: Mem[T],
10 |   inDimWidth: List[Int],
11 |   outDimWidth: List[Int],
12 |   outStrideWidth: List[Int]
13 | ) extends Component {
14 |
15 |   require(outDimWidth.length == outStrideWidth.length)
16 |
17 |   val io = new Bundle {
18 |     val push = slave(Stream(dataType()))
19 |     val pop = master(Stream(dataType()))
20 |     val wrCmd = master(Flow(MemWriteCmd(mem)))
21 |     val rdPort = master(MemReadPort(dataType(), mem.addressWidth))
22 |   }
23 |
24 |   val cfg = new Bundle {
25 |     val inDims = in(Vec(inDimWidth.map(w => UInt(w bits))))
26 |     val outDims = in(Vec(outDimWidth.map(w => UInt(w bits))))
27 |     val outStrides = in(Vec(outStrideWidth.map(w => UInt(w bits))))
28 |   }
29 |
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/src/test/scala/verilog_gen.scala:
--------------------------------------------------------------------------------
1 | import spinal.core._
2 | import spinal.core.sim._
3 | import spinal.lib._
4 | import spinal.lib.eda.bench.Rtl
5 | import xilinx.DSP48E2._
6 | import xilinx.DSP48E2IntArithmetic.standalone._
7 | import xilinx.DSP48E2IntArithmetic.cascade._
8 | import xilinx.DSP48E2IntArithmetic.dualCascade._
9 |
10 | import scala.language.postfixOps
11 |
12 | object verilog_gen extends App {
13 |
14 |   val cfg = SpinalConfig(
15 |     mode = Verilog,
16 |     anonymSignalPrefix = "tmp",
17 |     nameWhenByFile = true,
18 |     targetDirectory = "verilog/xilinx/DSP48E2Arithmetic"
19 |   )
20 |
21 |   // standalone
22 |
23 | 
cfg.generate(new int8_mul) 24 | cfg.generate(new int12_xadd) 25 | cfg.generate(new int16_mul) 26 | cfg.generate(new int24_acc) 27 | cfg.generate(new int24_acc_scale) 28 | cfg.generate(new uint4_mul) 29 | 30 | // cascade 31 | 32 | cfg.generate(new int8_dotp(8)) 33 | cfg.generate(new int16_dotp(8)) 34 | cfg.generate(new int16_dotp_ddr(8)) 35 | cfg.generate(new int12_xdotp(8)) 36 | 37 | // dualCascade 38 | 39 | cfg.generate(new int8_ws_AD_B(8)) 40 | cfg.generate(new int8_ws_B_P(8)) 41 | cfg.generate(new int12_ws_AB_C_P(8, 8)) 42 | cfg.generate(new int16_os_B_P(8)) 43 | cfg.generate(new int16_ws_B_P(8)) 44 | } -------------------------------------------------------------------------------- /src/main/scala/util/AxiStreamSpecRenamer.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | 6 | /** 7 | * This method renames the SpinalHDL Stream Bus to the correct naming of the AXI4-Stream Bus that can be recognized by Xilinx Vivado. 
8 |  */
9 |
10 | object AxiStreamSpecRenamer {
11 |   def apply[T <: Bundle](that: T, widthT: Boolean = true): T = {
12 |     def doIt() = {
13 |       that.flatten.foreach(bt => {
14 |
15 |         val t = if (widthT) "t" else ""
16 |         bt.setName(bt.getName().replace("_payload_fragment", "_" + t + "data"))
17 |         bt.setName(bt.getName().replace("_payload_last", "_" + t + "last"))
18 |
19 |         bt.setName(bt.getName().replace("_fragment", "_" + t + "data"))
20 |         bt.setName(bt.getName().replace("_payload", "_" + t + "data"))
21 |         bt.setName(bt.getName().replace("_valid", "_" + t + "valid"))
22 |         bt.setName(bt.getName().replace("_ready", "_" + t + "ready"))
23 |         bt.setName(bt.getName().replace("_last", "_" + t + "last"))
24 |
25 |         if (bt.getName().startsWith("io_")) bt.setName(bt.getName().replaceFirst("io_", ""))
26 |       })
27 |     }
28 |
29 |     if (Component.current == that.component)
30 |       that.component.addPrePopTask(() => {
31 |         doIt()
32 |       })
33 |     else
34 |       doIt()
35 |     that
36 |   }
37 | }
--------------------------------------------------------------------------------
/src/test/scala/eval_int16_mul.scala:
--------------------------------------------------------------------------------
1 | import spinal.core._
2 | import spinal.core.sim._
3 | import spinal.lib._
4 | import spinal.lib.eda.bench.Rtl
5 | import xilinx.DSP48E2._
6 | import xilinx.DSP48E2IntArithmetic.standalone.int16_mul
7 |
8 | import scala.language.postfixOps
9 | import scala.util.Random
10 |
11 | /**
12 |  * Simulation check for `int16_mul`: streams `length` random operand pairs into
13 |  * the DUT and compares every product on `io.ab` against a software reference.
14 |  */
15 | object eval_int16_mul extends App {
16 |
17 |   val length = 32
18 |   // NOTE(review): operands are drawn from [-128, 127], so only the 8-bit
19 |   // sub-range of the 16-bit inputs is exercised -- confirm whether intended.
20 |   val a = Array.fill(length)(Random.nextInt(256) - 128)
21 |   val b = Array.fill(length)(Random.nextInt(256) - 128)
22 |   val ab = (a, b).zipped.map(_ * _)
23 |
24 |   SimConfig.withFstWave
25 |     .addRtl("data/sim/DSP48E2.v")
26 |     .compile(new int16_mul)
27 |     .doSim { dut =>
28 |       import dut._
29 |
30 |       io.a #= 0
31 |       io.b #= 0
32 |
33 |       clockDomain.forkStimulus(10)
34 |       clockDomain.waitSampling(32)
35 |
36 |       // Result `index` is compared at iteration i = index + latency + 1, so the
37 |       // loop needs length + latency + 1 iterations; with only length + latency
38 |       // the final product ab(length - 1) was never checked.
39 |       for (i <- 0 until length + latency + 1) {
40 |         if (i < length) {
41 |           io.a #= a(i) & 0xffff
42 |           io.b #= b(i) & 0xffff
43 |         }
44 |         else {
45 |           // Input stream exhausted: drive zeros while the pipeline drains.
46 |           io.a #= 0
47 |           io.b #= 0
48 |         }
49 |         if (i > latency) {
50 |           val index = i - latency - 1
51 |           // toBigInt is unsigned 32-bit; toInt wraps it back to the signed product.
52 |           assert(
53 |             ab(index) == io.ab.toBigInt.toInt,
54 |             s"ab ${a(index)}, ${b(index)}, ${io.ab.toBigInt.toInt}, ${ab(index)}")
55 |         }
56 |         clockDomain.waitSampling(1)
57 |       }
58 |       io.a #= 0
59 |       io.b #= 0
60 |
61 |       clockDomain.waitSampling(32)
62 |       simSuccess()
63 |     }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/scala/example/ehb1024_sa.scala:
--------------------------------------------------------------------------------
1 | package example
2 |
3 | import spinal.core._
4 |
5 | import scala.language.postfixOps
6 |
7 | class ehb1024_sa(
8 |   clkx2: ClockDomain,
9 |   length: Int,
10 |   width: Int,
11 |   height: Int
12 | ) extends Component {
13 |
14 |   val pe = for (i <- 0 until height) yield {
15 |     for (j <- 0 until width) yield {
16 |       new ehb1024_pe(
17 |         clkx2 = clkx2,
18 |         length = length,
19 |         isLastPEVertical = i == height - 1,
20 |         isLastPEHorizontal = j == width - 1
21 |       )
22 |     }
23 |   }
24 |
25 |   val inp = for (i <- 0 until height) yield {
26 |     for (j <- 0 until width) yield {
27 |       pe(i)(j).inp.toIo()
28 |     }
29 |   }
30 |
31 |   val out = for (i <- 0 until height) yield {
32 |     for (j <- 0 until width) yield {
33 |       pe(i)(j).out.toIo()
34 |     }
35 |   }
36 |
37 |   val srcH = for (i <- 0 until height) yield {
38 |     pe(i)(0).srcH.toIo()
39 |   }
40 |
41 |   val srcV = for (j <- 0 until width) yield {
42 |     pe(0)(j).srcV.toIo()
43 |   }
44 |
45 |
46 |   for (i <- 0 until height) {
47 |     for (j <- 1 until width) {
48 |       pe(i)(j).srcH := pe(i)(j - 1).dstH
49 |     }
50 |   }
51 |
52 |   for (j <- 0 until width) {
53 |     for (i <- 1 until height) {
54 |       pe(i)(j).srcV := pe(i - 1)(j).dstV
55 |     }
56 |   }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/scala/example/firefly16_pe.scala:
--------------------------------------------------------------------------------
1 | package example
2 |
3 | import spinal.core._
4 | 
import xilinx.DSP48E2IntArithmetic.dualCascade.int12_ws_AB_C_P 5 | 6 | import scala.language.postfixOps 7 | 8 | class firefly16_pe(isLastPEHorizontal: Boolean = false) extends Component { 9 | 10 | val length = 16 11 | 12 | case class inpBdl() extends Bundle { 13 | val a = Vec(Bits(8 bits), 4) 14 | val b = Vec(Bits(8 bits), 4) 15 | val enPrefetch = in Bool() 16 | val enFetch = in Bool() 17 | val clrPrefetch = in Vec(Bool(), length) 18 | } 19 | 20 | case class outBdl() extends Bundle { 21 | val out = Vec(Bits(12 bits), 4) 22 | } 23 | 24 | case class horizBdl() extends Bundle { 25 | val aSel = in Vec(Bits(1 bits), length) 26 | val bSel = in Vec(Bits(1 bits), length) 27 | } 28 | 29 | val inp = new inpBdl().asInput() 30 | val out = new outBdl().asOutput() 31 | val srcH = new horizBdl().asInput() 32 | val dstH = if (!isLastPEHorizontal) new horizBdl().asOutput() else null 33 | 34 | val srcHDly = RegNext(srcH) 35 | if (!isLastPEHorizontal) dstH := srcHDly 36 | 37 | val chain = new int12_ws_ab_c_p_clb(length, 8) 38 | 39 | chain.io.enPrefetch := inp.enPrefetch 40 | chain.io.enFetch := inp.enFetch 41 | chain.io.clrPrefetch := inp.clrPrefetch 42 | chain.io.a := inp.a 43 | chain.io.b := inp.b 44 | 45 | chain.io.aSel := srcHDly.aSel 46 | chain.io.bSel := srcHDly.bSel 47 | 48 | out.out := chain.io.ab 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/int16_mul.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2IntArithmetic.standalone 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class int16_mul() extends Component { 10 | val io = new Bundle { 11 | val a = in Bits (16 bits) 12 | val b = in Bits (16 bits) 13 | val ab = out Bits (32 bits) 14 | } 15 | 16 | import DSP48E2ConfigMode._ 17 | import DSP48E2ConfigABCD._ 18 | import 
DSP48E2ConfigWXYZ._ 19 | 20 | val latency = 4 21 | val attr = new DSP48E2Attributes 22 | 23 | set_mul_attr(attr) 24 | inmode.set_static_inmode_attr(attr) 25 | opmode.set_static_opmode_attr(attr) 26 | a.set_static_a_input_attr(attr,2) 27 | b.set_static_b_input_attr(attr,2) 28 | c.set_mute_c_attr(attr) 29 | d.set_mute_d_attr(attr) 30 | 31 | val dsp = new DSP48E2(attr) 32 | 33 | inmode.assign_static_inmode_ctrl(dsp) 34 | inmode.assign_default(dsp) 35 | opmode.assign_static_opmode_ctrl(dsp) 36 | w.w_sel_c(dsp,False) 37 | x.x_sel_m(dsp,True) 38 | y.y_sel_m(dsp,True) 39 | z.z_sel_p(dsp,False) 40 | a.assign_static_a_input_ctrl(dsp,2) 41 | b.assign_static_b_input_ctrl(dsp,2) 42 | c.assign_mute_c_ctrl(dsp) 43 | d.assign_mute_d_ctrl(dsp) 44 | 45 | assign_m_ctrl(dsp, ce = True, rst = False) 46 | assign_p_ctrl(dsp, ce = True, rst = False) 47 | 48 | dsp.DATAIN.A := io.b.asSInt.resize(30).asBits 49 | dsp.DATAIN.B := io.a.asSInt.resize(18).asBits 50 | io.ab := dsp.DATAOUT.P.take(32) 51 | } -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/int12_xadd.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2IntArithmetic.standalone 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class int12_xadd extends Component { 10 | 11 | val io = new Bundle { 12 | val aSel = in Bits (1 bits) 13 | val bSel = in Bits (1 bits) 14 | val a = in Vec(Bits(12 bits), 4) 15 | val b = in Vec(Bits(12 bits), 4) 16 | val ab = out Vec(Bits(12 bits), 4) 17 | } 18 | 19 | import DSP48E2ConfigMode._ 20 | import DSP48E2ConfigABCD._ 21 | import DSP48E2ConfigWXYZ._ 22 | 23 | val latency = 2 24 | val attr = new DSP48E2Attributes 25 | 26 | set_alu_attr(attr, simd = 4) 27 | inmode.set_static_inmode_attr(attr) 28 | opmode.set_dynamic_opmode_attr(attr) 29 | ab_concat.set_ab_concat_attr(attr) 30 | 
c.set_c_input_attr(attr) 31 | d.set_mute_d_attr(attr) 32 | 33 | val dsp = new DSP48E2(attr) 34 | 35 | inmode.assign_static_inmode_ctrl(dsp) 36 | inmode.assign_default(dsp) 37 | opmode.assign_dynamic_opmode_ctrl(dsp) 38 | w.w_sel_c(dsp, io.bSel.msb) 39 | x.x_sel_ab(dsp, io.aSel.msb) 40 | y.y_sel_c(dsp, False) 41 | z.z_sel_p(dsp, False) 42 | ab_concat.assign_ab_concat_ctrl(dsp) 43 | c.assign_c_input_ctrl(dsp) 44 | d.assign_mute_d_ctrl(dsp) 45 | 46 | assign_m_ctrl(dsp) 47 | assign_p_ctrl(dsp, ce = True, rst = False) 48 | 49 | val AB = io.a.asBits 50 | dsp.DATAIN.A := AB.drop(18) 51 | dsp.DATAIN.B := AB.take(18) 52 | dsp.DATAIN.C := io.b.asBits 53 | 54 | val P = dsp.DATAOUT.P 55 | io.ab.assignFromBits(P) 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/int24_acc.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2IntArithmetic.standalone 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class int24_acc() extends Component { 10 | val io = new Bundle { 11 | val a = in Bits (24 bits) 12 | val b = in Bits (24 bits) 13 | val c = in Bits (24 bits) 14 | val d = in Bits (24 bits) 15 | 16 | val ab = out Bits (24 bits) 17 | val cd = out Bits (24 bits) 18 | 19 | val valid = in Bool() 20 | val last = in Bool() 21 | } 22 | 23 | import DSP48E2ConfigMode._ 24 | import DSP48E2ConfigABCD._ 25 | import DSP48E2ConfigWXYZ._ 26 | 27 | val accValid = Bool().setAsReg().init(False) 28 | accValid.setWhen(io.valid).clearWhen(io.last) 29 | 30 | val latency = 2 31 | val attr = new DSP48E2Attributes 32 | 33 | set_alu_attr(attr,simd = 2) 34 | inmode.set_static_inmode_attr(attr) 35 | opmode.set_dynamic_opmode_attr(attr) 36 | ab_concat.set_ab_concat_attr(attr) 37 | c.set_c_input_attr(attr) 38 | d.set_mute_d_attr(attr) 39 | 40 | val dsp = new DSP48E2(attr) 41 | 42 
| inmode.assign_static_inmode_ctrl(dsp) 43 | inmode.assign_default(dsp) 44 | opmode.assign_dynamic_opmode_ctrl(dsp) 45 | w.w_sel_c(dsp, io.valid) 46 | x.x_sel_ab(dsp, io.valid) 47 | y.y_sel_c(dsp, False) 48 | z.z_sel_p(dsp, accValid) 49 | ab_concat.assign_ab_concat_ctrl(dsp) 50 | c.assign_c_input_ctrl(dsp) 51 | d.assign_mute_d_ctrl(dsp) 52 | 53 | assign_m_ctrl(dsp) 54 | assign_p_ctrl(dsp, ce = True, rst = False) 55 | 56 | val ac = io.a ## io.c 57 | val bd = io.b ## io.d 58 | dsp.DATAIN.A := ac.drop(18) 59 | dsp.DATAIN.B := ac.take(18) 60 | dsp.DATAIN.C := bd 61 | 62 | io.ab := dsp.DATAOUT.P.drop(24) 63 | io.cd := dsp.DATAOUT.P.take(24) 64 | } 65 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/int8_mul.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2IntArithmetic.standalone 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class int8_mul extends Component { 10 | val io = new Bundle { 11 | val a = in Bits (8 bits) 12 | val b = in Bits (8 bits) 13 | val c = in Bits (8 bits) 14 | val ab = out Bits (16 bits) 15 | val ac = out Bits (16 bits) 16 | } 17 | 18 | import DSP48E2ConfigMode._ 19 | import DSP48E2ConfigABCD._ 20 | import DSP48E2ConfigWXYZ._ 21 | 22 | val latency = 4 23 | val attr = new DSP48E2Attributes 24 | 25 | set_mul_attr(attr) 26 | inmode.set_static_inmode_attr(attr) 27 | opmode.set_static_opmode_attr(attr) 28 | ad_pack.set_ad_pack_attr(attr) 29 | ad_pack.set_static_b_attr(attr) 30 | c.set_c_input_attr(attr) 31 | 32 | val dsp = new DSP48E2(attr) 33 | 34 | inmode.assign_static_inmode_ctrl(dsp) 35 | inmode.assign_default(dsp) 36 | opmode.assign_static_opmode_ctrl(dsp) 37 | w.w_sel_c(dsp, True) 38 | x.x_sel_m(dsp, True) 39 | y.y_sel_m(dsp, True) 40 | z.z_sel_p(dsp, False) 41 | ad_pack.assign_ad_pack_ctrl(dsp) 42 | 
ad_pack.assign_static_b_ctrl(dsp)
43 |   c.assign_c_input_ctrl(dsp)
44 |
45 |   assign_m_ctrl(dsp, ce = True, rst = False)
46 |   assign_p_ctrl(dsp, ce = True, rst = False)
47 |
48 |   val abNeg = io.a.orR & io.b.orR & (io.a.msb ^ io.b.msb)
49 |   val abNegReg = Delay(abNeg, 2)
50 |   dsp.DATAIN.A := io.b.asSInt.resize(30).asBits
51 |   dsp.DATAIN.B := io.a.asSInt.resize(18).asBits
52 |   dsp.DATAIN.D := io.c.asSInt.expand ## B(27 - 9 bits, default -> false)
53 |   dsp.DATAIN.C := B(29 bits, default -> false) ## abNegReg ## B(18 bits, default -> false)
54 |
55 |   val P = dsp.DATAOUT.P
56 |   io.ab := P.take(16)
57 |   io.ac := P.drop(18).take(16)
58 | }
59 |
--------------------------------------------------------------------------------
/src/test/scala/eval_int8_mul.scala:
--------------------------------------------------------------------------------
1 | import spinal.core._
2 | import spinal.core.sim._
3 | import spinal.lib._
4 | import spinal.lib.eda.bench.Rtl
5 | import xilinx.DSP48E2._
6 | import xilinx.DSP48E2IntArithmetic.standalone.int8_mul
7 |
8 | import scala.language.postfixOps
9 | import scala.util.Random
10 |
11 | object eval_int8_mul extends App {
12 |
13 |   // simulate: stream random a/b/c triples and compare a*b and a*c with the DUT outputs
14 |
15 |   val length = 32
16 |   val a = Array.fill(length)(Random.nextInt(256) - 128)
17 |   val b = Array.fill(length)(Random.nextInt(256) - 128)
18 |   val c = Array.fill(length)(Random.nextInt(256) - 128)
19 |   val ab = (a, b).zipped.map(_ * _)
20 |   val ac = (a, c).zipped.map(_ * _)
21 |
22 |   SimConfig.withFstWave
23 |     .addRtl("data/sim/DSP48E2.v")
24 |     .compile(new int8_mul)
25 |     .doSim { dut =>
26 |       import dut._
27 |
28 |       io.a #= 0
29 |       io.b #= 0
30 |       io.c #= 0
31 |
32 |       clockDomain.forkStimulus(10)
33 |       clockDomain.waitSampling(32)
34 |       // Result `index` is checked at i = index + latency + 1, so one extra iteration is needed to cover index = length - 1.
35 |       for (i <- 0 until length + latency + 1) {
36 |         if (i < length) {
37 |           io.a #= a(i) & 0xff
38 |           io.b #= b(i) & 0xff
39 |           io.c #= c(i) & 0xff
40 |         }
41 |         else {
42 |           io.a #= 0
43 |           io.b #= 0
44 |           io.c #= 0
45 |         }
46 |         if (i > latency) {
47 |           val index = i - latency - 1
48 | 
assert(
49 |             io.ab.toInt == (ab(index) & 0xffff),
50 |             s"ab ${a(index)}, ${b(index)}, ${c(index)}, ${(io.ab.toInt << 16) >> 16}, ${ab(index)}")
51 |           assert(
52 |             io.ac.toInt == (ac(index) & 0xffff),
53 |             s"ac ${a(index)}, ${b(index)}, ${c(index)}, ${(io.ac.toInt << 16) >> 16}, ${ac(index)}")
54 |         }
55 |         clockDomain.waitSampling(1)
56 |       }
57 |       io.a #= 0
58 |       io.b #= 0
59 |       io.c #= 0
60 |
61 |       clockDomain.waitSampling(32)
62 |       simSuccess()
63 |     }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/scala/util/BarrelShifter.scala:
--------------------------------------------------------------------------------
1 | package util
2 |
3 | import spinal.core._
4 | import spinal.lib._
5 | import spinal.core.sim._
6 |
7 | import scala.language.postfixOps
8 | import scala.util.Random
9 |
10 | /**
11 |  * Rotating (barrel) shifters built by concatenating the operand with itself,
12 |  * shifting the doubled word and slicing out one operand-width window.
13 |  */
14 | object BarrelShifter {
15 |
16 |   /**
17 |    * Rotate `x` right by `shift` bit positions.
18 |    *
19 |    * @param x     word to rotate
20 |    * @param shift rotation amount; must be exactly log2Up(x.getWidth) bits wide
21 |    * @return the low x.getWidth bits of (x ## x) >> shift
22 |    */
23 |   def right(x: Bits, shift: UInt) = {
24 |     require(shift.getWidth == log2Up(x.getWidth))
25 |     val xExt = x ## x
26 |     val xShift = xExt >> shift
27 |     xShift.takeLow(x.getWidth)
28 |   }
29 |
30 |   /**
31 |    * Rotate `x` left by `shift` bit positions.
32 |    *
33 |    * @param x     word to rotate
34 |    * @param shift rotation amount; must be exactly log2Up(x.getWidth) bits wide
35 |    * @return bits [2W-1 : W] of (x ## x) << shift, where W = x.getWidth
36 |    */
37 |   def left(x: Bits, shift: UInt) = {
38 |     require(shift.getWidth == log2Up(x.getWidth))
39 |     val xExt = x ## x
40 |     // `Bits << UInt` grows the result by the maximum shift amount, so after
41 |     // dropping the low W bits the value is wider than W. Keep only the low W
42 |     // bits of the remainder so the result has the same width as `x`, matching
43 |     // `right`.
44 |     val xShift = xExt << shift
45 |     xShift.dropLow(x.getWidth).takeLow(x.getWidth)
46 |   }
47 |
48 |   /**
49 |    * Element-wise counterpart of `right`: applies `right` to every bit position
50 |    * taken across the elements of `x`, which rotates the elements of the vector.
51 |    *
52 |    * @param x     vector to rotate
53 |    * @param shift rotation amount; must be exactly log2Up(x.length) bits wide
54 |    * @return a new vector of the same type as `x`
55 |    */
56 |   def rightT[T <: Data](x: Vec[T], shift: UInt) = {
57 |     require(shift.getWidth == log2Up(x.length))
58 |     // xBin(k) holds bit k of every element, so rotating it moves whole elements.
59 |     val xBin = x.map(_.asBits.asBools).transpose
60 |     val xShift = xBin.map(v => right(v.asBits, shift).asBools).transpose
61 |     val ret = cloneOf(x)
62 |     (ret, xShift).zipped.foreach((dst, src) => dst.assignFromBits(src.asBits))
63 |     ret
64 |   }
65 |
66 |   /**
67 |    * Element-wise counterpart of `left`: applies `left` to every bit position
68 |    * taken across the elements of `x`, which rotates the elements of the vector.
69 |    *
70 |    * @param x     vector to rotate
71 |    * @param shift rotation amount; must be exactly log2Up(x.length) bits wide
72 |    * @return a new vector of the same type as `x`
73 |    */
74 |   def leftT[T <: Data](x: Vec[T], shift: UInt) = {
75 |     require(shift.getWidth == log2Up(x.length))
76 |     val xBin = x.map(_.asBits.asBools).transpose
77 |     val xShift = xBin.map(v => left(v.asBits, shift).asBools).transpose
78 |     val ret = cloneOf(x)
79 |     (ret, xShift).zipped.foreach((dst, src) => dst.assignFromBits(src.asBits))
80 |     ret
81 |   }
82 |
83 |   /** Generates Verilog for a small component that selects x(shift(i)) for each output i. */
84 |   def main(args: Array[String]): Unit = {
85 |     //    SpinalVerilog(new Component {
86 |     //      val shift = in UInt (3 bits)
87 |     //      val x = in Vec(UInt(64 bits), 8)
88 |     //      val y = out Vec(UInt(64 bits), 8)
89 |     //      y := rightT(x, shift)
90 |     //    })
91 |
92 |     SpinalVerilog(new Component {
93 |       val shift = in Vec(UInt(3 bits), 8)
94 |       val x = in Vec(UInt(64 bits), 8)
95 |       val y = out Vec(UInt(64 bits), 8)
96 |       y := Vec(shift.map(sel => x(sel)))
97 |     })
98 |   }
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/scala/example/b1024_sa.scala:
--------------------------------------------------------------------------------
1 | package example
2 |
3 | import spinal.core._
4 |
5 | import scala.language.postfixOps
6 |
7 | class b1024_sa(
8 |   clkx1: ClockDomain,
9 |   length: Int,
10 |   width: Int,
11 |   height: Int
12 | ) extends Component {
13 |
14 |   val pe = for (i <- 0 until height) yield {
15 |     for (j <- 0 until width) yield {
16 |       new b1024_pe(
17 |         clkx1 = clkx1,
18 |         length = length,
19 |         isLastPEVertical = i == height - 1,
20 |         isLastPEHorizontal = j == width - 1
21 |       )
22 |     }
23 |   }
24 |
25 |   val inp = for (i <- 0 until height) yield {
26 |     for (j <- 0 until width) yield {
27 |       pe(i)(j).inp.toIo()
28 |     }
29 |   }
30 |
31 |   val out = for (i <- 0 until height) yield {
32 |     for (j <- 0 until width) yield {
33 |       pe(i)(j).out.toIo()
34 |     }
35 |   }
36 |
37 |   val srcH = for (i <- 0 until height) yield {
38 |     pe(i)(0).srcH.toIo()
39 |   }
40 |
41 |   //  val srcV = for (j <- 0 until width) yield {
42 |   //    pe(0)(j).srcV.toIo()
43 |   //  }
44 |
45 |   val srcV0 = for (j <- 0 until width) yield {
46 |     cloneOf(pe(0)(j).srcV).asInput()
47 |   }
48 |
49 |   val srcV1 = for (j <- 0 until width) yield {
50 |     cloneOf(pe(0)(j).srcV).asInput()
51 |   }
52 |
53 |   val muxSignal = in Vec(Bool(), width)
54 |   for (j <- 0 until width) {
55 |     pe(0)(j).srcV := Mux(muxSignal(j), srcV0(j), srcV1(j))
56 |   }
57 |
58 |   for (i <- 0 until height) {
59 |     for (j <- 1 until width) {
60 |       pe(i)(j).srcH := pe(i)(j
- 1).dstH 61 | } 62 | } 63 | 64 | for (j <- 0 until width) { 65 | for (i <- 1 until height) { 66 | pe(i)(j).srcV := pe(i - 1)(j).dstV 67 | } 68 | } 69 | } 70 | 71 | object sa extends App { 72 | SpinalVerilog(new b1024_sa(ClockDomain.external("slow"), 4, 4, 4)) 73 | SpinalVerilog(new ehb1024_sa(ClockDomain.external("fast"), 4, 4, 4)) 74 | } -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/uint4_mul.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2IntArithmetic.standalone 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class uint4_mul() extends Component { 10 | val io = new Bundle { 11 | val w1 = in Bits (4 bits) 12 | val w2 = in Bits (4 bits) 13 | val a1 = in Bits (4 bits) 14 | val a2 = in Bits (4 bits) 15 | val a1w1 = out Bits (8 bits) 16 | val a1w2 = out Bits (8 bits) 17 | val a2w1 = out Bits (8 bits) 18 | val a2w2 = out Bits (8 bits) 19 | } 20 | 21 | import DSP48E2ConfigMode._ 22 | import DSP48E2ConfigABCD._ 23 | import DSP48E2ConfigWXYZ._ 24 | 25 | val latency = 4 26 | val attr = new DSP48E2Attributes 27 | 28 | set_mul_attr(attr) 29 | inmode.set_static_inmode_attr(attr) 30 | opmode.set_static_opmode_attr(attr) 31 | ad_pack.set_ad_pack_attr(attr) 32 | ad_pack.set_static_b_attr(attr) 33 | c.set_mute_c_attr(attr) 34 | 35 | val dsp = new DSP48E2(attr) 36 | 37 | inmode.assign_static_inmode_ctrl(dsp) 38 | inmode.assign_default(dsp) 39 | opmode.assign_static_opmode_ctrl(dsp) 40 | w.w_sel_c(dsp, False) 41 | x.x_sel_m(dsp, True) 42 | y.y_sel_m(dsp, True) 43 | z.z_sel_p(dsp, False) 44 | ad_pack.assign_ad_pack_ctrl(dsp) 45 | ad_pack.assign_static_b_ctrl(dsp) 46 | c.assign_mute_c_ctrl(dsp) 47 | 48 | assign_m_ctrl(dsp, ce = True, rst = False) 49 | assign_p_ctrl(dsp, ce = True, rst = False) 50 | 51 | dsp.DATAIN.A := io.w1.asSInt.resize(30).asBits 
52 | dsp.DATAIN.B := B"000" ## io.a2 ## B"0000000" ## io.a1 53 | dsp.DATAIN.D := io.w2.msb ## io.w2 ## B(22 bits, default -> False) 54 | 55 | val P = dsp.DATAOUT.P 56 | 57 | val sa1w1 = P(7 downto 0).asSInt 58 | val sa2w1 = P(18 downto 11).asSInt 59 | val sa1w2 = P(29 downto 22).asSInt 60 | val sa2w2 = P(40 downto 33).asSInt 61 | 62 | io.a1w1 := sa1w1.asBits 63 | io.a2w1 := sa2w1.asBits 64 | io.a1w2 := sa1w2.asBits 65 | io.a2w2 := sa2w2.asBits 66 | } -------------------------------------------------------------------------------- /src/test/scala/eval_int16_dotp_ddr.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.cascade.int16_dotp_ddr 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | import scala.language.postfixOps 10 | import scala.util.Random 11 | 12 | object eval_int16_dotp_ddr extends App { 13 | 14 | val sampleLength = 32 15 | val vecLength = 8 16 | 17 | SpinalVerilog(new int16_dotp_ddr(vecLength)) 18 | 19 | val a = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 20 | val b = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 21 | 22 | val res = ArrayBuffer[Int]() 23 | for (s <- 0 until sampleLength / 2) { 24 | val ac = (a(s * 2), b(s * 2)).zipped.map(_ * _).sum 25 | val ad = (a(s * 2 + 1), b(s * 2)).zipped.map(_ * _).sum 26 | val bc = (a(s * 2), b(s * 2 + 1)).zipped.map(_ * _).sum 27 | val bd = (a(s * 2 + 1), b(s * 2 + 1)).zipped.map(_ * _).sum 28 | println(ac, bc, ad, bd) 29 | } 30 | 31 | SimConfig.withFstWave 32 | .addRtl("data/sim/DSP48E2.v") 33 | .compile(new int16_dotp_ddr(vecLength)) 34 | .doSim { dut => 35 | import dut._ 36 | 37 | io.a.foreach(_ #= 0) 38 | io.b.foreach(_ #= 0) 39 | 40 | clockDomain.forkStimulus(10) 41 | clockDomain.waitSampling(32) 42 | 43 | def check() = { 44 | 
for (j <- 0 until vecLength) { 45 | fork { 46 | clockDomain.waitSampling(j) 47 | for (i <- 0 until sampleLength) { 48 | io.a(j) #= a(i)(j) & 0xffff 49 | io.b(j) #= b(i)(j) & 0xffff 50 | clockDomain.waitSampling(2) 51 | } 52 | } 53 | } 54 | } 55 | 56 | for (p <- 0 until 1) { 57 | fork { 58 | check() 59 | } 60 | clockDomain.waitSampling(sampleLength) 61 | } 62 | 63 | clockDomain.waitSampling(32) 64 | simSuccess() 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/scala/eval_int8_dotp.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.cascade.int8_dotp 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | /* Simulation testbench for int8_dotp: lane j is fed sample (i - j) so the operands are skewed by one cycle per lane; io.ab / io.ac are compared with the reference dot products. Operands are drawn from [0, 128), i.e. non-negative only. */ object eval_int8_dotp extends App { 12 | 13 | val sampleLength = 32 14 | val vecLength = 8 15 | 16 | val a = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(128))) 17 | val b = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(128))) 18 | val c = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(128))) 19 | val ab = (a, b).zipped.map((a, b) => (a, b).zipped.map(_ * _).sum) 20 | val ac = (a, c).zipped.map((a, c) => (a, c).zipped.map(_ * _).sum) 21 | 22 | SimConfig.withFstWave 23 | .addRtl("data/sim/DSP48E2.v") 24 | .compile(new int8_dotp(vecLength)) 25 | .doSim { dut => 26 | import dut._ 27 | 28 | io.a.foreach(_ #= 0) 29 | io.b.foreach(_ #= 0) 30 | io.c.foreach(_ #= 0) 31 | 32 | clockDomain.forkStimulus(10) 33 | clockDomain.waitSampling(32) 34 | 35 | /* inclusive bound: the result for sample (sampleLength - 1) is checked at i == sampleLength + latency, which an exclusive `until` never reached */ for (i <- 0 to sampleLength + latency) { 36 | for (j <- 0 until vecLength) { 37 | if (j <= i && i < sampleLength + j) { 38 | io.a(j) #= a(i - j)(j) & 0xff 39 | io.b(j) #= b(i - j)(j) & 0xff 40 | io.c(j) #= c(i - j)(j) & 0xff 41 | } 42 | else { 43 | io.a(j) #= 0 44 | io.b(j) #= 0 45 |
io.c(j) #= 0 46 | } 47 | } 48 | if (i > latency) { 49 | val index = i - latency - 1 50 | assert(io.ab.toInt == (ab(index) & 0x3ffff)) 51 | assert(io.ac.toInt == (ac(index) & 0x3ffff)) 52 | } 53 | clockDomain.waitSampling(1) 54 | } 55 | 56 | io.a.foreach(_ #= 0) 57 | io.b.foreach(_ #= 0) 58 | io.c.foreach(_ #= 0) 59 | clockDomain.waitSampling(32) 60 | 61 | simSuccess() 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/test/scala/eval_uint4_mul.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.standalone.uint4_mul 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_uint4_mul extends App { 12 | 13 | val sampleLength = 32 14 | val a1 = Array.fill(sampleLength)(Random.nextInt(16)) 15 | val a2 = Array.fill(sampleLength)(Random.nextInt(16)) 16 | val w1 = Array.fill(sampleLength)(Random.nextInt(16) - 8) 17 | val w2 = Array.fill(sampleLength)(Random.nextInt(16) - 8) 18 | 19 | val a1w1 = (a1, w1).zipped.map(_ * _) 20 | val a1w2 = (a1, w2).zipped.map(_ * _) 21 | val a2w1 = (a2, w1).zipped.map(_ * _) 22 | val a2w2 = (a2, w2).zipped.map(_ * _) 23 | 24 | def bits2int(src: Int) = { 25 | (src << (32 - 8) >> (32 - 8)) 26 | } 27 | 28 | SimConfig.withFstWave 29 | .addRtl("data/sim/DSP48E2.v") 30 | .compile(new uint4_mul) 31 | .doSim { dut => 32 | import dut._ 33 | 34 | io.a1 #= 0 35 | io.a2 #= 0 36 | io.w1 #= 0 37 | io.w2 #= 0 38 | 39 | clockDomain.forkStimulus(10) 40 | clockDomain.waitSampling(32) 41 | 42 | for (i <- 0 until sampleLength + latency) { 43 | if (i < sampleLength) { 44 | io.a1 #= a1(i) & 0xf 45 | io.a2 #= a2(i) & 0xf 46 | io.w1 #= w1(i) & 0xf 47 | io.w2 #= w2(i) & 0xf 48 | } 49 | else { 50 | io.a1 #= 0 51 | io.a2 #= 0 52 | io.w1 #= 0 53 | io.w2 #= 0 54 | } 
55 | if (i > latency) { 56 | val index = i - latency - 1 57 | println(bits2int(io.a1w1.toInt), a1w1(index)) 58 | println(bits2int(io.a2w1.toInt), a2w1(index)) 59 | println(bits2int(io.a1w2.toInt), a1w2(index)) 60 | println(bits2int(io.a2w2.toInt), a2w2(index)) 61 | } 62 | clockDomain.waitSampling() 63 | } 64 | 65 | clockDomain.waitSampling(32) 66 | simSuccess() 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/scala/eval_int24_acc_scale.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.standalone.int24_acc_scale 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_int24_acc_scale extends App { 12 | 13 | SpinalVerilog(new int24_acc_scale) 14 | 15 | val pass = 4 16 | val sample = 32 17 | val scale = 9 18 | val a = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 19 | val b = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 20 | val aAcc = a.map(_.sum) 21 | val bAcc = b.map(_.sum) 22 | val aAccScale = aAcc.map(_ * scale) 23 | val bAccScale = bAcc.map(_ * scale) 24 | 25 | SimConfig.withFstWave 26 | .addRtl("data/sim/DSP48E2.v") 27 | .compile(new int24_acc_scale) 28 | .doSim { dut => 29 | import dut._ 30 | 31 | io.a #= 0 32 | io.b #= 0 33 | io.scale #= scale 34 | io.valid #= false 35 | io.last #= false 36 | 37 | clockDomain.forkStimulus(10) 38 | clockDomain.waitSampling(32) 39 | 40 | def check(p: Int) = { 41 | for (i <- 0 until sample + accLatency + 1) { 42 | if (i < sample) { 43 | io.a #= a(p)(i) & 0xffffff 44 | io.b #= b(p)(i) & 0xffffff 45 | } 46 | if (i == sample + accLatency) { 47 | println(aAcc(p), (io.aAcc.toInt << 8) >> 8) 48 | println(bAcc(p), (io.bAcc.toInt << 8) >> 8) 49 | } 50 | 51 | if (i == 0) io.valid #= true 52 | 
if (i == sample - 1) io.last #= true 53 | if (i == sample) { 54 | io.valid #= false 55 | io.last #= false 56 | } 57 | 58 | clockDomain.waitSampling(1) 59 | } 60 | } 61 | 62 | for (p <- 0 until pass) { 63 | fork { 64 | check(p) 65 | } 66 | clockDomain.waitSampling(sample + 5) 67 | } 68 | 69 | clockDomain.waitSampling(1024) 70 | simSuccess() 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test/scala/eval_int16_os_B_P.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.dualCascade.int16_os_B_P 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_int16_os_B_P extends App { 12 | 13 | SpinalVerilog(new int16_os_B_P(8)) 14 | 15 | val accLength = 16 16 | val vecLength = 4 17 | val a = Array.fill(accLength)(Random.nextInt(256) - 128) 18 | val b = Array.fill(vecLength)(Array.fill(accLength)(Random.nextInt(256) - 128)) 19 | val ab = for (v <- 0 until vecLength) yield { 20 | (a, b(v)).zipped.map(_ * _).sum 21 | } 22 | 23 | println(a.mkString(", ")) 24 | println(ab.mkString(", ")) 25 | 26 | SimConfig.withFstWave 27 | .addRtl("data/sim/DSP48E2.v") 28 | .compile(new int16_os_B_P(vecLength)) 29 | .doSimUntilVoid { dut => 30 | import dut._ 31 | 32 | io.a #= 0 33 | io.b.foreach(_ #= 0) 34 | io.accValid #= false 35 | io.accLast #= false 36 | 37 | clockDomain.forkStimulus(10) 38 | clockDomain.waitSampling(32) 39 | 40 | def check() = { 41 | for (i <- 0 until accLength + latency + vecLength) { 42 | if (i < accLength) { 43 | io.a #= a(i) & 0xffff 44 | io.accValid #= true 45 | } 46 | if (i == accLength - 1) io.accLast #= true 47 | if (i == accLength) { 48 | io.accValid #= false 49 | io.accLast #= false 50 | } 51 | for (j <- 0 until vecLength) { 52 | if (j <= i && i < accLength + j) 
53 | io.b(j) #= b(j)(i - j) & 0xffff 54 | else io.b(j) #= 0 55 | } 56 | if (i >= accLength + latency) { 57 | assert(io.ab.toBigInt.toInt == ab.reverse(i - accLength - latency)) 58 | } 59 | clockDomain.waitSampling(1) 60 | } 61 | } 62 | 63 | for (p <- 0 until 4) { 64 | fork { 65 | check() 66 | } 67 | clockDomain.waitSampling(accLength + vecLength) 68 | } 69 | 70 | clockDomain.waitSampling(128) 71 | simSuccess() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/test/scala/eval_int24_acc.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.standalone.int24_acc 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_int24_acc extends App { 12 | 13 | SpinalVerilog(new int24_acc) 14 | 15 | val pass = 4 16 | val sample = 32 17 | val a = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 18 | val b = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 19 | val c = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 20 | val d = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 21 | val ab = (a, b).zipped.map(_.sum + _.sum) 22 | val cd = (c, d).zipped.map(_.sum + _.sum) 23 | 24 | SimConfig.withFstWave 25 | .addRtl("data/sim/DSP48E2.v") 26 | .compile(new int24_acc) 27 | .doSim { dut => 28 | import dut._ 29 | 30 | io.a #= 0 31 | io.b #= 0 32 | io.c #= 0 33 | io.d #= 0 34 | io.valid #= false 35 | io.last #= false 36 | 37 | clockDomain.forkStimulus(10) 38 | clockDomain.waitSampling(32) 39 | 40 | def check(p: Int) = { 41 | for (i <- 0 until sample + latency + 1) { 42 | if (i < sample) { 43 | io.a #= a(p)(i) & 0xffffff 44 | io.b #= b(p)(i) & 0xffffff 45 | io.c #= c(p)(i) & 0xffffff 46 | io.d #= d(p)(i) & 0xffffff 47 | } 48 | if 
(i == sample + latency) { 49 | assert(ab(p) == (io.ab.toInt << 8) >> 8) 50 | assert(cd(p) == (io.cd.toInt << 8) >> 8) 51 | } 52 | 53 | if (i == 0) io.valid #= true 54 | if (i == sample - 1) io.last #= true 55 | if (i == sample) { 56 | io.valid #= false 57 | io.last #= false 58 | } 59 | 60 | clockDomain.waitSampling(1) 61 | } 62 | } 63 | 64 | for (p <- 0 until pass) { 65 | fork { 66 | check(p) 67 | } 68 | clockDomain.waitSampling(sample) 69 | } 70 | 71 | clockDomain.waitSampling(1024) 72 | simSuccess() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/test/scala/eval_int12_xadd.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.standalone.int12_xadd 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | /* Simulation testbench for int12_xadd: four 12-bit lanes, reference result is aSel * a + bSel * b per lane, compared on the low 12 bits. */ object eval_int12_xadd extends App { 12 | 13 | // simulate 14 | 15 | val length = 32 16 | val aSel = Array.fill(length)(Random.nextInt(2)) 17 | val bSel = Array.fill(length)(Random.nextInt(2)) 18 | val a = Array.fill(length)(Array.fill(4)(Random.nextInt(4096) - 2048)) 19 | val b = Array.fill(length)(Array.fill(4)(Random.nextInt(4096) - 2048)) 20 | val ab = for (i <- 0 until length) yield { 21 | for (j <- 0 until 4) yield { 22 | aSel(i) * a(i)(j) + bSel(i) * b(i)(j) 23 | } 24 | } 25 | 26 | SimConfig.withFstWave 27 | .addRtl("data/sim/DSP48E2.v") 28 | .compile(new int12_xadd) 29 | .doSim { dut => 30 | import dut._ 31 | 32 | io.aSel #= 0 33 | io.bSel #= 0 34 | io.a.foreach(_ #= 0) 35 | io.b.foreach(_ #= 0) 36 | 37 | clockDomain.forkStimulus(10) 38 | clockDomain.waitSampling(32) 39 | 40 | /* inclusive bound: the result for sample (length - 1) is checked at i == length + latency, which an exclusive `until` never reached */ for (i <- 0 to length + latency) { 41 | if (i < length) { 42 | io.aSel #= aSel(i) 43 | io.bSel #= bSel(i) 44 | io.a.zip(a(i)).foreach { case (pin, value) => pin #= value & 0xfff } 45 |
io.b.zip(b(i)).foreach { case (pin, value) => pin #= value & 0xfff } 46 | } 47 | else { 48 | io.aSel #= 0 49 | io.bSel #= 0 50 | io.a.foreach(_ #= 0) 51 | io.b.foreach(_ #= 0) 52 | } 53 | if (i > latency) { 54 | val index = i - latency - 1 55 | 56 | for (t <- 0 until 4) { 57 | assert( 58 | io.ab(t).toInt == (ab(index)(t) & 0xfff), 59 | s"ab ${aSel(index)}, ${bSel(index)}, ${a(index)(t)}, ${b(index)(t)}, ${(io.ab(t).toInt << 20) >> 20}, ${ab(index)(t)}") 60 | } 61 | } 62 | clockDomain.waitSampling(1) 63 | } 64 | io.aSel #= 0 65 | io.bSel #= 0 66 | io.a.foreach(_ #= 0) 67 | io.b.foreach(_ #= 0) 68 | 69 | clockDomain.waitSampling(32) 70 | simSuccess() 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test/scala/eval_ring_acc.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.standalone.ring_acc 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_ring_acc extends App { 12 | 13 | SpinalVerilog(new ring_acc) 14 | 15 | val pass = 4 16 | val sample = 32 17 | // val a = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 18 | // val b = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 19 | // val c = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 20 | // val d = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256) - 128)) 21 | // val ab = (a, b).zipped.map(_.sum + _.sum) 22 | // val cd = (c, d).zipped.map(_.sum + _.sum) 23 | 24 | val p3 = Array.fill(pass)(Array.fill(sample)(Random.nextInt(256))) 25 | val sum = Array.fill(pass)(Array.fill(4)(0)) 26 | 27 | for (p <- 0 until pass) { 28 | for (i <- 0 until sample) { 29 | sum(p)(i % 4) += p3(p)(i) 30 | } 31 | } 32 | 33 | SimConfig.withFstWave 34 | 
.addRtl("data/sim/DSP48E2.v") 35 | .compile(new ring_acc) 36 | .doSim { dut => 37 | import dut._ 38 | 39 | io.inVld #= false 40 | io.fbVld #= false 41 | io.p0 #= 0 42 | io.p1 #= 0 43 | io.p2 #= 0 44 | io.p3 #= 0 45 | io.biasEn #= false 46 | io.bias0 #= 0 47 | io.bias1 #= 0 48 | 49 | clockDomain.forkStimulus(10) 50 | clockDomain.waitSampling(32) 51 | 52 | def check(p: Int) = { 53 | for (i <- 0 until sample + latency + 1) { 54 | if (i < sample) { 55 | io.p3 #= p3(p)(i) & 0xffffff 56 | } 57 | 58 | if (i == 0) io.inVld #= true 59 | if (i == 4) io.fbVld #= true 60 | if (i == sample) { 61 | io.inVld #= false 62 | io.fbVld #= false 63 | } 64 | 65 | clockDomain.waitSampling(1) 66 | } 67 | } 68 | 69 | for (p <- 0 until pass) { 70 | fork { 71 | check(p) 72 | } 73 | clockDomain.waitSampling(sample) 74 | } 75 | 76 | clockDomain.waitSampling(1024) 77 | simSuccess() 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/cascade/int8_dotp.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2IntArithmetic.cascade 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class int8_dotp(length: Int) extends Component { 10 | 11 | val io = new Bundle { 12 | val a = in Vec(Bits(8 bits), length) 13 | val b = in Vec(Bits(8 bits), length) 14 | val c = in Vec(Bits(8 bits), length) 15 | val ab = out Bits (18 bits) 16 | val ac = out Bits (18 bits) 17 | } 18 | 19 | import DSP48E2ConfigMode._ 20 | import DSP48E2ConfigABCD._ 21 | import DSP48E2ConfigWXYZ._ 22 | 23 | val latency = length + 4 - 1 24 | 25 | val attrs = Array.fill(length)(new DSP48E2Attributes) 26 | 27 | for (i <- 0 until length) { 28 | set_mul_attr(attrs(i)) 29 | inmode.set_static_inmode_attr(attrs(i)) 30 | opmode.set_static_opmode_attr(attrs(i)) 31 | ad_pack.set_ad_pack_attr(attrs(i)) 32 | 
ad_pack.set_static_b_attr(attrs(i)) 33 | c.set_mute_c_attr(attrs(i)) 34 | } 35 | 36 | /* one DSP48E2 instance per vector lane */ val dsp48e2s = attrs.map(attr => new DSP48E2(attr)) 37 | 38 | for (i <- 0 until length) { 39 | inmode.assign_static_inmode_ctrl(dsp48e2s(i)) 40 | inmode.assign_default(dsp48e2s(i)) 41 | opmode.assign_static_opmode_ctrl(dsp48e2s(i)) 42 | w.w_sel_c(dsp48e2s(i), False) 43 | x.x_sel_m(dsp48e2s(i), True) 44 | y.y_sel_m(dsp48e2s(i), True) 45 | /* every slice except the first is passed True on the PCIN select; slice 0 gets False */ z.z_sel_pcin(dsp48e2s(i), if (i != 0) True else False) 46 | ad_pack.assign_ad_pack_ctrl(dsp48e2s(i)) 47 | ad_pack.assign_static_b_ctrl(dsp48e2s(i)) 48 | c.assign_mute_c_ctrl(dsp48e2s(i)) 49 | assign_m_ctrl(dsp48e2s(i), ce = True, rst = False) 50 | assign_p_ctrl(dsp48e2s(i), ce = True, rst = False) 51 | 52 | /* A = sign-extended b(i), B = sign-extended a(i), D = sign-extended c(i) placed above 18 zero bits */ dsp48e2s(i).DATAIN.A := io.b(i).asSInt.resize(30).asBits 53 | dsp48e2s(i).DATAIN.B := io.a(i).asSInt.resize(18).asBits 54 | dsp48e2s(i).DATAIN.D := io.c(i).asSInt.expand ## B(27 - 9 bits, default -> false) 55 | /* chain the P cascade from slice i - 1 into slice i */ if (i != 0) dsp48e2s(i).CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P 56 | } 57 | 58 | /* the last slice's P holds both packed results: bits 17..0 go to io.ab, bits 35..18 to io.ac */ val P = dsp48e2s.last.DATAOUT.P 59 | val abRes = P(17 downto 0).asBits 60 | /* NOTE(review): abNeg is only referenced by the commented-out acRes below, so io.ac is the raw upper field with no +1 compensation when the ab field is negative; confirm this is intended */ val abNeg = B"0" ## abRes.msb 61 | // val acRes = P(35 downto 18).asSInt + abNeg.asSInt 62 | val acRes = P(35 downto 18).asSInt 63 | 64 | io.ab := abRes 65 | io.ac := acRes.asBits 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/util/LoopsCntGen.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | 6 | import scala.language.postfixOps 7 | 8 | /* Generators for nested loop counters; index 0 is the innermost (fastest) counter. */ object LoopsCntGen { 9 | 10 | /* One register per bound; cntOvf(i) is the combinational compare cnt(i) === bound(i). Returns (cnt, cntOvf). */ def wireOvf(bound: List[UInt], enable: Bool) = { 11 | require(bound.nonEmpty) 12 | val cnt = bound.map(b => UInt(b.getWidth bits) setAsReg() init 0) 13 | val cntOvf = (cnt, bound).zipped.map(_ === _) 14 | when(enable)(cnt.head := cnt.head + 1) 15 | /* level i advances (and level i - 1 resets) when all lower levels are at their bound */ for (i <- 1 until bound.length) { 16 | when(enable && cntOvf.take(i).reduceLeft(_ && _)) { 17 |
cnt(i - 1).clearAll() 18 | cnt(i) := cnt(i) + 1 19 | } 20 | } 21 | when(enable && cntOvf.reduceLeft(_ && _))(cnt.last.clearAll()) 22 | (cnt, cntOvf) 23 | } 24 | 25 | /* Same nested counters as wireOvf, but cntOvf is a registered flag: it is set when a level increments while cnt === bound - 1 (or while its bound is 0) and cleared on the next increment of that level. Returns (cnt, cntOvf). */ def regOvf(bound: List[UInt], enable: Bool) = { 26 | require(bound.nonEmpty) 27 | val cnt = bound.map(b => UInt(b.getWidth bits) setAsReg() init 0) 28 | 29 | /* boundSwitch(i) pulses when bound(i) changes between zero and non-zero */ val boundIsZero = bound.map(_ === 0) 30 | val boundIsZeroDly = boundIsZero.map(s => RegNext(s, init = False)) 31 | val boundSwitch = (boundIsZero, boundIsZeroDly).zipped.map(_ =/= _) 32 | 33 | val cntOvfLogic = (cnt, bound).zipped.map(_ === _ - 1) 34 | val cntOvf = Vec(Bool().setAsReg().init(False), bound.length) 35 | 36 | /* inc(i): level i advances this cycle; inc.last means every level overflowed */ val inc = Vec(Bool(), bound.length + 1) 37 | inc.head := enable 38 | for (i <- 1 until inc.length) { 39 | inc(i) := enable && cntOvf.take(i).reduceLeft(_ && _) 40 | } 41 | 42 | for (i <- bound.indices) { 43 | cntOvf(i).clearWhen(inc(i)).setWhen(boundIsZero(i) || inc(i) & cntOvfLogic(i)) 44 | /* later assignment wins: drop the flag when the bound has just become non-zero */ cntOvf(i).clearWhen(boundSwitch(i) & ~boundIsZero(i)) 45 | } 46 | 47 | when(inc.head)(cnt.head := cnt.head + 1) 48 | for (i <- 1 until bound.length) { 49 | when(inc(i)) { 50 | cnt(i - 1).clearAll() 51 | cnt(i) := cnt(i) + 1 52 | } 53 | } 54 | when(inc.last)(cnt.last.clearAll()) 55 | (cnt, cntOvf) 56 | } 57 | 58 | /* Elaboration-time bounds: one spinal.lib Counter(b) per level, overflow flag is willOverflowIfInc. Returns (counter values, cntOvf). */ def constant(bound: List[Int], enable: Bool) = { 59 | require(bound.nonEmpty) 60 | val cnt = bound.map(b => Counter(b)) 61 | val cntOvf = cnt.map(_.willOverflowIfInc) 62 | when(enable)(cnt.head.increment()) 63 | for (i <- 1 until bound.length) { 64 | when(enable && cntOvf.take(i).reduceLeft(_ && _)) { 65 | cnt(i - 1).clear() 66 | cnt(i).increment() 67 | } 68 | } 69 | /* use Counter.clear() like the inner levels above, instead of clearAll() on the register reached through the implicit UInt view */ when(enable && cntOvf.reduceLeft(_ && _))(cnt.last.clear()) 70 | (cnt.map(_.value), cntOvf) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/int24_acc_scale.scala: -------------------------------------------------------------------------------- 1 | package
xilinx.DSP48E2IntArithmetic.standalone 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | import xilinx.DSP48E2._ 6 | 7 | import scala.language.postfixOps 8 | 9 | class int24_acc_scale() extends Component { 10 | val io = new Bundle { 11 | val a = in Bits (24 bits) 12 | val b = in Bits (24 bits) 13 | val scale = in Bits (18 bits) 14 | val valid = in Bool() 15 | val last = in Bool() 16 | 17 | val aAcc = out Bits (24 bits) 18 | val bAcc = out Bits (24 bits) 19 | val scaleRes = out Bits (48 bits) 20 | } 21 | 22 | import DSP48E2ConfigMode._ 23 | import DSP48E2ConfigABCD._ 24 | import DSP48E2ConfigWXYZ._ 25 | 26 | val accLatency = 2 27 | val scaleLatency = 5 28 | 29 | val accValid = Bool().setAsReg().init(False) 30 | accValid.setWhen(io.valid).clearWhen(io.last) 31 | 32 | val ADCe = Delay(io.last, accLatency, init = False) 33 | val multEnable = Delay(ADCe || Delay(ADCe, 1, init = False), 1, init = False) 34 | val muteA = ADCe // high active 35 | val muteD = Delay(~ADCe, 1, init = True) // low active 36 | 37 | val attr = new DSP48E2Attributes 38 | 39 | set_mul_attr(attr) 40 | inmode.set_dynamic_inmode_attr(attr) 41 | opmode.set_dynamic_opmode_attr(attr) 42 | ad_pack.set_ad_pack_attr(attr) 43 | ad_pack.set_static_b_attr(attr) 44 | c.set_c_input_attr(attr) 45 | 46 | val dsp = new DSP48E2(attr) 47 | 48 | inmode.assign_dynamic_inmode_ctrl(dsp) 49 | inmode.assign_inmode_gate_ab(dsp, muteA) 50 | inmode.assign_inmode_gate_d(dsp, muteD) 51 | inmode.assign_inmode_add_sub(dsp, False) 52 | opmode.assign_dynamic_opmode_ctrl(dsp) 53 | w.w_sel_rnd(dsp, io.b.msb & io.valid) 54 | x.x_sel_ab(dsp, multEnable) 55 | y.y_sel_c_over_m(dsp, io.valid) 56 | z.z_sel_p(dsp, accValid) 57 | ad_pack.assign_ad_pack_ctrl(dsp, use_ad = false, ce = ADCe) 58 | ad_pack.assign_static_b_ctrl(dsp) 59 | c.assign_c_input_ctrl(dsp) 60 | 61 | assign_m_ctrl(dsp, ce = True, rst = False) 62 | assign_p_ctrl(dsp, ce = True, rst = False) 63 | 64 | val ab = io.a ## io.b 65 | val aAccBits = dsp.DATAOUT.P.drop(24) 66 
| val bAccBits = dsp.DATAOUT.P.take(24) 67 | 68 | dsp.DATAIN.A := aAccBits.asSInt.resize(30 bits).asBits 69 | dsp.DATAIN.D := bAccBits.asSInt.resize(27 bits).asBits 70 | dsp.DATAIN.B := io.scale 71 | dsp.DATAIN.C := ab 72 | 73 | io.scaleRes := dsp.DATAOUT.P 74 | io.aAcc := aAccBits 75 | io.bAcc := bAccBits 76 | } 77 | -------------------------------------------------------------------------------- /src/test/scala/eval_int16_dotp.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.cascade.int16_dotp 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_int16_dotp extends App { 12 | 13 | val sampleLength = 32 14 | val vecLength = 8 15 | val acc = false 16 | 17 | val a = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 18 | val b = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 19 | val ab = for (s <- 0 until sampleLength) yield { 20 | (a(s), b(s)).zipped.map(_ * _).sum 21 | } 22 | 23 | val accRes = ab.sum 24 | 25 | SimConfig.withFstWave 26 | .addRtl("data/sim/DSP48E2.v") 27 | .compile(new int16_dotp(vecLength, acc)) 28 | .doSim { dut => 29 | import dut._ 30 | 31 | io.a.foreach(_ #= 0) 32 | io.b.foreach(_ #= 0) 33 | if (acc) { 34 | io.valid #= false 35 | io.last #= false 36 | } 37 | 38 | clockDomain.forkStimulus(10) 39 | clockDomain.waitSampling(32) 40 | 41 | def check() = { 42 | var sum = 0 43 | for (i <- 0 until sampleLength + latency) { 44 | for (j <- 0 until vecLength) { 45 | if (j <= i && i < sampleLength + j) { 46 | io.a(j) #= a(i - j)(j) & 0xffff 47 | io.b(j) #= b(i - j)(j) & 0xffff 48 | } 49 | } 50 | 51 | if (acc) { 52 | if (i == vecLength - 1) io.valid #= true 53 | if (i == sampleLength + vecLength - 1) io.valid #= false 54 | 55 | if (i == sampleLength + 
vecLength - 2) io.last #= true 56 | if (i == sampleLength + vecLength - 1) io.last #= false 57 | } 58 | 59 | if (i > latency) { 60 | val index = i - latency - 1 61 | sum += ab(index) 62 | if (acc) assert(sum == io.ab.toBigInt.toInt) 63 | else assert(ab(index) == io.ab.toBigInt.toInt) 64 | } 65 | clockDomain.waitSampling() 66 | } 67 | if (acc) { 68 | println(accRes, io.ab.toBigInt.toInt) 69 | } 70 | } 71 | 72 | for(p<-0 until 4){ 73 | fork{ 74 | check() 75 | } 76 | clockDomain.waitSampling(sampleLength) 77 | } 78 | 79 | clockDomain.waitSampling(32) 80 | simSuccess() 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/test/scala/eval_int8_dotp_ddr.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.cascade.int8_dotp_ddr 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | import scala.language.postfixOps 10 | import scala.util.Random 11 | 12 | object eval_int8_dotp_ddr extends App { 13 | 14 | val sampleLength = 32 15 | val vecLength = 8 16 | 17 | SpinalVerilog(new int8_dotp_ddr(vecLength)) 18 | 19 | val a = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 20 | val b = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 21 | val c = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 22 | 23 | val res = ArrayBuffer[Int]() 24 | for (s <- 0 until sampleLength / 2) { 25 | val ac = (a(s * 2), b(s * 2)).zipped.map(_ * _).sum 26 | val ad = (a(s * 2 + 1), b(s * 2)).zipped.map(_ * _).sum 27 | val bc = (a(s * 2), b(s * 2 + 1)).zipped.map(_ * _).sum 28 | val bd = (a(s * 2 + 1), b(s * 2 + 1)).zipped.map(_ * _).sum 29 | print(ac, bc, ad, bd) 30 | 31 | val ac_ = (a(s * 2), c(s * 2)).zipped.map(_ * _).sum 32 | val ad_ = (a(s * 2 + 1), c(s * 
/**
 * Helpers for the OPMODE side of a DSP48E2 wrapper.
 *
 * `opmode` sets the OPMODEREG attribute and the CTRL clock-enable / reset
 * pins; `w`, `x`, `y` and `z` each drive one slice of `INST.OPMODE`:
 *
 *   - W: OPMODE(8 downto 7)
 *   - X: OPMODE(1 downto 0)
 *   - Y: OPMODE(3 downto 2)
 *   - Z: OPMODE(6 downto 4)
 *
 * Every `*_sel_*` helper takes a Bool and builds the slice value from it, so
 * the slice is all-zero while the Bool is low. The meaning of each non-zero
 * code (P, M, C, PCIN, ...) follows the helper names; see the DSP48E2 user
 * guide for the authoritative encoding.
 */
object DSP48E2ConfigWXYZ {

  object opmode {
    /** Attaches a "RND" generic, built from `rnd`, to the DSP instance. */
    def add_optional_rnd_attr(dsp: DSP48E2, rnd: BigInt) = {
      val rndLiteral = B(rnd)
      dsp.addGeneric("RND", rndLiteral)
    }

    /** OPMODEREG = 0. */
    def set_static_opmode_attr(attr: DSP48E2Attributes) = {
      attr.OPMODEREG = 0
    }

    /** OPMODEREG = 1. */
    def set_dynamic_opmode_attr(attr: DSP48E2Attributes) = {
      attr.OPMODEREG = 1
    }

    /** Ties both the CTRL clock enable and the CTRL reset low. */
    def assign_static_opmode_ctrl(dsp: DSP48E2): Unit = {
      val ctrlCe = dsp.CEs.CTRL
      val ctrlRst = dsp.RSTs.CTRL
      ctrlCe.clear()
      ctrlRst.clear()
    }

    /** Ties the CTRL clock enable high and the CTRL reset low. */
    def assign_dynamic_opmode_ctrl(dsp: DSP48E2): Unit = {
      val ctrlCe = dsp.CEs.CTRL
      val ctrlRst = dsp.RSTs.CTRL
      ctrlCe.set()
      ctrlRst.clear()
    }
  }

  object w {

    /** Drives OPMODE(8 downto 7) with `wSel`. */
    def assign_w_ctrl(dsp: DSP48E2, wSel: Bits) = {
      dsp.INST.OPMODE(8 downto 7) := wSel
    }

    /** W slice = {0, high4P}. */
    def w_sel_p(dsp: DSP48E2, high4P: Bool) = {
      val sel = B"0" ## high4P
      assign_w_ctrl(dsp, sel)
    }

    /** W slice = {high4rnd, 0}. */
    def w_sel_rnd(dsp: DSP48E2, high4rnd: Bool) = {
      val sel = high4rnd ## B"0"
      assign_w_ctrl(dsp, sel)
    }

    /** W slice = {high4c, high4c}. */
    def w_sel_c(dsp: DSP48E2, high4c: Bool) = {
      val sel = high4c ## high4c
      assign_w_ctrl(dsp, sel)
    }
  }

  object x {
    /** Drives OPMODE(1 downto 0) with `xSel`. */
    def assign_x_ctrl(dsp: DSP48E2, xSel: Bits) = {
      dsp.INST.OPMODE(1 downto 0) := xSel
    }

    /** X slice = {0, high4m}. */
    def x_sel_m(dsp: DSP48E2, high4m: Bool) = {
      val sel = B"0" ## high4m
      assign_x_ctrl(dsp, sel)
    }

    /** X slice = {high4p, 0}. */
    def x_sel_p(dsp: DSP48E2, high4p: Bool) = {
      val sel = high4p ## B"0"
      assign_x_ctrl(dsp, sel)
    }

    /** X slice = {high4ab, high4ab}. */
    def x_sel_ab(dsp: DSP48E2, high4ab: Bool) = {
      val sel = high4ab ## high4ab
      assign_x_ctrl(dsp, sel)
    }

  }

  object y {
    /** Drives OPMODE(3 downto 2) with `ySel`. */
    def assign_y_ctrl(dsp: DSP48E2, ySel: Bits) = {
      dsp.INST.OPMODE(3 downto 2) := ySel
    }

    /** Y slice = {0, high4m}. */
    def y_sel_m(dsp: DSP48E2, high4m: Bool) = {
      val sel = B"0" ## high4m
      assign_y_ctrl(dsp, sel)
    }

    /** Y slice = {high4ff, 0}. */
    def y_sel_ff(dsp: DSP48E2, high4ff: Bool) = {
      val sel = high4ff ## B"0"
      assign_y_ctrl(dsp, sel)
    }

    /** Y slice = {high4c, high4c}. */
    def y_sel_c(dsp: DSP48E2, high4c: Bool) = {
      val sel = high4c ## high4c
      assign_y_ctrl(dsp, sel)
    }

    /** Y slice = {high4c, 1}: the low bit is always set, `high4c` only toggles the high bit. */
    def y_sel_c_over_m(dsp: DSP48E2, high4c: Bool) = {
      val sel = high4c ## B"1"
      assign_y_ctrl(dsp, sel)
    }
  }

  object z {
    /** Drives OPMODE(6 downto 4) with `zSel`. */
    def assign_z_ctrl(dsp: DSP48E2, zSel: Bits) = {
      dsp.INST.OPMODE(6 downto 4) := zSel
    }

    /** Z slice = {0, 0, high4pcin}. */
    def z_sel_pcin(dsp: DSP48E2, high4pcin: Bool) = {
      val sel = B"00" ## high4pcin
      assign_z_ctrl(dsp, sel)
    }

    /** Z slice = {0, high4p, 0}. */
    def z_sel_p(dsp: DSP48E2, high4p: Bool) = {
      val sel = B"0" ## high4p ## B"0"
      assign_z_ctrl(dsp, sel)
    }

    /** Z slice = {0, high4c, high4c}. */
    def z_sel_c(dsp: DSP48E2, high4c: Bool) = {
      val sel = B"0" ## high4c ## high4c
      assign_z_ctrl(dsp, sel)
    }
  }
}
Boolean = false) extends Component { 10 | 11 | val io = new Bundle { 12 | val a = in Vec(Bits(16 bits), length) 13 | val b = in Vec(Bits(16 bits), length) 14 | val ab = out Bits (48 bits) 15 | 16 | val valid = if (acc) in Bool() else null 17 | val last = if (acc) in Bool() else null 18 | } 19 | 20 | import DSP48E2ConfigMode._ 21 | import DSP48E2ConfigABCD._ 22 | import DSP48E2ConfigWXYZ._ 23 | 24 | val latency = length + 4 - 1 25 | 26 | val valid = if (acc) Delay(io.valid, 2, init = False) else null 27 | val last = if (acc) Delay(io.last, 2, init = False) else null 28 | val lastDSPAccValid = if (acc) Bool().setAsReg().init(False) else null 29 | if (acc) { 30 | lastDSPAccValid.setWhen(valid).clearWhen(last) 31 | } 32 | 33 | 34 | val attrs = Array.fill(length)(new DSP48E2Attributes) 35 | 36 | for (i <- 0 until length) { 37 | set_mul_attr(attrs(i)) 38 | inmode.set_static_inmode_attr(attrs(i)) 39 | opmode.set_static_opmode_attr(attrs(i)) 40 | a.set_static_a_input_attr(attrs(i), 2) 41 | b.set_static_b_input_attr(attrs(i), 2) 42 | c.set_mute_c_attr(attrs(i)) 43 | d.set_mute_d_attr(attrs(i)) 44 | } 45 | 46 | val dsp48e2s = attrs.map(attr => new DSP48E2(attr)) 47 | 48 | for (i <- 0 until length) { 49 | val accCond = i == length - 1 && acc 50 | 51 | inmode.assign_static_inmode_ctrl(dsp48e2s(i)) 52 | inmode.assign_default(dsp48e2s(i)) 53 | opmode.assign_static_opmode_ctrl(dsp48e2s(i)) 54 | w.w_sel_p(dsp48e2s(i), if (accCond) lastDSPAccValid else False) 55 | x.x_sel_m(dsp48e2s(i), if (accCond) valid else True) 56 | y.y_sel_m(dsp48e2s(i), if (accCond) valid else True) 57 | z.z_sel_pcin(dsp48e2s(i), if (i != 0) if (accCond) valid else True else False) 58 | a.assign_static_a_input_ctrl(dsp48e2s(i), 2) 59 | b.assign_static_b_input_ctrl(dsp48e2s(i), 2) 60 | c.assign_mute_c_ctrl(dsp48e2s(i)) 61 | d.assign_mute_d_ctrl(dsp48e2s(i)) 62 | 63 | assign_m_ctrl(dsp48e2s(i), ce = True, rst = False) 64 | assign_p_ctrl(dsp48e2s(i), ce = True, rst = False) 65 | 66 | dsp48e2s(i).DATAIN.A := 
/**
 * Chain of `length` DSP48E2 slices configured as multipliers whose P outputs
 * are cascaded; `io.ab` is the P output of the last slice.
 *
 * Per slice `i`: `io.b(i)` is sign-extended to 30 bits onto the A port and
 * `io.a(i)` is sign-extended to 18 bits onto the B port. The A input uses the
 * "time multiplex" configuration, steered by three per-slice control bits
 * (`ce1`, `ce2`, `selA`). Stage 0 generates them from a free-running 2-bit
 * counter and a bit that toggles every cycle; each later stage receives the
 * previous stage's bits through one register.
 *
 * @param length number of cascaded DSP slices (and width of `io.a` / `io.b`)
 * @param acc    accepted for signature parity with the sibling generators;
 *               not read anywhere in this class
 */
class int16_dotp_ddr(length: Int, acc: Boolean = false) extends Component {

  val io = new Bundle {
    val a = in Vec(Bits(16 bits), length)
    val b = in Vec(Bits(16 bits), length)
    val ab = out Bits (48 bits)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  val latency = length + 4 - 1

  val attrs = Array.fill(length)(new DSP48E2Attributes)

  // Every slice gets the same attribute set.
  attrs.foreach { attr =>
    set_mul_attr(attr)
    inmode.set_dynamic_inmode_attr(attr)
    opmode.set_static_opmode_attr(attr)
    a.set_time_multiplex_a_input_attr(attr)
    b.set_static_b_input_attr(attr, 2)
    c.set_mute_c_attr(attr)
    d.set_mute_d_attr(attr)
  }

  val dsp48e2s = attrs.map(new DSP48E2(_))
  val ce1 = Vec(Bool(), length)
  val ce2 = Vec(Bool(), length)
  val selA = Vec(Bool(), length)

  // Stage-0 control: cnt wraps every four cycles; ce1/ce2 are the registered
  // decodes of cnt === 3 and cnt === 1; selA flips on every clock.
  val cnt = UInt(2 bits).setAsReg().init(0)
  cnt := cnt + 1
  ce1(0) := RegNext(cnt === 3, init = False)
  ce2(0) := RegNext(cnt === 1, init = False)

  selA(0).setAsReg().init(False)
  selA(0).toggleWhen(True)

  for ((dsp, i) <- dsp48e2s.zipWithIndex) {
    val hasUpstream = i != 0

    inmode.assign_dynamic_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_static_opmode_ctrl(dsp)
    w.w_sel_p(dsp, False)
    x.x_sel_m(dsp, True)
    y.y_sel_m(dsp, True)
    // Only slices with a predecessor add the cascaded partial sum.
    z.z_sel_pcin(dsp, if (hasUpstream) True else False)
    a.assign_time_multiplex_a_input_ctrl(dsp, ce1(i), ce2(i), selA(i))
    b.assign_static_b_input_ctrl(dsp, 2)
    c.assign_mute_c_ctrl(dsp)
    d.assign_mute_d_ctrl(dsp)

    assign_m_ctrl(dsp, ce = True, rst = False)
    assign_p_ctrl(dsp, ce = True, rst = False)

    dsp.DATAIN.A := io.b(i).asSInt.resize(30).asBits
    dsp.DATAIN.B := io.a(i).asSInt.resize(18).asBits

    if (hasUpstream) {
      dsp.CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P
      // Each control bit is the previous stage's bit delayed by one register.
      for (chain <- Seq(ce1, ce2, selA)) {
        chain(i).setAsReg()
        chain(i) := chain(i - 1)
      }
    }
  }

  io.ab := dsp48e2s.last.DATAOUT.P
}
Bool().setAsReg().init(False) else null 29 | if (acc) lastDSPAccValid.setWhen(io.valid).clearWhen(io.last) 30 | 31 | val attrs = Array.fill(length)(new DSP48E2Attributes) 32 | 33 | for (i <- 0 until length) { 34 | set_alu_attr(attrs(i), simd = 4) 35 | inmode.set_static_inmode_attr(attrs(i)) 36 | opmode.set_dynamic_opmode_attr(attrs(i)) 37 | ab_concat.set_ab_concat_attr(attrs(i)) 38 | c.set_c_input_attr(attrs(i)) 39 | d.set_mute_d_attr(attrs(i)) 40 | } 41 | 42 | val dsp48e2s = attrs.map(attr => new DSP48E2(attr)) 43 | 44 | for (i <- 0 until length) { 45 | val accCond = i == length - 1 && acc 46 | 47 | inmode.assign_static_inmode_ctrl(dsp48e2s(i)) 48 | inmode.assign_default(dsp48e2s(i)) 49 | opmode.assign_dynamic_opmode_ctrl(dsp48e2s(i)) 50 | w.w_sel_p(dsp48e2s(i), if (accCond) lastDSPAccValid else False) 51 | x.x_sel_ab(dsp48e2s(i), io.aSel(i).msb) 52 | y.y_sel_c(dsp48e2s(i), io.bSel(i).msb) 53 | z.z_sel_pcin(dsp48e2s(i), if (i != 0) if (accCond) io.valid else True else False) 54 | ab_concat.assign_ab_concat_ctrl(dsp48e2s(i)) 55 | c.assign_c_input_ctrl(dsp48e2s(i)) 56 | d.assign_mute_d_ctrl(dsp48e2s(i)) 57 | 58 | assign_m_ctrl(dsp48e2s(i)) 59 | assign_p_ctrl(dsp48e2s(i), ce = True, rst = False) 60 | 61 | val AB = io.a(i).asBits 62 | dsp48e2s(i).DATAIN.A := AB.drop(18) 63 | dsp48e2s(i).DATAIN.B := AB.take(18) 64 | dsp48e2s(i).DATAIN.C := io.b(i).asBits 65 | if (i != 0) dsp48e2s(i).CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P 66 | 67 | if(accCond){ 68 | a.add_optional_valid_rst(dsp48e2s(i), io.valid) 69 | b.add_optional_valid_rst(dsp48e2s(i), io.valid) 70 | c.add_optional_valid_rst(dsp48e2s(i), io.valid) 71 | } 72 | } 73 | 74 | val P = dsp48e2s.last.DATAOUT.P 75 | io.ab.assignFromBits(P) 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/dpuczdx8g_acc.scala: -------------------------------------------------------------------------------- 1 | package 
/**
 * Two-lane accumulator built from two DSP48E2 instances that share a single
 * attribute object.
 *
 * Lane 0 receives the registered widening sum `p0 + p1`, lane 1 the
 * registered widening sum `p2 + p3`. In each lane the sum is sign-extended to
 * 30 bits onto the A port, the B port is held at the constant 1, and the bias
 * is sign-extended to 27 bits onto the D port, whose reset pin is `biasRst`.
 * `io.inVld` gates the X/Y selectors, `io.accVld` gates the Z selector, and
 * the low 29 bits of P form the lane output.
 */
class dpuczdx8g_acc() extends Component {

  val io = new Bundle {
    val inVld = in Bool()
    val accVld = in Bool()
    val p0 = in Bits (18 bits)
    val p1 = in Bits (18 bits)
    val p2 = in Bits (18 bits)
    val p3 = in Bits (18 bits)

    val biasRst = in Bool()
    val bias0 = in Bits (26 bits)
    val bias1 = in Bits (26 bits)

    val out0 = out Bits (29 bits)
    val out1 = out Bits (29 bits)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  // +^ keeps the carry, so each pair sum is one bit wider than its operands.
  val p01 = io.p0.asSInt +^ io.p1.asSInt
  val p23 = io.p2.asSInt +^ io.p3.asSInt
  val p01Reg = RegNext(p01)
  val p23Reg = RegNext(p23)

  val attr = new DSP48E2Attributes
  set_mul_attr(attr)
  inmode.set_static_inmode_attr(attr)
  opmode.set_dynamic_opmode_attr(attr)
  ad_pack.set_ad_pack_attr(attr)
  ad_pack.set_static_b_attr(attr)
  c.set_mute_c_attr(attr)

  /** Applies the control and data wiring that is common to both lanes. */
  private def wireLane(dsp: DSP48E2, pairSum: SInt, bias: Bits, result: Bits): Unit = {
    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_dynamic_opmode_ctrl(dsp)
    w.w_sel_rnd(dsp, False)
    x.x_sel_m(dsp, io.inVld)
    y.y_sel_m(dsp, io.inVld)
    z.z_sel_p(dsp, io.accVld)
    ad_pack.assign_ad_pack_ctrl(dsp)
    ad_pack.assign_static_b_ctrl(dsp)
    c.assign_mute_c_ctrl(dsp)

    assign_m_ctrl(dsp, ce = True, rst = False)
    assign_p_ctrl(dsp, ce = True, rst = False)

    dsp.DATAIN.B := B(1,18 bits)
    dsp.DATAIN.A := pairSum.resize(30).asBits
    dsp.DATAIN.D := bias.asSInt.resize(27).asBits
    dsp.RSTs.D := io.biasRst
    result := dsp.DATAOUT.P.take(29)
  }

  val dsp0 = new DSP48E2(attr)
  wireLane(dsp0, p01Reg, io.bias0, io.out0)

  val dsp1 = new DSP48E2(attr)
  wireLane(dsp1, p23Reg, io.bias1, io.out1)
}
/**
 * Processing element that pairs two `int8_dotp_ddr` chains with one
 * `ring_acc`, all instantiated inside a clocking area driven by `clkx2`.
 *
 * Vertical (`a0`/`a1`) and horizontal (`b0`/`b1`/`c0`/`c1`) operand vectors
 * are registered once in the component's own clock domain. The registered
 * copies feed the chains and, unless the PE is flagged as the last one in
 * that direction, are also forwarded on `dstV` / `dstH`.
 *
 * @param clkx2              clock domain used for the DSP chains and accumulator
 * @param length             vector length of each operand bundle / DSP chain
 * @param isLastPEVertical   when true no `dstV` port is created (it is `null`)
 * @param isLastPEHorizontal when true no `dstH` port is created (it is `null`)
 */
class ehb1024_pe(
                  clkx2: ClockDomain,
                  length: Int,
                  isLastPEVertical: Boolean = false,
                  isLastPEHorizontal: Boolean = false
                ) extends Component {

  /** Accumulator control and bias inputs. */
  case class inpBdl() extends Bundle {
    val inVld = Bool()
    val fbVld = Bool()
    val biasEn = Bool()
    val bias0 = Bits(24 bits)
    val bias1 = Bits(24 bits)
  }

  /** The two accumulator results. */
  case class outBdl() extends Bundle {
    val out0 = Bits(48 bits)
    val out1 = Bits(48 bits)
  }

  /** Operands travelling vertically through the array. */
  case class vertBdl() extends Bundle {
    val a0 = Vec(Bits(8 bits), length)
    val a1 = Vec(Bits(8 bits), length)
  }

  /** Operands travelling horizontally through the array. */
  case class horizBdl() extends Bundle {
    val b0 = Vec(Bits(8 bits), length)
    val b1 = Vec(Bits(8 bits), length)
    val c0 = Vec(Bits(8 bits), length)
    val c1 = Vec(Bits(8 bits), length)
  }

  val inp = new inpBdl().asInput()
  val srcV = new vertBdl().asInput()
  val srcH = new horizBdl().asInput()
  val out = new outBdl().asOutput()

  val dstV = if (!isLastPEVertical) new vertBdl().asOutput() else null
  val dstH = if (!isLastPEHorizontal) new horizBdl().asOutput() else null

  val srcVDly = RegNext(srcV)
  val srcHDly = RegNext(srcH)

  if (!isLastPEVertical) {
    dstV := srcVDly
  }
  if (!isLastPEHorizontal) {
    dstH := srcHDly
  }

  // The registered operands are read from the clkx2 area below, so they are
  // tagged as intentional clock-domain crossings.
  srcVDly.addTag(crossClockDomain)
  srcHDly.addTag(crossClockDomain)

  val x2 = new ClockingArea(clkx2) {
    val chain0 = new int8_dotp_ddr(length)
    val chain1 = new int8_dotp_ddr(length)

    val acc = new ring_acc()

    chain0.io.a := srcVDly.a0
    chain0.io.b := srcHDly.b0
    chain0.io.c := srcHDly.c0

    chain1.io.a := srcVDly.a1
    chain1.io.b := srcHDly.b1
    chain1.io.c := srcHDly.c1

    acc.io.inVld := inp.inVld
    acc.io.fbVld := inp.fbVld
    acc.io.biasEn := inp.biasEn
    acc.io.bias0 := inp.bias0
    acc.io.bias1 := inp.bias1

    // Each chain result is registered once, then sign-extended to 24 bits.
    val chainResults = Seq(chain0.io.ab, chain0.io.ac, chain1.io.ab, chain1.io.ac)
    val accInputs = Seq(acc.io.p0, acc.io.p1, acc.io.p2, acc.io.p3)
    for ((accIn, partial) <- accInputs.zip(chainResults)) {
      accIn := RegNext(partial).asSInt.resize(24 bits).asBits
    }

    out.out0 := acc.io.out0
    out.out1 := acc.io.out1
  }
}
/**
 * Simulation bench for `int16_ws_B_P`.
 *
 * Generates `pass` weight vectors (`a`) and `pass * reuse` activation vectors
 * (`b`), computes the expected dot products in software, then drives the DUT:
 * weights are shifted in through `io.a` while `io.enPrefetch` is high, a
 * one-cycle `io.enFetch` pulse starts each pass, and activations are fed to
 * `io.b(j)` with a skew of `j` cycles. `io.c` is compared with the reference
 * once `latency` cycles have elapsed.
 */
object eval_int16_ws_B_P extends App {

  val pass = 8
  val reuse = 8
  val vecLength = 8
  val sampleLength = pass * reuse

  // Elaborate the same configuration that is simulated below (this used to
  // be a literal 8, which would silently diverge if vecLength were changed).
  SpinalVerilog(new int16_ws_B_P(vecLength))

  val a = Array.fill(pass)(Array.fill(vecLength)(Random.nextInt(256) - 128))
  val b = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(256) - 128)))

  // ab(p)(r): reference dot product of weight vector p with activation vector r of pass p.
  val ab = for (p <- 0 until pass) yield {
    for (r <- 0 until reuse) yield {
      (a(p), b(p)(r)).zipped.map(_ * _).sum
    }
  }

  SimConfig.withFstWave
    .addRtl("data/sim/DSP48E2.v")
    .compile(new int16_ws_B_P(vecLength))
    .doSimUntilVoid { dut =>
      import dut._

      io.a #= 0
      io.b.foreach(_ #= 0)
      io.enPrefetch #= false
      io.enFetch #= false
      io.clrPrefetch.foreach(_ #= true)

      clockDomain.forkStimulus(10)
      clockDomain.waitSampling(32)

      /** Shifts the weights of pass `p` in, last element first, one per cycle. */
      def preLoad(p: Int) = {
        io.enPrefetch #= false
        for (v <- 0 until vecLength) {
          io.a #= a(p)(vecLength - v - 1) & 0xffff
          io.enPrefetch #= true
          // clrPrefetch is asserted only while the final element is driven.
          val isLastElement = v == vecLength - 1
          io.clrPrefetch.foreach(_ #= isLastElement)
          clockDomain.waitSampling()
        }
        io.enPrefetch #= false
        io.clrPrefetch.foreach(_ #= false)
      }

      /** Pulses enFetch for exactly one sampling edge. */
      def load() = {
        io.enFetch #= true
        clockDomain.waitSampling()
        io.enFetch #= false
      }

      /** Streams the activations of pass `p` and checks the DUT output. */
      def procedure(p: Int) = {

        for (i <- 0 until reuse + latency) {
          // Lane j receives vector (i - j) at cycle i, i.e. each lane lags
          // its predecessor by one cycle.
          for (j <- 0 until vecLength) {
            if (j <= i && i < reuse + j) {
              io.b(j) #= b(p)(i - j)(j) & 0xffff
            }
          }
          if (i > latency) {
            // NOTE(review): i stops at reuse + latency - 1, so index never
            // exceeds reuse - 2 and the last result of each pass is not
            // compared. Confirm whether that is intentional.
            val index = i - latency - 1
            assert(io.c.toBigInt.toInt == ab(p)(index))
          }
          clockDomain.waitSampling(1)
        }
      }

      preLoad(0)

      // Each pass overlaps three activities: the fetch pulse, streaming and
      // checking of the current pass, and prefetch of the next pass's weights.
      for (pp <- 0 until pass) {
        fork {
          load()
        }
        fork {
          procedure(pp)
        }
        if (pp < pass - 1) {
          fork {
            preLoad(pp + 1)
          }
        }
        clockDomain.waitSampling(reuse)
      }
      clockDomain.waitSampling(128)
      simSuccess()

    }
}
/**
 * Chain of `length` DSP48E2 slices that produces two 18-bit results, `io.ab`
 * and `io.ac`, taken from bits 17..0 and 35..18 of the last slice's P output.
 *
 * Per slice `i`: `io.b(i)` is sign-extended to 30 bits onto A, `io.c(i)` is
 * sign-extended by one bit and placed above 18 zero bits on D (27 bits in
 * total), and `io.a(i)` is sign-extended to 18 bits onto B. The B input uses
 * the "time multiplex" configuration, steered by `ce1` / `ce2` / `selMux`.
 * Stage 0 derives them from a free-running 2-bit counter and a bit that
 * toggles every cycle; later stages receive them through one register each.
 *
 * @param length number of cascaded DSP slices (and width of the input vectors)
 * @param acc    not read anywhere in this class
 */
class int8_dotp_ddr(length: Int, acc: Boolean = false) extends Component {

  val io = new Bundle {
    val a = in Vec(Bits(8 bits), length)
    val b = in Vec(Bits(8 bits), length)
    val c = in Vec(Bits(8 bits), length)
    val ab = out Bits (18 bits)
    val ac = out Bits (18 bits)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  val latency = length + 4 - 1

  val attrs = Array.fill(length)(new DSP48E2Attributes)

  // Identical attribute set for every slice.
  attrs.foreach { attr =>
    set_mul_attr(attr)
    inmode.set_dynamic_inmode_attr(attr)
    opmode.set_static_opmode_attr(attr)
    ad_pack.set_ad_pack_attr(attr)
    b.set_time_multiplex_b_input_attr(attr)
    c.set_mute_c_attr(attr)
  }

  val dsp48e2s = attrs.map(new DSP48E2(_))
  val ce1 = Vec(Bool(), length)
  val ce2 = Vec(Bool(), length)
  val selMux = Vec(Bool(), length)

  // Stage-0 control: registered decodes of a wrapping 2-bit counter, plus a
  // select bit that flips on every clock.
  val cnt = UInt(2 bits).setAsReg().init(0)
  cnt := cnt + 1
  ce1(0) := RegNext(cnt === 3, init = False)
  ce2(0) := RegNext(cnt === 1, init = False)

  selMux(0).setAsReg().init(False)
  selMux(0).toggleWhen(True)

  for ((dsp, i) <- dsp48e2s.zipWithIndex) {
    val hasUpstream = i != 0

    inmode.assign_dynamic_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_static_opmode_ctrl(dsp)
    w.w_sel_p(dsp, False)
    x.x_sel_m(dsp, True)
    y.y_sel_m(dsp, True)
    // Only slices with a predecessor add the cascaded partial sum.
    z.z_sel_pcin(dsp, if (hasUpstream) True else False)

    ad_pack.assign_ad_pack_ctrl(dsp)
    b.assign_time_multiplex_b_input_ctrl(dsp, ce1(i), ce2(i), selMux(i))
    c.assign_mute_c_ctrl(dsp)

    assign_m_ctrl(dsp, ce = True, rst = False)
    assign_p_ctrl(dsp, ce = True, rst = False)

    dsp.DATAIN.A := io.b(i).asSInt.resize(30).asBits
    dsp.DATAIN.B := io.a(i).asSInt.resize(18).asBits
    // 9-bit sign-expanded c on top of 18 zero bits => 27-bit D operand.
    dsp.DATAIN.D := io.c(i).asSInt.expand ## B(27 - 9 bits, default -> false)

    if (hasUpstream) {
      dsp.CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P
      // Each control bit is the previous stage's bit delayed by one register.
      for (chain <- Seq(ce1, ce2, selMux)) {
        chain(i).setAsReg()
        chain(i) := chain(i - 1)
      }
    }
  }

  val P = dsp48e2s.last.DATAOUT.P
  val abRes = P(17 downto 0).asBits
  // Sign bit of the low result, zero-extended to two bits. It is currently
  // not added to the upper field (the corrected form would be
  // `P(35 downto 18).asSInt + abNeg.asSInt`).
  // NOTE(review): presumably the borrow correction happens downstream - confirm.
  val abNeg = B"0" ## abRes.msb
  val acRes = P(35 downto 18).asSInt

  io.ab := abRes
  io.ac := acRes.asBits
}
if (i != 0) b.set_b_cascade(attrs(i)) 41 | } 42 | 43 | val dsp48e2s = attrs.map(attr => new DSP48E2(attr)) 44 | // val enPrefetchChain = Vec(Bool(), length) 45 | // val enFetchChain = Vec(Bool(), length) 46 | 47 | for (i <- 0 until length) { 48 | inmode.assign_static_inmode_ctrl(dsp48e2s(i)) 49 | inmode.assign_default(dsp48e2s(i)) 50 | opmode.assign_static_opmode_ctrl(dsp48e2s(i)) 51 | w.w_sel_c(dsp48e2s(i), False) 52 | x.x_sel_m(dsp48e2s(i), True) 53 | y.y_sel_m(dsp48e2s(i), True) 54 | z.z_sel_pcin(dsp48e2s(i), if (i != 0) True else False) 55 | a.assign_static_a_input_ctrl(dsp48e2s(i), 2) 56 | b.assign_b_pingpong_ctrl(dsp48e2s(i), io.ena(i), io.ena_d(i)) 57 | c.assign_mute_c_ctrl(dsp48e2s(i)) 58 | d.assign_mute_d_ctrl(dsp48e2s(i)) 59 | assign_m_ctrl(dsp48e2s(i), ce = True, rst = False) 60 | assign_p_ctrl(dsp48e2s(i), ce = True, rst = False) 61 | 62 | dsp48e2s(i).DATAIN.A := io.b(i).asSInt.resize(30).asBits 63 | if (i == 0) { 64 | dsp48e2s(i).DATAIN.B := io.a.asSInt.resize(18).asBits 65 | // enPrefetchChain(i) := io.enPrefetch 66 | // enFetchChain(i) := io.enFetch 67 | } 68 | else { 69 | dsp48e2s(i).CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P 70 | dsp48e2s(i).CASCDATAIN.B := dsp48e2s(i - 1).CASCDATAOUT.B 71 | // enPrefetchChain(i).setAsReg() 72 | // enFetchChain(i).setAsReg().init(False) 73 | // enPrefetchChain(i) := enPrefetchChain(i - 1) 74 | // enFetchChain(i) := enFetchChain(i - 1) 75 | // enPrefetchChain(i).clearWhen(io.clrPrefetch(i)) 76 | } 77 | } 78 | io.p_out := dsp48e2s.last.DATAOUT.P.resized 79 | } 80 | 81 | object int16_ws_b_p_simple extends App{ 82 | val config = SpinalConfig(nameWhenByFile = false, anonymSignalPrefix = "t") 83 | config.generateVerilog(new int16_ws_b_p_simple(4)) 84 | } -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2IntArithmetic/dualCascade/int16_os_B_P.scala: -------------------------------------------------------------------------------- 1 | package 
/**
 * Chain of `length` DSP48E2 multipliers with both the B operand and the P
 * result cascaded from slice to slice, and with dynamically driven OPMODE
 * selectors.
 *
 * `io.a` enters the B port of slice 0 and reaches later slices through the B
 * cascade; `io.b(i)` is sign-extended to 30 bits onto the A port of slice
 * `i`. `io.ab` is the P output of the last slice.
 *
 * Control is derived from `io.accValid` (delayed one cycle, `validDly`) and
 * `io.accLast` (delayed five cycles, `lastDly`):
 *   - `selOfM(i)`    : `validDly` delayed by `i + 1` registers; gates X and Y.
 *   - `rst(i)`       : `lastDly` delayed by `i + 1` registers; `rst(i + 1)`
 *                      resets the P register of slice `i`.
 *   - `selOfPCIN(i)` : set by `lastDly`, cleared by `rst(i)`; gates Z.
 *   - `selOfP(i)`    : set by `selOfM(i)`, cleared by `lastDly`; gates W.
 * Where set and clear coincide, the clear wins because it is assigned last.
 *
 * @param length number of cascaded DSP slices (and width of `io.b`)
 */
class int16_os_B_P(length: Int) extends Component {

  val io = new Bundle {
    val a = in Bits (16 bits)
    val b = in Vec(Bits(16 bits), length)
    val ab = out Bits (48 bits)

    val accValid = in Bool()
    val accLast = in Bool()
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  val latency = length + 4 - 1

  val selOfM = Vec(Bool(), length)
  val selOfPCIN = Vec(Bool(), length)
  val selOfP = Vec(Bool(), length)
  val rst = Vec(Bool(), length + 1)

  val validDly = RegNext(io.accValid, False)
  val lastDly = Delay(io.accLast, 5, init = False)

  val attrs = Array.fill(length)(new DSP48E2Attributes)

  attrs.zipWithIndex.foreach { case (attr, i) =>
    set_mul_attr(attr)
    inmode.set_static_inmode_attr(attr)
    opmode.set_dynamic_opmode_attr(attr)
    a.set_static_a_input_attr(attr, 2)
    b.set_static_b_input_attr(attr, 2)
    c.set_mute_c_attr(attr)
    d.set_mute_d_attr(attr)
    // Every slice except the first takes its B operand from the cascade.
    if (i != 0) b.set_b_cascade(attr)
  }

  val dsp48e2s = attrs.map(new DSP48E2(_))

  for ((dsp, i) <- dsp48e2s.zipWithIndex) {
    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_dynamic_opmode_ctrl(dsp)
    w.w_sel_p(dsp, selOfP(i))
    x.x_sel_m(dsp, selOfM(i))
    y.y_sel_m(dsp, selOfM(i))
    z.z_sel_pcin(dsp, selOfPCIN(i))
    a.assign_static_a_input_ctrl(dsp, 2)
    b.assign_static_b_input_ctrl(dsp, 2)
    c.assign_mute_c_ctrl(dsp)
    d.assign_mute_d_ctrl(dsp)
    assign_m_ctrl(dsp, ce = True, rst = False)
    assign_p_ctrl(dsp, ce = True, rst = rst(i + 1))

    dsp.DATAIN.A := io.b(i).asSInt.resize(30).asBits
    if (i == 0) {
      dsp.DATAIN.B := io.a.asSInt.resize(18).asBits
    }
    else {
      val upstream = dsp48e2s(i - 1)
      dsp.CASCDATAIN.B := upstream.CASCDATAOUT.B
      dsp.CASCDATAIN.P := upstream.CASCDATAOUT.P
    }

    // One-register-per-stage delay lines for the valid and reset pulses.
    selOfM(i).setAsReg().init(false)
    selOfM(i) := (if (i == 0) validDly else selOfM(i - 1))

    rst(i).setAsReg().init(false)
    rst(i) := (if (i == 0) lastDly else rst(i - 1))

    // Set/clear flags; the clear is assigned last and therefore has priority.
    selOfPCIN(i).setAsReg().init(false)
    selOfPCIN(i).setWhen(lastDly).clearWhen(rst(i))

    selOfP(i).setAsReg().init(false)
    selOfP(i).setWhen(selOfM(i)).clearWhen(lastDly)
  }

  // Extra tap at the end of the reset delay line, used by the last slice's P reset.
  rst(length).setAsReg().init(false)
  rst(length) := rst(length - 1)

  io.ab := dsp48e2s.last.DATAOUT.P
}
ad_pack.set_ad_pack_attr(attrs(i)) 36 | ad_pack.set_pingpong_b_attr(attrs(i)) 37 | c.set_mute_c_attr(attrs(i)) 38 | if (i != 0) b.set_b_cascade(attrs(i)) 39 | } 40 | 41 | val dsp48e2s = attrs.map(attr => new DSP48E2(attr)) 42 | val enPrefetchChain = Vec(Bool(), length) 43 | val enFetchChain = Vec(Bool(), length) 44 | 45 | for (i <- 0 until length) { 46 | inmode.assign_static_inmode_ctrl(dsp48e2s(i)) 47 | inmode.assign_default(dsp48e2s(i)) 48 | opmode.assign_static_opmode_ctrl(dsp48e2s(i)) 49 | w.w_sel_c(dsp48e2s(i), False) 50 | x.x_sel_m(dsp48e2s(i), True) 51 | y.y_sel_m(dsp48e2s(i), True) 52 | z.z_sel_pcin(dsp48e2s(i), if (i != 0) True else False) 53 | ad_pack.assign_ad_pack_ctrl(dsp48e2s(i)) 54 | ad_pack.assign_pingpong_b_ctrl(dsp48e2s(i), enPrefetchChain(i), enFetchChain(i)) 55 | c.assign_mute_c_ctrl(dsp48e2s(i)) 56 | assign_m_ctrl(dsp48e2s(i), ce = True, rst = False) 57 | assign_p_ctrl(dsp48e2s(i), ce = True, rst = False) 58 | 59 | dsp48e2s(i).DATAIN.A := io.b(i).asSInt.resize(30).asBits 60 | dsp48e2s(i).DATAIN.D := io.c(i).asSInt.expand ## B(27 - 9 bits, default -> false) 61 | 62 | if (i == 0) { 63 | dsp48e2s(i).DATAIN.B := io.a.asSInt.resize(18).asBits 64 | enPrefetchChain(i) := io.enPrefetch 65 | enFetchChain(i) := io.enFetch 66 | } 67 | else { 68 | dsp48e2s(i).CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P 69 | dsp48e2s(i).CASCDATAIN.B := dsp48e2s(i - 1).CASCDATAOUT.B 70 | enPrefetchChain(i).setAsReg() 71 | enFetchChain(i).setAsReg().init(False) 72 | enPrefetchChain(i) := enPrefetchChain(i - 1) 73 | enFetchChain(i) := enFetchChain(i - 1) 74 | enPrefetchChain(i).clearWhen(io.clrPrefetch(i)) 75 | } 76 | } 77 | 78 | val P = dsp48e2s.last.DATAOUT.P 79 | val abRes = P(17 downto 0).asBits 80 | val abNeg = B"0" ## abRes.msb 81 | // val acRes = P(35 downto 18).asSInt + abNeg.asSInt 82 | val acRes = P(35 downto 18).asSInt 83 | 84 | io.ab := abRes 85 | io.ac := acRes.asBits 86 | } 87 | -------------------------------------------------------------------------------- 
/src/main/scala/example/tpu14_pe.scala:
--------------------------------------------------------------------------------
package example

import spinal.core._
import spinal.lib._
import xilinx.DSP48E2._
import systolic._
import xilinx.DSP48E2IntArithmetic.dualCascade.int8_ws_B_P

import scala.language.postfixOps

/**
 * Processing element built from two 7-slice `int8_ws_B_P` chains and one extra DSP48E2.
 *
 * The horizontal operand bundle `srcH` is registered once, forwarded on `dstH` (unless this
 * is the last PE of a row) and fed to both chains. The registered `ab`/`ac` results of
 * chain 0 are packed onto the A:B ports of the extra DSP48E2 and those of chain 1 onto its
 * C port; `out.out` is that DSP48E2's P output.
 *
 * @param isLastPEHorizontal when true no `dstH` output bundle is created (`dstH` is null)
 */
class tpu14_pe(
                isLastPEHorizontal: Boolean = false
              ) extends Component {

  // Slices per chain; also the size of every per-slice port vector below.
  private val chainLength = 7

  case class inpBdl() extends Bundle {
    val a0 = in Bits (8 bits)
    val enPrefetch0 = in Bool()
    val enFetch0 = in Bool()
    val clrPrefetch0 = in Vec(Bool(), chainLength)

    val a1 = in Bits (8 bits)
    val enPrefetch1 = in Bool()
    val enFetch1 = in Bool()
    val clrPrefetch1 = in Vec(Bool(), chainLength)
  }

  case class outBdl() extends Bundle {
    val out = Bits(48 bits)
  }

  case class horizBdl() extends Bundle {
    val b0 = in Vec(Bits(8 bits), chainLength)
    val c0 = in Vec(Bits(8 bits), chainLength)

    val b1 = in Vec(Bits(8 bits), chainLength)
    val c1 = in Vec(Bits(8 bits), chainLength)
  }

  val inp = new inpBdl().asInput()
  val out = new outBdl().asOutput()
  val srcH = new horizBdl().asInput()
  val dstH = if (isLastPEHorizontal) null else new horizBdl().asOutput()

  // One register stage on the horizontal operands before use and before forwarding.
  val srcHDly = RegNext(srcH)
  if (!isLastPEHorizontal) dstH := srcHDly

  val chain0 = new int8_ws_B_P(chainLength)
  val chain1 = new int8_ws_B_P(chainLength)

  chain0.io.a := inp.a0
  chain0.io.enPrefetch := inp.enPrefetch0
  chain0.io.enFetch := inp.enFetch0
  chain0.io.clrPrefetch := inp.clrPrefetch0

  chain1.io.a := inp.a1
  chain1.io.enPrefetch := inp.enPrefetch1
  chain1.io.enFetch := inp.enFetch1
  chain1.io.clrPrefetch := inp.clrPrefetch1

  chain0.io.b := srcHDly.b0
  chain0.io.c := srcHDly.c0
  chain1.io.b := srcHDly.b1
  chain1.io.c := srcHDly.c1

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  // Extra DSP48E2 configured through the ALU helper with simd = 2.
  val attr = new DSP48E2Attributes

  set_alu_attr(attr, simd = 2)
  inmode.set_static_inmode_attr(attr)
  opmode.set_static_opmode_attr(attr)
  ab_concat.set_ab_concat_attr(attr)
  c.set_c_input_attr(attr)
  d.set_mute_d_attr(attr)

  val dsp = new DSP48E2(attr)

  inmode.assign_static_inmode_ctrl(dsp)
  inmode.assign_default(dsp)
  opmode.assign_static_opmode_ctrl(dsp)
  w.w_sel_c(dsp, True)
  x.x_sel_ab(dsp, True)
  y.y_sel_c(dsp, False)
  z.z_sel_p(dsp, False)
  ab_concat.assign_ab_concat_ctrl(dsp)
  c.assign_c_input_ctrl(dsp)
  d.assign_mute_d_ctrl(dsp)

  assign_m_ctrl(dsp)
  assign_p_ctrl(dsp, ce = True, rst = False)
  out.out := dsp.DATAOUT.P

  // Chain results are registered once before being packed.
  val ab0 = RegNext(chain0.io.ab)
  val ac0 = RegNext(chain0.io.ac)
  val ab1 = RegNext(chain1.io.ab)
  val ac1 = RegNext(chain1.io.ac)

  // Each 18-bit result is sign-extended to 24 bits; ab sits in the upper half, ac in the lower.
  val pack0 = ab0.asSInt.resize(24 bits) ## ac0.asSInt.resize(24 bits)
  val pack1 = ab1.asSInt.resize(24 bits) ## ac1.asSInt.resize(24 bits)

  // 48-bit pack0 is split across A (upper 30 bits) and B (lower 18 bits).
  dsp.DATAIN.A := pack0.drop(18)
  dsp.DATAIN.B := pack0.take(18)
  dsp.DATAIN.C := pack1
}
--------------------------------------------------------------------------------
/src/main/scala/xilinx/DSP48E2IntArithmetic/standalone/ring_acc.scala:
--------------------------------------------------------------------------------
package xilinx.DSP48E2IntArithmetic.standalone

import spinal.core._
import spinal.lib._
import xilinx.DSP48E2._

import scala.language.postfixOps

/**
 * Two cascaded DSP48E2 slices ("down" feeding "up" through the P cascade).
 *
 * `down` takes `p0 ## p1` on A:B and `bias0 ## bias1` on C, with the Z multiplexer select
 * driven by `io.biasEn`. `up` takes `p2 ## p3` on A:B, the cascaded P of `down`, and on C
 * its own P output delayed by two registers. `io.out0` / `io.out1` expose those two delay
 * registers.
 */
class ring_acc() extends Component {

  val io = new Bundle {
    val inVld = in Bool()
    val fbVld = in Bool()
    val p0 = in Bits (24 bits)
    val p1 = in Bits (24 bits)
    val p2 = in Bits (24 bits)
    val p3 = in Bits (24 bits)

    val biasEn = in Bool()
    val bias0 = in Bits (24 bits)
    val bias1 = in Bits (24 bits)

    val out0 = out Bits (48 bits)
    val out1 = out Bits (48 bits)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  val latency = 2

  // Both slices use the same static attribute set.
  private def newAluAttr(): DSP48E2Attributes = {
    val cfg = new DSP48E2Attributes
    set_alu_attr(cfg, simd = 2)
    inmode.set_static_inmode_attr(cfg)
    opmode.set_dynamic_opmode_attr(cfg)
    ab_concat.set_ab_concat_attr(cfg)
    c.set_c_input_attr(cfg)
    d.set_mute_d_attr(cfg)
    cfg
  }

  val down = new Area {

    val attr = newAluAttr()

    val dsp = new DSP48E2(attr)

    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_dynamic_opmode_ctrl(dsp)
    w.w_sel_rnd(dsp, False)
    x.x_sel_ab(dsp, True)
    y.y_sel_c(dsp, False)
    z.z_sel_c(dsp, io.biasEn)
    ab_concat.assign_ab_concat_ctrl(dsp)
    c.assign_c_input_ctrl(dsp)
    d.assign_mute_d_ctrl(dsp)

    assign_m_ctrl(dsp)
    assign_p_ctrl(dsp, ce = True, rst = False)

    val bias = io.bias0 ## io.bias1
    val p0p1 = io.p0 ## io.p1

    // 48-bit p0p1 is split across A (upper 30 bits) and B (lower 18 bits).
    dsp.DATAIN.A := p0p1.drop(18)
    dsp.DATAIN.B := p0p1.take(18)
    dsp.DATAIN.C := bias
  }

  val up = new Area {

    val attr = newAluAttr()

    val dsp = new DSP48E2(attr)

    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_dynamic_opmode_ctrl(dsp)
    w.w_sel_rnd(dsp, False)
    x.x_sel_ab(dsp, io.inVld)
    y.y_sel_c(dsp, io.fbVld)
    z.z_sel_pcin(dsp, io.inVld)
    ab_concat.assign_ab_concat_ctrl(dsp)
    c.assign_c_input_ctrl(dsp)
    d.assign_mute_d_ctrl(dsp)

    assign_m_ctrl(dsp)
    assign_p_ctrl(dsp, ce = True, rst = False)

    val p2p3 = io.p2 ## io.p3
    dsp.DATAIN.A := p2p3.drop(18)
    dsp.DATAIN.B := p2p3.take(18)

    // Two-register delay line on P; its tail is fed back into the C port.
    val delayReg0 = Bits(48 bits).setAsReg().init(0)
    val delayReg1 = Bits(48 bits).setAsReg().init(0)

    delayReg0 := dsp.DATAOUT.P
    delayReg1 := delayReg0
    dsp.DATAIN.C := delayReg1

    dsp.CASCDATAIN.P := down.dsp.CASCDATAOUT.P

    io.out0 := delayReg0
    io.out1 := delayReg1
  }
}
--------------------------------------------------------------------------------
/src/test/scala/eval_int8_ws_B_P.scala:
--------------------------------------------------------------------------------
import spinal.core._
import spinal.core.sim._
import spinal.lib._
import spinal.lib.eda.bench.Rtl
import xilinx.DSP48E2._
import xilinx.DSP48E2IntArithmetic.dualCascade.int8_ws_B_P

import scala.language.postfixOps
import scala.util.Random

object eval_int8_ws_B_P extends App {

  val pass = 8
  val reuse = 16
  val vecLength = 8
  val sampleLength = pass * reuse

  val a = Array.fill(pass)(Array.fill(vecLength)(Random.nextInt(256) - 128))
  val b = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(256) - 128)))
  val c = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(256) - 128)))


  val ab = for (p <- 0 until pass) yield {
    for (r <- 0 until reuse) yield {
      (a(p), b(p)(r)).zipped.map(_ * _).sum
    }
  }

  val ac = for (p <- 0 until pass) yield {
    for (r <- 0 until reuse) yield {
      (a(p), c(p)(r)).zipped.map(_ * _).sum
    }
  }

  SimConfig.withFstWave
    .addRtl("data/sim/DSP48E2.v")
    .compile(new int8_ws_B_P(vecLength))
    .doSimUntilVoid { dut =>
      import dut._

      io.a #= 0
      io.b.foreach(_ #= 0)
      io.c.foreach(_ #= 0)
      io.enPrefetch #= false
      io.enFetch #= false
      io.clrPrefetch.foreach(_ #= true)

      clockDomain.forkStimulus(10)
      clockDomain.waitSampling(32)

      def preLoad(p: Int) = {
        io.enPrefetch #= false
        for (v
<- 0 until vecLength) {
          io.a #= a(p)(vecLength - v - 1) & 0xff
          io.enPrefetch #= true
          if (v == vecLength - 1) io.clrPrefetch.foreach(_ #= true)
          else io.clrPrefetch.foreach(_ #= false)
          clockDomain.waitSampling()
        }
        io.enPrefetch #= false
        io.clrPrefetch.foreach(_ #= false)
      }

      def load() = {
        io.enFetch #= true
        clockDomain.waitSampling()
        io.enFetch #= false
      }

      def procedure(p: Int) = {

        for (i <- 0 until reuse + latency) {
          for (j <- 0 until vecLength) {
            if (j <= i && i < reuse + j) {
              io.b(j) #= b(p)(i - j)(j) & 0xff
              io.c(j) #= c(p)(i - j)(j) & 0xff
            }
          }
          if (i > latency) {
            val index = i - latency - 1
            assert((io.ab.toInt << 14) >> 14 == (ab(p)(index)))
            assert((io.ac.toInt << 14) >> 14 == (ac(p)(index)))
          }
          clockDomain.waitSampling(1)
        }
      }

      preLoad(0)

      for (pp <- 0 until pass) {
        fork {
          load()
        }
        fork {
          procedure(pp)
        }
        if (pp < pass - 1) {
          fork {
            preLoad(pp + 1)
          }
        }
        clockDomain.waitSampling(reuse)
      }
      clockDomain.waitSampling(128)
      simSuccess()
    }
}
--------------------------------------------------------------------------------
/src/main/scala/xilinx/DSP48E2IntArithmetic/dualCascade/int12_ws_AB_C_P.scala:
--------------------------------------------------------------------------------
package xilinx.DSP48E2IntArithmetic.dualCascade

import spinal.core._
import spinal.lib._
import xilinx.DSP48E2._

import scala.language.postfixOps

/**
 * Chain of `length` DSP48E2 slices configured through the ALU helper with simd = 4.
 *
 * The four `io.a` lanes (each sign-extended to 12 bits) enter slice 0 on A:B and reach the
 * later slices through the A/B cascades. The four `io.b` lanes are shifted through a
 * per-slice register chain (`cPortDataChain`) that drives each slice's C port. Per slice,
 * `io.aSel(i)` drives the X select and `io.bSel(i)` the Y select. Slices after the first
 * also take the cascaded P of their predecessor; `io.ab` is the last slice's P reinterpreted
 * as four 12-bit lanes.
 *
 * @param length number of DSP48E2 slices in the chain
 * @param width  bit width of each `io.a` / `io.b` lane; must not exceed 12
 */
class int12_ws_AB_C_P(length: Int, width: Int) extends Component {

  val io = new Bundle {
    val aSel = in Vec(Bits(1 bits), length)
    val bSel = in Vec(Bits(1 bits), length)
    val a = in Vec(Bits(width bits), 4)
    val b = in Vec(Bits(width bits), 4)
    val ab = out Vec(Bits(12 bits), 4)

    val enPrefetch = in Bool()
    val enFetch = in Bool()
    val clrPrefetch = in Vec(Bool(), length)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  // Each lane is sign-extended to 12 bits below, so wider lanes cannot be represented.
  require(width <= 12, s"int12_ws_AB_C_P: width must be <= 12, got $width")
  val latency = length + 2 - 1

  val attrs = Array.fill(length)(new DSP48E2Attributes)

  attrs.zipWithIndex.foreach { case (cfg, idx) =>
    set_alu_attr(cfg, simd = 4)
    inmode.set_static_inmode_attr(cfg)
    opmode.set_dynamic_opmode_attr(cfg)
    ab_concat.set_pingpong_ab_concat_attr(cfg)
    c.set_c_input_attr(cfg)
    d.set_mute_d_attr(cfg)
    if (idx != 0) {
      a.set_a_cascade(cfg)
      b.set_b_cascade(cfg)
    }
  }

  val dsp48e2s = attrs.map(attr => new DSP48E2(attr))
  val enPrefetchChain = Vec(Bool(), length)
  val enFetchChain = Vec(Bool(), length)
  // 4 x 12 = 48 bits, split across A (upper 30) and B (lower 18) of slice 0.
  val aBits = io.a.map(_.asSInt.resize(12 bits)).asBits()
  val bBits = io.b.asBits
  val cPortDataChain = Vec(Bits(width * 4 bits), length)

  dsp48e2s.zipWithIndex.foreach { case (dsp, idx) =>
    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_dynamic_opmode_ctrl(dsp)
    w.w_sel_p(dsp, False)
    x.x_sel_ab(dsp, io.aSel(idx).msb)
    y.y_sel_c(dsp, io.bSel(idx).msb)
    z.z_sel_pcin(dsp, if (idx == 0) False else True)
    ab_concat.assign_pingpong_ab_concat_ctrl(dsp, enPrefetchChain(idx), enFetchChain(idx))
    c.assign_c_input_ctrl(dsp, enFetchChain(idx))
    assign_m_ctrl(dsp)
    assign_p_ctrl(dsp, ce = True, rst = False)

    // Re-expand the stored `width`-bit lanes to four sign-extended 12-bit lanes for C.
    dsp.DATAIN.C := cPortDataChain(idx).
      subdivideIn(4 slices).
      map(_.asSInt.resize(12 bits)).
      asBits()

    cPortDataChain(idx).setAsReg()
    if (idx == 0) {
      dsp.DATAIN.A := aBits.drop(18)
      dsp.DATAIN.B := aBits.take(18)

      enPrefetchChain(idx) := io.enPrefetch
      enFetchChain(idx) := io.enFetch
      when(enPrefetchChain(idx))(cPortDataChain(idx) := bBits)
    } else {
      val prev = dsp48e2s(idx - 1)
      dsp.CASCDATAIN.P := prev.CASCDATAOUT.P
      dsp.CASCDATAIN.A := prev.CASCDATAOUT.A
      dsp.CASCDATAIN.B := prev.CASCDATAOUT.B

      // Enables travel down the chain one register per slice.
      enPrefetchChain(idx).setAsReg()
      enFetchChain(idx).setAsReg().init(False)
      enPrefetchChain(idx) := enPrefetchChain(idx - 1)
      enFetchChain(idx) := enFetchChain(idx - 1)
      enPrefetchChain(idx).clearWhen(io.clrPrefetch(idx))
      // The b data shifts one slice further while this slice's prefetch enable is high.
      when(enPrefetchChain(idx))(cPortDataChain(idx) := cPortDataChain(idx - 1))
    }
  }

  io.ab.assignFromBits(dsp48e2s.last.DATAOUT.P)
}
--------------------------------------------------------------------------------
/src/main/scala/xilinx/DSP48E2IntArithmetic/dualCascade/int8_ws_AD_B.scala:
--------------------------------------------------------------------------------
package xilinx.DSP48E2IntArithmetic.dualCascade

import spinal.core._
import spinal.lib._
import xilinx.DSP48E2._

import scala.language.postfixOps

/**
 * Chain of `length` DSP48E2 slices with A and B cascaded and one packed result per slice.
 *
 * Slice 0 takes `io.b` on A, `io.a` on B and `io.c` (shifted up by 18 bits) on D. Later
 * slices take A and B from their predecessor's cascade outputs and a registered copy of the
 * previous slice's D data. Every slice adds its own partial-sum inputs on C
 * (`acIn(i)` above `abIn(i)`) and exposes P as `abOut(i)` (bits 17..0) and `acOut(i)`
 * (bits 35..18).
 *
 * @param length number of DSP48E2 slices in the chain
 */
class int8_ws_AD_B(length: Int) extends Component {

  val io = new Bundle {
    val a = in Bits (8 bits)
    val b = in Bits (8 bits)
    val c = in Bits (8 bits)
    val abIn = in Vec(Bits(18 bits), length)
    val acIn = in Vec(Bits(18 bits), length)
    val abOut = out Vec(Bits(18 bits), length)
    val acOut = out Vec(Bits(18 bits), length)

    val enPrefetch = in Bool()
    val enFetch = in Bool()
    val clrPrefetch = in Vec(Bool(), length)
  }

  // Unconnected partial-sum inputs default to zero; the literal must match the 18-bit ports.
  io.abIn.foreach(_.default(B(0, 18 bits)))
  io.acIn.foreach(_.default(B(0, 18 bits)))

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  val attrs = Array.fill(length)(new DSP48E2Attributes)

  attrs.zipWithIndex.foreach { case (cfg, idx) =>
    set_mul_attr(cfg)
    inmode.set_static_inmode_attr(cfg)
    opmode.set_static_opmode_attr(cfg)
    ad_pack.set_ad_pack_attr(cfg)
    ad_pack.set_pingpong_b_attr(cfg)
    c.set_c_input_attr(cfg)
    if (idx != 0) a.set_a_cascade(cfg)
    if (idx != 0) b.set_b_cascade(cfg)
  }

  val dsp48e2s = attrs.map(attr => new DSP48E2(attr))
  val enPrefetchChain = Vec(Bool(), length)
  val enFetchChain = Vec(Bool(), length)
  val dPortDataChain = Vec(Bits(8 bits), length)

  dsp48e2s.zipWithIndex.foreach { case (dsp, idx) =>
    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_static_opmode_ctrl(dsp)
    w.w_sel_c(dsp, True)
    x.x_sel_m(dsp, True)
    y.y_sel_m(dsp, True)
    z.z_sel_pcin(dsp, False)
    ad_pack.assign_ad_pack_ctrl(dsp)
    ad_pack.assign_pingpong_b_ctrl(dsp, enPrefetchChain(idx), enFetchChain(idx))
    c.assign_c_input_ctrl(dsp)
    assign_m_ctrl(dsp, ce = True, rst = False)
    assign_p_ctrl(dsp, ce = True, rst = False)

    // D: 8-bit value sign-expanded to 9 bits and placed above 18 zero bits.
    dsp.DATAIN.D := dPortDataChain(idx).asSInt.expand ## B(27 - 9 bits, default -> false)
    // C: acIn sign-extended to 30 bits above the raw 18-bit abIn (48 bits in total).
    dsp.DATAIN.C := io.acIn(idx).asSInt.resize(30 bits) ## io.abIn(idx)
    io.abOut(idx) := dsp.DATAOUT.P(17 downto 0)
    io.acOut(idx) := dsp.DATAOUT.P(35 downto 18)

    a.assign_a_cascade(dsp)

    if (idx == 0) {
      dPortDataChain(idx) := io.c
      dsp.DATAIN.A := io.b.asSInt.resize(30).asBits
      dsp.DATAIN.B := io.a.asSInt.resize(18).asBits
      enPrefetchChain(idx) := io.enPrefetch
      enFetchChain(idx) := io.enFetch
    } else {
      val prev = dsp48e2s(idx - 1)
      dsp.CASCDATAIN.A := prev.CASCDATAOUT.A
      dsp.CASCDATAIN.B := prev.CASCDATAOUT.B
      // D data and both enables travel down the chain one register per slice.
      dPortDataChain(idx).setAsReg()
      dPortDataChain(idx) := dPortDataChain(idx - 1)
      enPrefetchChain(idx).setAsReg()
      enFetchChain(idx).setAsReg().init(False)
      enPrefetchChain(idx) := enPrefetchChain(idx - 1)
      enFetchChain(idx) := enFetchChain(idx - 1)
      enPrefetchChain(idx).clearWhen(io.clrPrefetch(idx))
    }
  }
}
--------------------------------------------------------------------------------
/src/test/scala/eval_int12_xdotp.scala:
--------------------------------------------------------------------------------
import spinal.core._
import spinal.core.sim._
import spinal.lib._
import spinal.lib.eda.bench.Rtl
import xilinx.DSP48E2._
import xilinx.DSP48E2IntArithmetic.cascade.int12_xdotp

import scala.language.postfixOps
import scala.util.Random

object eval_int12_xdotp extends App {

  val sampleLength = 32
  val vecLength = 8
  val acc = true

  val aSel = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(2)))
  val bSel = Array.fill(sampleLength)(Array.fill(vecLength)(Random.nextInt(2)))
  val a = Array.fill(sampleLength)(Array.fill(vecLength)(Array.fill(4)(Random.nextInt(256) - 128)))
  val b = Array.fill(sampleLength)(Array.fill(vecLength)(Array.fill(4)(Random.nextInt(256) - 128)))
  val ab = for (s <- 0 until sampleLength) yield {
    for (t <- 0 until 4) yield {
      (for (i <- 0 until vecLength) yield {
        aSel(s)(i) * a(s)(i)(t) + bSel(s)(i) * b(s)(i)(t)
      }).sum
    }
  }

  val accRes = ab.transpose.map(_.sum)

  SimConfig.withFstWave
    .addRtl("data/sim/DSP48E2.v")
    .compile(new int12_xdotp(vecLength, acc))
    .doSim { dut =>
      import dut._

      io.aSel.foreach(_ #= 0)
      io.bSel.foreach(_ #= 0)
      io.a.foreach(_.foreach(_ #= 0))
      io.b.foreach(_.foreach(_ #= 0))
      if (acc) {
        io.valid #= false
        io.last #= false
      }

      clockDomain.forkStimulus(10)
      clockDomain.waitSampling(32)

      var sum = Array.fill(4)(0)

for (i <- 0 until sampleLength + latency) {

        for (j <- 0 until vecLength) {
          if (j <= i && i < sampleLength + j) {
            io.aSel(j) #= aSel(i - j)(j)
            io.bSel(j) #= bSel(i - j)(j)
            (io.a(j), a(i - j)(j)).zipped.foreach(_ #= _ & 0xfff)
            (io.b(j), b(i - j)(j)).zipped.foreach(_ #= _ & 0xfff)
          }
          else {
            io.aSel(j) #= 0
            io.bSel(j) #= 0
            io.a(j).foreach(_ #= 0)
            io.b(j).foreach(_ #= 0)
          }
        }

        if (vecLength - 1 <= i && i < sampleLength + vecLength - 1) {
          if (acc) {
            io.valid #= true
            io.last #= (i == sampleLength + vecLength - 2)
          }
        }
        else {
          if (acc) {
            io.valid #= false
            io.last #= false
          }
        }

        if (i > latency) {
          val index = i - latency - 1
          for (t <- 0 until 4) {
            sum(t) += ab(index)(t)
            if (acc) assert(sum(t) == (io.ab(t).toInt << 20) >> 20)
            else assert(ab(index)(t) == (io.ab(t).toInt << 20) >> 20)
          }
        }
        clockDomain.waitSampling()
      }

      if (acc) {
        println(accRes.mkString(", "))
        println(io.ab.map(v => (v.toInt << 20) >> 20).mkString(", "))
      }

      io.aSel.foreach(_ #= 0)
      io.bSel.foreach(_ #= 0)
      io.a.foreach(_.foreach(_ #= 0))
      io.b.foreach(_.foreach(_ #= 0))
      clockDomain.waitSampling(32)

      simSuccess()
    }
}
--------------------------------------------------------------------------------
/src/main/scala/xilinx/FloatingIP/floating_ip_stub.scala:
--------------------------------------------------------------------------------
package xilinx.FloatingIP

import spinal.core._
import spinal.lib._

import scala.language.postfixOps

/**
 * BlackBox stub with one operand stream (`s_axis_a`) and one result stream (`m_axis_result`).
 *
 * @param width bit width of the operand and result payloads
 * @param name  definition (module) name given to the black box
 * @param lat   latency of the wrapped core; when 0 no `aclk` port is created or mapped
 */
class one_op_stub(width: Int, name: String, lat: Int) extends BlackBox {
  val latency = lat
  val io = new Bundle {
    // A clock port only exists for cores with non-zero latency.
    val aclk = if (lat == 0) null else in Bool()
    val a = slave(Flow(Bits(width bits)))
    val r = master(Flow(Bits(width bits)))
  }
  noIoPrefix()

  io.a.setName("s_axis_a")
  io.r.setName("m_axis_result")

  util.AxiStreamSpecRenamer(io.a)
  util.AxiStreamSpecRenamer(io.r)
  if (lat != 0) mapClockDomain(clock = io.aclk)
  this.setDefinitionName(name)
}

/**
 * BlackBox stub with two operand streams (`s_axis_a`, `s_axis_b`) and one result stream.
 *
 * @param width bit width of the operand and result payloads
 * @param name  definition (module) name given to the black box
 * @param lat   latency of the wrapped core; when 0 no `aclk` port is created or mapped
 */
class two_op_stub(width: Int, name: String, lat: Int) extends BlackBox {
  val latency = lat
  val io = new Bundle {
    val aclk = if (lat == 0) null else in Bool()
    val a = slave(Flow(Bits(width bits)))
    val b = slave(Flow(Bits(width bits)))
    val r = master(Flow(Bits(width bits)))
  }
  noIoPrefix()

  io.a.setName("s_axis_a")
  io.b.setName("s_axis_b")
  io.r.setName("m_axis_result")

  util.AxiStreamSpecRenamer(io.a)
  util.AxiStreamSpecRenamer(io.b)
  util.AxiStreamSpecRenamer(io.r)
  if (lat != 0) mapClockDomain(clock = io.aclk)
  this.setDefinitionName(name)
}

/**
 * BlackBox stub with three operand streams (`s_axis_a`, `s_axis_b`, `s_axis_c`) and one
 * result stream.
 *
 * @param width bit width of the operand and result payloads
 * @param name  definition (module) name given to the black box
 * @param lat   latency of the wrapped core; when 0 no `aclk` port is created or mapped
 */
class three_op_stub(width: Int, name: String, lat: Int) extends BlackBox {
  val latency = lat
  val io = new Bundle {
    val aclk = if (lat == 0) null else in Bool()
    val a = slave(Flow(Bits(width bits)))
    val b = slave(Flow(Bits(width bits)))
    val c = slave(Flow(Bits(width bits)))
    val r = master(Flow(Bits(width bits)))
  }
  noIoPrefix()

  io.a.setName("s_axis_a")
  io.b.setName("s_axis_b")
  io.c.setName("s_axis_c")
  io.r.setName("m_axis_result")

  util.AxiStreamSpecRenamer(io.a)
  util.AxiStreamSpecRenamer(io.b)
  util.AxiStreamSpecRenamer(io.c)
  util.AxiStreamSpecRenamer(io.r)
  if (lat != 0) mapClockDomain(clock = io.aclk)
  this.setDefinitionName(name)
}

/**
 * BlackBox stub whose operand and result streams carry `Fragment` payloads (a `last` flag
 * alongside the data).
 *
 * @param width bit width of the operand and result payloads
 * @param name  definition (module) name given to the black box
 * @param lat   latency of the wrapped core; when 0 no `aclk` port is created or mapped
 */
class acc_stub(width: Int, name: String, lat: Int) extends BlackBox {
  val latency = lat
  val io = new Bundle {
    val aclk = if (lat == 0) null else in Bool()
    val a = slave(Flow(Fragment(Bits(width bits))))
    val r = master(Flow(Fragment(Bits(width bits))))
  }
  noIoPrefix()

  io.a.setName("s_axis_a")
  io.r.setName("m_axis_result")

  util.AxiStreamSpecRenamer(io.a)
  util.AxiStreamSpecRenamer(io.r)
  if (lat != 0) mapClockDomain(clock = io.aclk)
  this.setDefinitionName(name)
}

/**
 * BlackBox stub with two operand streams and a fixed 8-bit result stream.
 *
 * @param width bit width of the two operand payloads (the result is always 8 bits)
 * @param name  definition (module) name given to the black box
 * @param lat   latency of the wrapped core; when 0 no `aclk` port is created or mapped
 */
class cmp_stub(width: Int, name: String, lat: Int) extends BlackBox {
  val latency = lat
  val io = new Bundle {
    val aclk = if (lat == 0) null else in Bool()
    val a = slave(Flow(Bits(width bits)))
    val b = slave(Flow(Bits(width bits)))
    val r = master(Flow(Bits(8 bits)))
  }
  noIoPrefix()

  io.a.setName("s_axis_a")
  io.b.setName("s_axis_b")
  io.r.setName("m_axis_result")

  util.AxiStreamSpecRenamer(io.a)
  util.AxiStreamSpecRenamer(io.b)
  util.AxiStreamSpecRenamer(io.r)
  if (lat != 0) mapClockDomain(clock = io.aclk)
  this.setDefinitionName(name)
}
--------------------------------------------------------------------------------
/src/main/scala/example/int8_ws_b_p_clb.scala:
--------------------------------------------------------------------------------
package example

import spinal.core._
import spinal.lib._
import xilinx.DSP48E2._

import scala.language.postfixOps

/**
 * Variant of the `int8_ws_B_P` chain in which the B operand is shifted through fabric
 * registers (`breg`) instead of the DSP48E2 B cascade.
 *
 * `io.a` enters `breg(0)` and moves to `breg(i)` from `breg(i - 1)` while the corresponding
 * prefetch enable is high; each `breg(i)` drives the B port of slice `i` directly. The A/D
 * data path, the P cascade and the packed `ab`/`ac` outputs follow the same structure as in
 * `int8_ws_B_P`.
 *
 * @param length number of DSP48E2 slices in the chain
 */
class int8_ws_b_p_clb(length: Int) extends Component {

  val io = new Bundle {
    val a = in Bits (8 bits)
    val b = in Vec(Bits(8 bits), length)
    val c = in Vec(Bits(8 bits), length)
    val ab = out Bits (18 bits)
    val ac = out Bits (18 bits)

    val enPrefetch = in Bool()
    val enFetch = in Bool()
    val clrPrefetch = in Vec(Bool(), length)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  val latency = length + 4 - 1

  val attrs = Array.fill(length)(new DSP48E2Attributes)

  attrs.zipWithIndex.foreach { case (cfg, idx) =>
    set_mul_attr(cfg)
    inmode.set_static_inmode_attr(cfg)
    opmode.set_static_opmode_attr(cfg)
    ad_pack.set_ad_pack_attr(cfg)
    b.set_static_b_input_attr(cfg, 1)
    //    ad_pack.set_pingpong_b_attr(attrs(i))
    c.set_mute_c_attr(cfg)
    // NOTE(review): B is driven from `breg` through DATAIN.B and the CASCDATAIN.B
    // connection is commented out below, yet the cascade attribute is still applied
    // for idx != 0 - confirm this is intended.
    if (idx != 0) b.set_b_cascade(cfg)
  }

  val dsp48e2s = attrs.map(attr => new DSP48E2(attr))
  val breg = Vec(Bits(8 bits), length)
  val enPrefetchChain = Vec(Bool(), length)
  val enFetchChain = Vec(Bool(), length)

  breg.foreach(_.setAsReg())

  dsp48e2s.zipWithIndex.foreach { case (dsp, idx) =>
    inmode.assign_static_inmode_ctrl(dsp)
    inmode.assign_default(dsp)
    opmode.assign_static_opmode_ctrl(dsp)
    w.w_sel_c(dsp, False)
    x.x_sel_m(dsp, True)
    y.y_sel_m(dsp, True)
    z.z_sel_pcin(dsp, if (idx == 0) False else True)
    ad_pack.assign_ad_pack_ctrl(dsp)
    //    ad_pack.assign_pingpong_b_ctrl(dsp48e2s(i), enPrefetchChain(i), enFetchChain(i))
    // The B1 register clock enable is the (chained) fetch enable.
    dsp.CEs.B1 := enFetchChain(idx)
    inmode.assign_inmode_b(dsp, high4b1 = True)

    c.assign_mute_c_ctrl(dsp)
    assign_m_ctrl(dsp, ce = True, rst = False)
    assign_p_ctrl(dsp, ce = True, rst = False)

    dsp.DATAIN.B := breg(idx).asSInt.resize(18).asBits
    dsp.DATAIN.A := io.b(idx).asSInt.resize(30).asBits
    dsp.DATAIN.D := io.c(idx).asSInt.expand ## B(27 - 9 bits, default -> false)

    if (idx == 0) {
      //      dsp48e2s(i).DATAIN.B := io.a.asSInt.resize(18).asBits
      when(enPrefetchChain(idx)) {
        breg(idx) := io.a
      }

      enPrefetchChain(idx) := io.enPrefetch
      enFetchChain(idx) := io.enFetch
    } else {
      dsp.CASCDATAIN.P := dsp48e2s(idx - 1).CASCDATAOUT.P
      //      dsp48e2s(i).CASCDATAIN.B := dsp48e2s(i - 1).CASCDATAOUT.B
      when(enPrefetchChain(idx)) {
        breg(idx) := breg(idx - 1)
      }

      // Enables travel down the chain one register per slice.
      enPrefetchChain(idx).setAsReg()
      enFetchChain(idx).setAsReg().init(False)
      enPrefetchChain(idx) := enPrefetchChain(idx - 1)
      enFetchChain(idx) := enFetchChain(idx - 1)
      enPrefetchChain(idx).clearWhen(io.clrPrefetch(idx))
    }
  }

  val P = dsp48e2s.last.DATAOUT.P
  val abRes = P(17 downto 0).asBits
  // abNeg is computed but not used by any statement in this class.
  val abNeg = B"0" ## abRes.msb
  val acRes = P(35 downto 18).asSInt

  io.ab := abRes
  io.ac := acRes.asBits
}
--------------------------------------------------------------------------------
/src/test/scala/eval_int12_ws_AB_C_P.scala:
--------------------------------------------------------------------------------
import spinal.core._
import spinal.core.sim._
import spinal.lib._
import spinal.lib.eda.bench.Rtl
import xilinx.DSP48E2._
import xilinx.DSP48E2IntArithmetic.dualCascade.int12_ws_AB_C_P

import scala.language.postfixOps
import scala.util.Random

object eval_int12_ws_AB_C_P extends App {

  val pass = 8
  val reuse = 16
  val vecLength = 4
  val sampleLength = pass * reuse

  val a = Array.fill(pass)(Array.fill(vecLength)(Array.fill(4)(Random.nextInt(256) - 128)))
  val b = Array.fill(pass)(Array.fill(vecLength)(Array.fill(4)(Random.nextInt(256) - 128)))
  val aSel = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(2))))
  val bSel = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(2))))

  val ab = for (p <- 0 until pass) yield {
    for (r <- 0 until reuse) yield {
      for (s <- 0 until 4) yield {
        (for (v <- 0 until vecLength) yield {
          aSel(p)(r)(v) * a(p)(v)(s) + bSel(p)(r)(v) * b(p)(v)(s)
        }).sum
      }
    }
  }

  SimConfig.withFstWave
    .addRtl("data/sim/DSP48E2.v")
    .compile(new int12_ws_AB_C_P(vecLength, 8))
    .doSimUntilVoid { dut =>
      import dut._

      io.a.foreach(_ #= 0)
      io.b.foreach(_ #= 0)
      io.aSel.foreach(_ #= 0)
      io.bSel.foreach(_ #= 0)
      io.enPrefetch #= false
      io.enFetch #= false
      io.clrPrefetch.foreach(_ #= true)

      clockDomain.forkStimulus(10)
clockDomain.waitSampling(32)

      def preLoad(p: Int) = {
        io.enPrefetch #= false
        for (v <- 0 until vecLength) {
          for (s <- 0 until 4) {
            io.a(s) #= a(p)(vecLength - v - 1)(s) & 0xff
            io.b(s) #= b(p)(vecLength - v - 1)(s) & 0xff
          }
          io.enPrefetch #= true
          if (v == vecLength - 1) io.clrPrefetch.foreach(_ #= true)
          else io.clrPrefetch.foreach(_ #= false)
          clockDomain.waitSampling()
        }
        io.enPrefetch #= false
        io.clrPrefetch.foreach(_ #= false)
      }

      def load() = {
        io.enFetch #= true
        clockDomain.waitSampling()
        io.enFetch #= false
      }

      def procedure(p: Int) = {

        for (i <- 0 until reuse + latency) {
          for (j <- 0 until vecLength) {
            if (j <= i && i < reuse + j) {
              io.aSel(j) #= aSel(p)(i - j)(j)
              io.bSel(j) #= bSel(p)(i - j)(j)
            }
          }
          if (i > latency) {
            val index = i - latency - 1

            for (s <- 0 until 4) {
              assert(((io.ab(s).toInt << 20) >> 20) == ab(p)(index)(s))
            }
          }
          clockDomain.waitSampling(1)
        }
      }

      preLoad(0)

      for (pp <- 0 until pass) {
        fork {
          load()
        }
        fork {
          procedure(pp)
        }
        if (pp < pass - 1) {
          fork {
            preLoad(pp + 1)
          }
        }
        clockDomain.waitSampling(reuse)
      }
      clockDomain.waitSampling(128)
      simSuccess()
    }
}
--------------------------------------------------------------------------------
/src/main/scala/example/b1024_pe.scala:
--------------------------------------------------------------------------------
package example

import spinal.core._
import xilinx.DSP48E2IntArithmetic.cascade.int8_dotp
import xilinx.DSP48E2IntArithmetic.standalone.dpuczdx8g_acc

import scala.language.postfixOps

/**
 * Processing element made of two `int8_dotp` chains and two `dpuczdx8g_acc` accumulators.
 *
 * The vertical (`srcV`) and horizontal (`srcH`) operand bundles are registered once, fed to
 * the chains and forwarded on `dstV` / `dstH` unless this PE is the last one in that
 * direction. The chain results are captured one and two cycles later, tagged as
 * clock-domain crossings, and consumed by the accumulators inside the `clkx1` clocking area.
 *
 * @param clkx1              clock domain in which the two accumulators are instantiated
 * @param length             number of lanes in each operand vector (passed to `int8_dotp`)
 * @param isLastPEVertical   when true no `dstV` output bundle is created (`dstV` is null)
 * @param isLastPEHorizontal when true no `dstH` output bundle is created (`dstH` is null)
 */
class b1024_pe(
                clkx1: ClockDomain,
                length: Int,
                isLastPEVertical: Boolean = false,
                isLastPEHorizontal: Boolean = false
              ) extends Component {

  case class inpBdl() extends Bundle {
    val inVld = Bool()
    val accVld = Bool()
    val biasRst = Bool()
    val bias0 = Bits(26 bits)
    val bias1 = Bits(26 bits)
    val bias2 = Bits(26 bits)
    val bias3 = Bits(26 bits)
  }

  case class outBdl() extends Bundle {
    val out0 = Bits(29 bits)
    val out1 = Bits(29 bits)
    val out2 = Bits(29 bits)
    val out3 = Bits(29 bits)
  }

  case class vertBdl() extends Bundle {
    val a0 = Vec(Bits(8 bits), length)
    val a1 = Vec(Bits(8 bits), length)
  }

  case class horizBdl() extends Bundle {
    val b0 = Vec(Bits(8 bits), length)
    val b1 = Vec(Bits(8 bits), length)
    val c0 = Vec(Bits(8 bits), length)
    val c1 = Vec(Bits(8 bits), length)
  }

  val inp = new inpBdl().asInput()
  val srcV = new vertBdl().asInput()
  val srcH = new horizBdl().asInput()
  val out = new outBdl().asOutput()

  // Forwarding ports exist only when there is a neighbour in that direction.
  val dstV = if (isLastPEVertical) null else new vertBdl().asOutput()
  val dstH = if (isLastPEHorizontal) null else new horizBdl().asOutput()

  val srcVDly = RegNext(srcV)
  val srcHDly = RegNext(srcH)

  if (!isLastPEVertical) dstV := srcVDly
  if (!isLastPEHorizontal) dstH := srcHDly

  val chain0 = new int8_dotp(length)
  val chain1 = new int8_dotp(length)

  chain0.io.a := srcVDly.a0
  chain0.io.b := srcHDly.b0
  chain0.io.c := srcHDly.c0

  chain1.io.a := srcVDly.a1
  chain1.io.b := srcHDly.b1
  chain1.io.c := srcHDly.c1

  // Each result is captured after one and after two register stages; all capture registers
  // are tagged as clock-domain crossings because they are read inside the clkx1 area.
  val ab0 = RegNext(chain0.io.ab).addTag(crossClockDomain)
  val ab1 = RegNext(chain1.io.ab).addTag(crossClockDomain)
  val ab0Dly = RegNext(RegNext(chain0.io.ab)).addTag(crossClockDomain)
  val ab1Dly = RegNext(RegNext(chain1.io.ab)).addTag(crossClockDomain)

  val ac0 = RegNext(chain0.io.ac).addTag(crossClockDomain)
  val ac1 = RegNext(chain1.io.ac).addTag(crossClockDomain)
  val ac0Dly = RegNext(RegNext(chain0.io.ac)).addTag(crossClockDomain)
  val ac1Dly = RegNext(RegNext(chain1.io.ac)).addTag(crossClockDomain)

  val x1 = new ClockingArea(clkx1) {
    // acc0 accumulates the a*b results with bias0 / bias1.
    val acc0 = new dpuczdx8g_acc()
    acc0.io.inVld := inp.inVld
    acc0.io.accVld := inp.accVld
    acc0.io.biasRst := inp.biasRst
    acc0.io.bias0 := inp.bias0
    acc0.io.bias1 := inp.bias1
    out.out0 := acc0.io.out0
    out.out1 := acc0.io.out1
    acc0.io.p0 := ab0
    acc0.io.p1 := ab1
    acc0.io.p2 := ab0Dly
    acc0.io.p3 := ab1Dly

    // acc1 accumulates the a*c results with bias2 / bias3.
    val acc1 = new dpuczdx8g_acc()
    acc1.io.inVld := inp.inVld
    acc1.io.accVld := inp.accVld
    acc1.io.biasRst := inp.biasRst
    acc1.io.bias0 := inp.bias2
    acc1.io.bias1 := inp.bias3
    out.out2 := acc1.io.out0
    out.out3 := acc1.io.out1
    acc1.io.p0 := ac0
    acc1.io.p1 := ac1
    acc1.io.p2 := ac0Dly
    acc1.io.p3 := ac1Dly
  }
}

/** Generates Verilog for `b1024_pe` and `ehb1024_pe`, each with length 4 and an external clock. */
object pe extends App {
  SpinalVerilog(new b1024_pe(ClockDomain.external("clkx1"), 4))
  SpinalVerilog(new ehb1024_pe(ClockDomain.external("clkx2"), 4))
}
--------------------------------------------------------------------------------
/src/main/scala/example/int12_ws_ab_c_p_clb.scala:
--------------------------------------------------------------------------------
package example

import spinal.core._
import spinal.lib._
import xilinx.DSP48E2._

import scala.language.postfixOps

class int12_ws_ab_c_p_clb(length: Int, width: Int) extends Component {

  val io = new Bundle {
    val aSel = in Vec(Bits(1 bits), length)
    val bSel = in Vec(Bits(1 bits), length)
    val a = in Vec(Bits(width bits), 4)
    val b = in Vec(Bits(width bits), 4)
    val ab = out Vec(Bits(12 bits), 4)

    val enPrefetch = in Bool()
    val enFetch = in Bool()
    val clrPrefetch = in Vec(Bool(), length)
  }

  import DSP48E2ConfigMode._
  import DSP48E2ConfigABCD._
  import DSP48E2ConfigWXYZ._

  require(width <= 12)
28 | val latency = length + 2 - 1 29 | 30 | val attrs = Array.fill(length)(new DSP48E2Attributes) 31 | 32 | for (i <- 0 until length) { 33 | set_alu_attr(attrs(i), simd = 4) 34 | inmode.set_static_inmode_attr(attrs(i)) 35 | opmode.set_dynamic_opmode_attr(attrs(i)) 36 | ab_concat.set_ab_concat_attr(attrs(i)) 37 | c.set_c_input_attr(attrs(i)) 38 | d.set_mute_d_attr(attrs(i)) 39 | if (i != 0) { 40 | a.set_a_cascade(attrs(i)) 41 | b.set_b_cascade(attrs(i)) 42 | } 43 | } 44 | 45 | val dsp48e2s = attrs.map(attr => new DSP48E2(attr)) 46 | val enPrefetchChain = Vec(Bool(), length) 47 | val enFetchChain = Vec(Bool(), length) 48 | val aBits = io.a.asBits 49 | val bBits = io.b.asBits 50 | 51 | val abPortDataChain = Vec(Bits(width * 4 bits), length) 52 | val cPortDataChain = Vec(Bits(width * 4 bits), length) 53 | 54 | for (i <- 0 until length) { 55 | inmode.assign_static_inmode_ctrl(dsp48e2s(i)) 56 | inmode.assign_default(dsp48e2s(i)) 57 | opmode.assign_dynamic_opmode_ctrl(dsp48e2s(i)) 58 | w.w_sel_p(dsp48e2s(i), False) 59 | x.x_sel_ab(dsp48e2s(i), io.aSel(i).msb) 60 | y.y_sel_c(dsp48e2s(i), io.bSel(i).msb) 61 | z.z_sel_pcin(dsp48e2s(i), if (i != 0) True else False) 62 | ab_concat.assign_ab_concat_ctrl(dsp48e2s(i), enFetchChain(i)) 63 | c.assign_c_input_ctrl(dsp48e2s(i), enFetchChain(i)) 64 | assign_m_ctrl(dsp48e2s(i)) 65 | assign_p_ctrl(dsp48e2s(i), ce = True, rst = False) 66 | 67 | val ab = abPortDataChain(i). 68 | subdivideIn(4 slices). 69 | map(_.asSInt.resize(12 bits)). 70 | asBits() 71 | 72 | dsp48e2s(i).DATAIN.A := ab.drop(18) 73 | dsp48e2s(i).DATAIN.B := ab.take(18) 74 | 75 | dsp48e2s(i).DATAIN.C := cPortDataChain(i). 76 | subdivideIn(4 slices). 77 | map(_.asSInt.resize(12 bits)). 
78 | asBits() 79 | 80 | abPortDataChain(i).setAsReg() 81 | cPortDataChain(i).setAsReg() 82 | if (i == 0) { 83 | // dsp48e2s(i).DATAIN.A := aBits.drop(18) 84 | // dsp48e2s(i).DATAIN.B := aBits.take(18) 85 | 86 | enPrefetchChain(i) := io.enPrefetch 87 | enFetchChain(i) := io.enFetch 88 | 89 | when(enPrefetchChain(i))(abPortDataChain(i) := aBits) 90 | when(enPrefetchChain(i))(cPortDataChain(i) := bBits) 91 | } 92 | else { 93 | dsp48e2s(i).CASCDATAIN.P := dsp48e2s(i - 1).CASCDATAOUT.P 94 | // dsp48e2s(i).CASCDATAIN.A := dsp48e2s(i - 1).CASCDATAOUT.A 95 | // dsp48e2s(i).CASCDATAIN.B := dsp48e2s(i - 1).CASCDATAOUT.B 96 | 97 | enPrefetchChain(i).setAsReg() 98 | enFetchChain(i).setAsReg().init(False) 99 | enPrefetchChain(i) := enPrefetchChain(i - 1) 100 | enFetchChain(i) := enFetchChain(i - 1) 101 | enPrefetchChain(i).clearWhen(io.clrPrefetch(i)) 102 | 103 | when(enPrefetchChain(i))(abPortDataChain(i) := abPortDataChain(i - 1)) 104 | when(enPrefetchChain(i))(cPortDataChain(i) := cPortDataChain(i - 1)) 105 | } 106 | } 107 | 108 | io.ab.assignFromBits(dsp48e2s.last.DATAOUT.P) 109 | } 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About SpinalDLA 2 | SpinalDLA is an RTL design library for deep learning accelerators based on SpinalHDL, aiming to provide agile development tools for hardware developers specializing in customized hardware. ```(still under development)``` 3 | 4 | SpinalDLA integrates the following features: 5 | 6 | - flexible and versatile DSP-based arithmetic units on Xilinx FPGAs ```(coming soon)``` 7 | - very useful memory controller for deep learning applications ```(coming soon)``` 8 | - versatile systolic array instantiation templates ```(coming soon)``` 9 | - agile simulation for deep neural networks ```(coming soon)``` 10 | - several deep learning accelerator design examples ```(coming soon)``` 11 | - and more... 
12 | 13 | # Plug-and-Play RTL Modules 14 | 15 | This library provides users with a set of plug-and-play RTL modules, which do not require users to have a deep understanding of SpinalHDL. Users can simply instantiate the modules and connect them together to build their own deep learning accelerators. 16 | 17 | Located in the ```verilog/``` directory, the RTL modules are written in Verilog, which can be easily integrated into any existing RTL design flow. 18 | 19 | The corresponding simulation waveforms are located in the ```fst/``` directory and can be viewed with [GTKWave](http://gtkwave.sourceforge.net/). 20 | 21 | The modules are listed in the table below: 22 | 23 | | Module Name | Description | 24 | |:------------------------|:-----------------------------------------------------------------------------------------------------------------| 25 | | ```int8_mul.v``` | Xilinx INT8 Packing Technique | 26 | | ```uint4_mul.v``` | Xilinx INT4 Packing Technique | 27 | | ```int12_xadd.v``` | Spike-Based Synaptic Operation | 28 | | ```int8_dotp.v``` | Xilinx INT8 Dot Product Chain | 29 | | ```int8_ws_B_P.v``` | INT8 Weight Stationary Systolic Array Chain with B and P Cascade, B path is used by In-DSP Operand Prefetching. | 30 | | ```int8_ws_AD_B.v``` | INT8 Weight Stationary Systolic Array Chain with AD and B Cascade, B path is used by In-DSP Operand Prefetching. | 31 | | ```int16_dotp.v``` | INT16 Dot Product Chain | 32 | | ```int16_dotp_ddr.v``` | INT16 Dot Product Chain with In-DSP Time-Multiplexing | 33 | | ```int16_ws_B_P.v``` | INT16 Weight Stationary Systolic Array Chain with B and P Cascade | 34 | | ```int16_os_B_P.v``` | INT16 Output Stationary Systolic Array Chain with B and P Cascade, P path is used by Partial Sums Offloading. | 35 | | ```int24_acc_scale.v``` | SIMD=2 Accumulate then Scale Operation | 36 | 37 | 38 | # How to Use it 39 | This library is based on SpinalHDL with SBT build. ```(still under development)``` 40 | 41 | 1.
Add the sbt-github-packages plugin to enable sbt to consume the package. Add this line to your `./project/plugins.sbt` file: 42 | ``` 43 | addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.2") 44 | ``` 45 | 2. Add the following lines to your `./build.sbt` file: 46 | ``` 47 | githubTokenSource := TokenSource.GitConfig("github.token") 48 | resolvers += Resolver.githubPackages("adamgallas", "SpinalDLA") 49 | libraryDependencies += "casia" %% "SpinalDLA" % "0.3" 50 | ``` 51 | 52 | 3. Rebuild your project with `sbt compile`. 53 | -------------------------------------------------------------------------------- /src/test/scala/eval_int8_ws_AD_B.scala: -------------------------------------------------------------------------------- 1 | import spinal.core._ 2 | import spinal.core.sim._ 3 | import spinal.lib._ 4 | import spinal.lib.eda.bench.Rtl 5 | import xilinx.DSP48E2._ 6 | import xilinx.DSP48E2IntArithmetic.dualCascade.int8_ws_AD_B 7 | 8 | import scala.language.postfixOps 9 | import scala.util.Random 10 | 11 | object eval_int8_ws_AD_B extends App { 12 | 13 | SpinalVerilog(new int8_ws_AD_B(8)) 14 | 15 | val pass = 8 16 | val reuse = 16 17 | val vecLength = 8 18 | 19 | val a = Array.fill(pass)(Array.fill(vecLength)(Random.nextInt(256) - 128)) 20 | val b = Array.fill(pass)(Array.fill(reuse)(Random.nextInt(256) - 128)) 21 | val c = Array.fill(pass)(Array.fill(reuse)(Random.nextInt(256) - 128)) 22 | val abIn = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(256) - 128))) 23 | val acIn = Array.fill(pass)(Array.fill(reuse)(Array.fill(vecLength)(Random.nextInt(256) - 128))) 24 | 25 | val abOut = for (p <- 0 until pass) yield { 26 | for (r <- 0 until reuse) yield { 27 | for (v <- 0 until vecLength) yield { 28 | abIn(p)(r)(v) + a(p)(v) * b(p)(r) 29 | } 30 | } 31 | } 32 | 33 | val acOut = for (p <- 0 until pass) yield { 34 | for (r <- 0 until reuse) yield { 35 | for (v <- 0 until vecLength) yield { 36 | acIn(p)(r)(v) + a(p)(v) * c(p)(r) 37 | } 38 | }
39 | } 40 | 41 | SimConfig.withFstWave 42 | .addRtl("data/sim/DSP48E2.v") 43 | .compile(new int8_ws_AD_B(vecLength)) 44 | .doSimUntilVoid { dut => 45 | import dut._ 46 | 47 | io.a #= 0 48 | io.b #= 0 49 | io.c #= 0 50 | io.abIn.foreach(_ #= 0) 51 | io.acIn.foreach(_ #= 0) 52 | io.enPrefetch #= false 53 | io.enFetch #= false 54 | io.clrPrefetch.foreach(_ #= true) 55 | 56 | clockDomain.forkStimulus(10) 57 | clockDomain.waitSampling(32) 58 | 59 | def preLoad(p: Int) = { 60 | io.enPrefetch #= false 61 | for (v <- 0 until vecLength) { 62 | io.a #= a(p)(vecLength - v - 1) & 0xff 63 | io.enPrefetch #= true 64 | if (v == vecLength - 1) io.clrPrefetch.foreach(_ #= true) 65 | else io.clrPrefetch.foreach(_ #= false) 66 | clockDomain.waitSampling() 67 | } 68 | io.enPrefetch #= false 69 | io.clrPrefetch.foreach(_ #= false) 70 | } 71 | 72 | def load() = { 73 | io.enFetch #= true 74 | clockDomain.waitSampling() 75 | io.enFetch #= false 76 | } 77 | 78 | def assign_psum(p: Int) = { 79 | clockDomain.waitSampling(2) 80 | for (i <- 0 until reuse + vecLength) { 81 | for (j <- 0 until vecLength) { 82 | if (j <= i && i < reuse + j) { 83 | io.abIn(j) #= abIn(p)(i - j)(j) & 0x3ffff 84 | io.acIn(j) #= acIn(p)(i - j)(j) & 0x3ffff 85 | } 86 | } 87 | clockDomain.waitSampling(1) 88 | } 89 | } 90 | 91 | def assign_input(p: Int) = { 92 | for (i <- 0 until reuse) { 93 | io.b #= b(p)(i) & 0xff 94 | io.c #= c(p)(i) & 0xff 95 | clockDomain.waitSampling(1) 96 | } 97 | } 98 | 99 | def check_output(p: Int) = { 100 | clockDomain.waitSampling(5) 101 | for (i <- 0 until reuse + vecLength) { 102 | println(i) 103 | for (j <- 0 until vecLength) { 104 | if (j <= i && i < reuse + j) { 105 | 106 | println((io.abOut(j).toInt << 14) >> 14, abOut(p)(i - j)(j)) 107 | println((io.acOut(j).toInt << 14) >> 14, acOut(p)(i - j)(j)) 108 | } 109 | } 110 | clockDomain.waitSampling(1) 111 | } 112 | } 113 | 114 | preLoad(0) 115 | 116 | for (pp <- 0 until pass) { 117 | fork { 118 | load() 119 | } 120 | fork { 121 | 
assign_psum(pp) 122 | } 123 | fork { 124 | assign_input(pp) 125 | } 126 | fork { 127 | check_output(pp) 128 | } 129 | if (pp < pass - 1) { 130 | fork { 131 | preLoad(pp + 1) 132 | } 133 | } 134 | clockDomain.waitSampling(reuse) 135 | } 136 | 137 | clockDomain.waitSampling(128) 138 | simSuccess() 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2/DSP48E2.scala: -------------------------------------------------------------------------------- 1 | package xilinx.DSP48E2 2 | 3 | import spinal.core._ 4 | 5 | import scala.language.postfixOps 6 | 7 | /** 8 | * The DSP48E2 blackbox wrapper for SpinalHDL is inspired by the Chainsaw implementation: 9 | * 10 | * https://github.com/Chainsaw-Team/Chainsaw/tree/master/src/main/scala/Chainsaw/device 11 | */ 12 | 13 | case class DSP48E2INPUT() extends Bundle { 14 | val A = in Bits (30 bits) 15 | val B = in Bits (18 bits) 16 | val C = in Bits (48 bits) 17 | val D = in Bits (27 bits) 18 | val CARRYIN = in Bits (1 bits) 19 | val all = Seq(A, B, C, D, CARRYIN) 20 | } 21 | 22 | case class DSP48E2OUTPUT() extends Bundle { 23 | val P = out Bits (48 bits) 24 | val CARRYOUT = out Bits (4 bits) 25 | val XOROUT = out Bits (8 bits) 26 | val OVERFLOW, UNDERFLOW = out Bool() 27 | val PATTERNBDETECT, PATTERNDETECT = out Bool() 28 | } 29 | 30 | case class DSP48E2CONTROL() extends Bundle { 31 | val ALUMODE = Bits(4 bits) 32 | val INMODE = Bits(5 bits) 33 | val OPMODE = Bits(9 bits) 34 | val CARRYINSEL = Bits(3 bits) 35 | val all = Seq(ALUMODE, INMODE, OPMODE, CARRYINSEL) 36 | } 37 | 38 | case class DSP48E2CASC() extends Bundle { 39 | val A = Bits(30 bits) 40 | val B = Bits(18 bits) 41 | val P = Bits(48 bits) 42 | val CARRYCAS = Bits(1 bits) 43 | val MULTSIGN = Bits(1 bits) 44 | val all = Seq(A, B, P, CARRYCAS) 45 | 46 | def setAsCascadeOut(): Unit = { 47 | all.foreach(signal => signal.setName(signal.getPartialName() + "COUT")) 48 | 
this.MULTSIGN.setName("MULTSIGNOUT") 49 | } 50 | 51 | def setAsCascadeIn(): Unit = { 52 | all.foreach(signal => signal.setName(signal.getPartialName() + "CIN")) 53 | this.MULTSIGN.setName("MULTSIGNIN") 54 | } 55 | } 56 | 57 | case class DSP48E2CEs() extends Bundle { 58 | val A1, A2, B1, B2, C, D, AD, M, P, CARRYIN, CTRL, INMODE, ALUMODE = Bool() 59 | val all = Seq(A1, A2, B1, B2, C, D, AD, M, P, CARRYIN, CTRL, INMODE, ALUMODE) 60 | all.foreach(signal => signal.setName("CE" + signal.getPartialName())) 61 | } 62 | 63 | case class DSP48E2RSTs() extends Bundle { 64 | val A, B, C, D, M, P, ALLCARRYIN, CTRL, INMODE, ALUMODE = Bool() 65 | val all = Seq(A, B, C, D, M, P, ALLCARRYIN, CTRL, INMODE, ALUMODE) 66 | all.foreach(signal => signal.setName("RST" + signal.getPartialName())) 67 | } 68 | 69 | class DSP48E2Attributes() { 70 | 71 | var A_INPUT = "DIRECT" 72 | var B_INPUT = "DIRECT" 73 | var AMULTSEL = "A" 74 | var BMULTSEL = "B" 75 | var PREADDINSEL = "A" 76 | 77 | var USE_MULT = "MULTIPLY" 78 | var USE_SIMD = "ONE48" 79 | 80 | var AREG, BREG, CREG, DREG, ADREG, MREG, PREG = 1 81 | var ACASCREG, BCASCREG = 1 82 | var CARRYINREG, CARRYINSELREG = 1 83 | var INMODEREG, OPMODEREG, ALUMODEREG = 1 84 | 85 | def generics = Seq( 86 | "A_INPUT" -> A_INPUT, 87 | "B_INPUT" -> B_INPUT, 88 | "AMULTSEL" -> AMULTSEL, 89 | "BMULTSEL" -> BMULTSEL, 90 | "PREADDINSEL" -> PREADDINSEL, 91 | 92 | "USE_MULT" -> USE_MULT, 93 | "USE_SIMD" -> USE_SIMD, 94 | 95 | "AREG" -> AREG, 96 | "BREG" -> BREG, 97 | "CREG" -> CREG, 98 | "DREG" -> DREG, 99 | "ADREG" -> ADREG, 100 | "MREG" -> MREG, 101 | "PREG" -> PREG, 102 | 103 | "ACASCREG" -> ACASCREG, 104 | "BCASCREG" -> BCASCREG, 105 | 106 | "CARRYINREG" -> CARRYINREG, 107 | "CARRYINSELREG" -> CARRYINSELREG, 108 | 109 | "INMODEREG" -> INMODEREG, 110 | "OPMODEREG" -> OPMODEREG, 111 | "ALUMODEREG" -> ALUMODEREG 112 | ) 113 | } 114 | 115 | class DSP48E2(attrs: DSP48E2Attributes) extends BlackBox { 116 | addGenerics(attrs.generics: _*) 117 | val CLK = in 
Bool() 118 | val INST = in(DSP48E2CONTROL()) 119 | 120 | val CASCDATAIN = in(DSP48E2CASC()) 121 | val CASCDATAOUT = out(DSP48E2CASC()) 122 | 123 | val CEs = in(DSP48E2CEs()) 124 | val RSTs = in(DSP48E2RSTs()) 125 | 126 | val DATAIN = in(DSP48E2INPUT()) 127 | val DATAOUT = out(DSP48E2OUTPUT()) 128 | 129 | INST.setName("") 130 | DATAIN.setName("") 131 | DATAOUT.setName("") 132 | CASCDATAOUT.setAsCascadeOut() 133 | CASCDATAIN.setAsCascadeIn() 134 | 135 | CEs.all.foreach(_.default(False)) 136 | RSTs.all.foreach(_.default(False)) 137 | DATAIN.all.foreach(s => s.default(s.getZero)) 138 | INST.all.foreach(s => s.default(s.getZero)) 139 | 140 | CASCDATAIN.all.foreach(s => s.default(s.getZero)) 141 | CASCDATAIN.MULTSIGN.default(CASCDATAIN.MULTSIGN.getZero) 142 | 143 | mapClockDomain(clock = CLK) 144 | } 145 | -------------------------------------------------------------------------------- /verilog/xilinx/DSP48E2Arithmetic/int16_mul.v: -------------------------------------------------------------------------------- 1 | // Generator : SpinalHDL v1.9.0 git head : 7d30dbacbd3aa1be42fb2a3d4da5675703aae2ae 2 | // Component : int16_mul 3 | // Git hash : 58987f7f234d2f22202f16d8122fac665f2828cb 4 | 5 | `timescale 1ns/1ps 6 | 7 | module int16_mul ( 8 | input [15:0] io_a, 9 | input [15:0] io_b, 10 | output [31:0] io_ab, 11 | input clk 12 | ); 13 | 14 | reg [4:0] dsp_INMODE; 15 | reg [8:0] dsp_OPMODE; 16 | wire [29:0] dsp_A; 17 | wire [17:0] dsp_B; 18 | wire [29:0] dsp_ACOUT; 19 | wire [17:0] dsp_BCOUT; 20 | wire [47:0] dsp_PCOUT; 21 | wire [0:0] dsp_CARRYCASCOUT; 22 | wire [0:0] dsp_MULTSIGNOUT; 23 | wire [47:0] dsp_P; 24 | wire [3:0] dsp_CARRYOUT; 25 | wire [7:0] dsp_XOROUT; 26 | wire dsp_OVERFLOW; 27 | wire dsp_UNDERFLOW; 28 | wire dsp_PATTERNBDETECT; 29 | wire dsp_PATTERNDETECT; 30 | wire [29:0] tmp_A; 31 | wire [15:0] tmp_A_1; 32 | wire [17:0] tmp_B; 33 | wire [15:0] tmp_B_1; 34 | wire tmp_OPMODE; 35 | function [4:0] zz_dsp_INMODE(input dummy); 36 | begin 37 | zz_dsp_INMODE[1] = 
1'b0; 38 | zz_dsp_INMODE[2] = 1'b1; 39 | zz_dsp_INMODE[3] = 1'b0; 40 | zz_dsp_INMODE[0] = 1'b0; 41 | zz_dsp_INMODE[4] = 1'b0; 42 | end 43 | endfunction 44 | wire [4:0] tmp_1; 45 | 46 | assign tmp_A_1 = io_b; 47 | assign tmp_A = {{14{tmp_A_1[15]}}, tmp_A_1}; 48 | assign tmp_B_1 = io_a; 49 | assign tmp_B = {{2{tmp_B_1[15]}}, tmp_B_1}; 50 | DSP48E2 #( 51 | .A_INPUT("DIRECT"), 52 | .B_INPUT("DIRECT"), 53 | .AMULTSEL("A"), 54 | .BMULTSEL("B"), 55 | .PREADDINSEL("A"), 56 | .USE_MULT("MULTIPLY"), 57 | .USE_SIMD("ONE48"), 58 | .AREG(2), 59 | .BREG(2), 60 | .CREG(1), 61 | .DREG(1), 62 | .ADREG(1), 63 | .MREG(1), 64 | .PREG(1), 65 | .ACASCREG(1), 66 | .BCASCREG(1), 67 | .CARRYINREG(1), 68 | .CARRYINSELREG(1), 69 | .INMODEREG(0), 70 | .OPMODEREG(0), 71 | .ALUMODEREG(1) 72 | ) dsp ( 73 | .CLK (clk ), //i 74 | .ALUMODE (4'b0000 ), //i 75 | .INMODE (dsp_INMODE[4:0] ), //i 76 | .OPMODE (dsp_OPMODE[8:0] ), //i 77 | .CARRYINSEL (3'b000 ), //i 78 | .ACIN (30'h00000000 ), //i 79 | .BCIN (18'h00000 ), //i 80 | .PCIN (48'h000000000000 ), //i 81 | .CARRYCASCIN (1'b0 ), //i 82 | .MULTSIGNIN (1'b0 ), //i 83 | .ACOUT (dsp_ACOUT[29:0] ), //o 84 | .BCOUT (dsp_BCOUT[17:0] ), //o 85 | .PCOUT (dsp_PCOUT[47:0] ), //o 86 | .CARRYCASCOUT (dsp_CARRYCASCOUT ), //o 87 | .MULTSIGNOUT (dsp_MULTSIGNOUT ), //o 88 | .CEA1 (1'b1 ), //i 89 | .CEA2 (1'b1 ), //i 90 | .CEB1 (1'b1 ), //i 91 | .CEB2 (1'b1 ), //i 92 | .CEC (1'b0 ), //i 93 | .CED (1'b0 ), //i 94 | .CEAD (1'b0 ), //i 95 | .CEM (1'b1 ), //i 96 | .CEP (1'b1 ), //i 97 | .CECARRYIN (1'b0 ), //i 98 | .CECTRL (1'b0 ), //i 99 | .CEINMODE (1'b0 ), //i 100 | .CEALUMODE (1'b0 ), //i 101 | .RSTA (1'b0 ), //i 102 | .RSTB (1'b0 ), //i 103 | .RSTC (1'b0 ), //i 104 | .RSTD (1'b0 ), //i 105 | .RSTM (1'b0 ), //i 106 | .RSTP (1'b0 ), //i 107 | .RSTALLCARRYIN (1'b0 ), //i 108 | .RSTCTRL (1'b0 ), //i 109 | .RSTINMODE (1'b0 ), //i 110 | .RSTALUMODE (1'b0 ), //i 111 | .A (dsp_A[29:0] ), //i 112 | .B (dsp_B[17:0] ), //i 113 | .C (48'hffffffffffff ), //i 114 | .D 
(27'h7ffffff ), //i 115 | .CARRYIN (1'b0 ), //i 116 | .P (dsp_P[47:0] ), //o 117 | .CARRYOUT (dsp_CARRYOUT[3:0] ), //o 118 | .XOROUT (dsp_XOROUT[7:0] ), //o 119 | .OVERFLOW (dsp_OVERFLOW ), //o 120 | .UNDERFLOW (dsp_UNDERFLOW ), //o 121 | .PATTERNBDETECT (dsp_PATTERNBDETECT), //o 122 | .PATTERNDETECT (dsp_PATTERNDETECT ) //o 123 | ); 124 | assign tmp_1 = zz_dsp_INMODE(1'b0); 125 | always @(*) dsp_INMODE = tmp_1; 126 | assign tmp_OPMODE = 1'b0; 127 | always @(*) begin 128 | dsp_OPMODE[8 : 7] = {tmp_OPMODE,tmp_OPMODE}; 129 | dsp_OPMODE[1 : 0] = {1'b0,1'b1}; 130 | dsp_OPMODE[3 : 2] = {1'b0,1'b1}; 131 | dsp_OPMODE[6 : 4] = {{1'b0,1'b0},1'b0}; 132 | end 133 | 134 | assign dsp_A = tmp_A; 135 | assign dsp_B = tmp_B; 136 | assign io_ab = dsp_P[31 : 0]; 137 | 138 | endmodule 139 | -------------------------------------------------------------------------------- /verilog/xilinx/DSP48E2Arithmetic/int24_acc.v: -------------------------------------------------------------------------------- 1 | // Generator : SpinalHDL v1.9.0 git head : 7d30dbacbd3aa1be42fb2a3d4da5675703aae2ae 2 | // Component : int24_acc 3 | // Git hash : 58987f7f234d2f22202f16d8122fac665f2828cb 4 | 5 | `timescale 1ns/1ps 6 | 7 | module int24_acc ( 8 | input [23:0] io_a, 9 | input [23:0] io_b, 10 | input [23:0] io_c, 11 | input [23:0] io_d, 12 | output [23:0] io_ab, 13 | output [23:0] io_cd, 14 | input io_valid, 15 | input io_last, 16 | input clk, 17 | input reset 18 | ); 19 | 20 | reg [4:0] dsp_INMODE; 21 | reg [8:0] dsp_OPMODE; 22 | wire [29:0] dsp_A; 23 | wire [17:0] dsp_B; 24 | wire [29:0] dsp_ACOUT; 25 | wire [17:0] dsp_BCOUT; 26 | wire [47:0] dsp_PCOUT; 27 | wire [0:0] dsp_CARRYCASCOUT; 28 | wire [0:0] dsp_MULTSIGNOUT; 29 | wire [47:0] dsp_P; 30 | wire [3:0] dsp_CARRYOUT; 31 | wire [7:0] dsp_XOROUT; 32 | wire dsp_OVERFLOW; 33 | wire dsp_UNDERFLOW; 34 | wire dsp_PATTERNBDETECT; 35 | wire dsp_PATTERNDETECT; 36 | reg accValid; 37 | wire tmp_OPMODE; 38 | wire [47:0] ac; 39 | wire [47:0] bd; 40 | function 
[4:0] zz_dsp_INMODE(input dummy); 41 | begin 42 | zz_dsp_INMODE[1] = 1'b0; 43 | zz_dsp_INMODE[2] = 1'b1; 44 | zz_dsp_INMODE[3] = 1'b0; 45 | zz_dsp_INMODE[0] = 1'b0; 46 | zz_dsp_INMODE[4] = 1'b0; 47 | end 48 | endfunction 49 | wire [4:0] tmp_1; 50 | 51 | DSP48E2 #( 52 | .A_INPUT("DIRECT"), 53 | .B_INPUT("DIRECT"), 54 | .AMULTSEL("A"), 55 | .BMULTSEL("B"), 56 | .PREADDINSEL("A"), 57 | .USE_MULT("NONE"), 58 | .USE_SIMD("TWO24"), 59 | .AREG(1), 60 | .BREG(1), 61 | .CREG(1), 62 | .DREG(1), 63 | .ADREG(1), 64 | .MREG(0), 65 | .PREG(1), 66 | .ACASCREG(1), 67 | .BCASCREG(1), 68 | .CARRYINREG(1), 69 | .CARRYINSELREG(1), 70 | .INMODEREG(0), 71 | .OPMODEREG(1), 72 | .ALUMODEREG(1) 73 | ) dsp ( 74 | .CLK (clk ), //i 75 | .ALUMODE (4'b0000 ), //i 76 | .INMODE (dsp_INMODE[4:0] ), //i 77 | .OPMODE (dsp_OPMODE[8:0] ), //i 78 | .CARRYINSEL (3'b000 ), //i 79 | .ACIN (30'h00000000 ), //i 80 | .BCIN (18'h00000 ), //i 81 | .PCIN (48'h000000000000 ), //i 82 | .CARRYCASCIN (1'b0 ), //i 83 | .MULTSIGNIN (1'b0 ), //i 84 | .ACOUT (dsp_ACOUT[29:0] ), //o 85 | .BCOUT (dsp_BCOUT[17:0] ), //o 86 | .PCOUT (dsp_PCOUT[47:0] ), //o 87 | .CARRYCASCOUT (dsp_CARRYCASCOUT ), //o 88 | .MULTSIGNOUT (dsp_MULTSIGNOUT ), //o 89 | .CEA1 (1'b0 ), //i 90 | .CEA2 (1'b1 ), //i 91 | .CEB1 (1'b0 ), //i 92 | .CEB2 (1'b1 ), //i 93 | .CEC (1'b1 ), //i 94 | .CED (1'b0 ), //i 95 | .CEAD (1'b0 ), //i 96 | .CEM (1'b0 ), //i 97 | .CEP (1'b1 ), //i 98 | .CECARRYIN (1'b0 ), //i 99 | .CECTRL (1'b1 ), //i 100 | .CEINMODE (1'b0 ), //i 101 | .CEALUMODE (1'b0 ), //i 102 | .RSTA (1'b0 ), //i 103 | .RSTB (1'b0 ), //i 104 | .RSTC (1'b0 ), //i 105 | .RSTD (1'b0 ), //i 106 | .RSTM (1'b0 ), //i 107 | .RSTP (1'b0 ), //i 108 | .RSTALLCARRYIN (1'b0 ), //i 109 | .RSTCTRL (1'b0 ), //i 110 | .RSTINMODE (1'b0 ), //i 111 | .RSTALUMODE (1'b0 ), //i 112 | .A (dsp_A[29:0] ), //i 113 | .B (dsp_B[17:0] ), //i 114 | .C (bd[47:0] ), //i 115 | .D (27'h7ffffff ), //i 116 | .CARRYIN (1'b0 ), //i 117 | .P (dsp_P[47:0] ), //o 118 | .CARRYOUT 
(dsp_CARRYOUT[3:0] ), //o 119 | .XOROUT (dsp_XOROUT[7:0] ), //o 120 | .OVERFLOW (dsp_OVERFLOW ), //o 121 | .UNDERFLOW (dsp_UNDERFLOW ), //o 122 | .PATTERNBDETECT (dsp_PATTERNBDETECT), //o 123 | .PATTERNDETECT (dsp_PATTERNDETECT ) //o 124 | ); 125 | assign tmp_1 = zz_dsp_INMODE(1'b0); 126 | always @(*) dsp_INMODE = tmp_1; 127 | always @(*) begin 128 | dsp_OPMODE[8 : 7] = {io_valid,io_valid}; 129 | dsp_OPMODE[1 : 0] = {io_valid,io_valid}; 130 | dsp_OPMODE[3 : 2] = {tmp_OPMODE,tmp_OPMODE}; 131 | dsp_OPMODE[6 : 4] = {{1'b0,accValid},1'b0}; 132 | end 133 | 134 | assign tmp_OPMODE = 1'b0; 135 | assign ac = {io_a,io_c}; 136 | assign bd = {io_b,io_d}; 137 | assign dsp_A = ac[47 : 18]; 138 | assign dsp_B = ac[17 : 0]; 139 | assign io_ab = dsp_P[47 : 24]; 140 | assign io_cd = dsp_P[23 : 0]; 141 | always @(posedge clk or posedge reset) begin 142 | if(reset) begin 143 | accValid <= 1'b0; 144 | end else begin 145 | if(io_valid) begin 146 | accValid <= 1'b1; 147 | end 148 | if(io_last) begin 149 | accValid <= 1'b0; 150 | end 151 | end 152 | end 153 | 154 | 155 | endmodule 156 | -------------------------------------------------------------------------------- /verilog/xilinx/DSP48E2Arithmetic/uint4_mul.v: -------------------------------------------------------------------------------- 1 | // Generator : SpinalHDL v1.9.0 git head : 7d30dbacbd3aa1be42fb2a3d4da5675703aae2ae 2 | // Component : uint4_mul 3 | // Git hash : 58987f7f234d2f22202f16d8122fac665f2828cb 4 | 5 | `timescale 1ns/1ps 6 | 7 | module uint4_mul ( 8 | input [3:0] io_w1, 9 | input [3:0] io_w2, 10 | input [3:0] io_a1, 11 | input [3:0] io_a2, 12 | output [7:0] io_a1w1, 13 | output [7:0] io_a1w2, 14 | output [7:0] io_a2w1, 15 | output [7:0] io_a2w2, 16 | input clk 17 | ); 18 | 19 | reg [4:0] dsp_INMODE; 20 | reg [8:0] dsp_OPMODE; 21 | wire [29:0] dsp_A; 22 | wire [17:0] dsp_B; 23 | wire [26:0] dsp_D; 24 | wire [29:0] dsp_ACOUT; 25 | wire [17:0] dsp_BCOUT; 26 | wire [47:0] dsp_PCOUT; 27 | wire [0:0] dsp_CARRYCASCOUT; 28 | 
wire [0:0] dsp_MULTSIGNOUT; 29 | wire [47:0] dsp_P; 30 | wire [3:0] dsp_CARRYOUT; 31 | wire [7:0] dsp_XOROUT; 32 | wire dsp_OVERFLOW; 33 | wire dsp_UNDERFLOW; 34 | wire dsp_PATTERNBDETECT; 35 | wire dsp_PATTERNDETECT; 36 | wire [29:0] tmp_A; 37 | wire [3:0] tmp_A_1; 38 | wire tmp_OPMODE; 39 | wire [7:0] sa1w1; 40 | wire [7:0] sa2w1; 41 | wire [7:0] sa1w2; 42 | wire [7:0] sa2w2; 43 | function [4:0] zz_dsp_INMODE(input dummy); 44 | begin 45 | zz_dsp_INMODE[1] = 1'b0; 46 | zz_dsp_INMODE[2] = 1'b1; 47 | zz_dsp_INMODE[3] = 1'b0; 48 | zz_dsp_INMODE[0] = 1'b1; 49 | zz_dsp_INMODE[4] = 1'b0; 50 | end 51 | endfunction 52 | wire [4:0] tmp_1; 53 | 54 | assign tmp_A_1 = io_w1; 55 | assign tmp_A = {{26{tmp_A_1[3]}}, tmp_A_1}; 56 | DSP48E2 #( 57 | .A_INPUT("DIRECT"), 58 | .B_INPUT("DIRECT"), 59 | .AMULTSEL("AD"), 60 | .BMULTSEL("B"), 61 | .PREADDINSEL("A"), 62 | .USE_MULT("MULTIPLY"), 63 | .USE_SIMD("ONE48"), 64 | .AREG(1), 65 | .BREG(2), 66 | .CREG(1), 67 | .DREG(1), 68 | .ADREG(1), 69 | .MREG(1), 70 | .PREG(1), 71 | .ACASCREG(1), 72 | .BCASCREG(1), 73 | .CARRYINREG(1), 74 | .CARRYINSELREG(1), 75 | .INMODEREG(0), 76 | .OPMODEREG(0), 77 | .ALUMODEREG(1) 78 | ) dsp ( 79 | .CLK (clk ), //i 80 | .ALUMODE (4'b0000 ), //i 81 | .INMODE (dsp_INMODE[4:0] ), //i 82 | .OPMODE (dsp_OPMODE[8:0] ), //i 83 | .CARRYINSEL (3'b000 ), //i 84 | .ACIN (30'h00000000 ), //i 85 | .BCIN (18'h00000 ), //i 86 | .PCIN (48'h000000000000 ), //i 87 | .CARRYCASCIN (1'b0 ), //i 88 | .MULTSIGNIN (1'b0 ), //i 89 | .ACOUT (dsp_ACOUT[29:0] ), //o 90 | .BCOUT (dsp_BCOUT[17:0] ), //o 91 | .PCOUT (dsp_PCOUT[47:0] ), //o 92 | .CARRYCASCOUT (dsp_CARRYCASCOUT ), //o 93 | .MULTSIGNOUT (dsp_MULTSIGNOUT ), //o 94 | .CEA1 (1'b1 ), //i 95 | .CEA2 (1'b0 ), //i 96 | .CEB1 (1'b1 ), //i 97 | .CEB2 (1'b1 ), //i 98 | .CEC (1'b0 ), //i 99 | .CED (1'b1 ), //i 100 | .CEAD (1'b1 ), //i 101 | .CEM (1'b1 ), //i 102 | .CEP (1'b1 ), //i 103 | .CECARRYIN (1'b0 ), //i 104 | .CECTRL (1'b0 ), //i 105 | .CEINMODE (1'b0 ), //i 106 | .CEALUMODE 
(1'b0 ), //i 107 | .RSTA (1'b0 ), //i 108 | .RSTB (1'b0 ), //i 109 | .RSTC (1'b0 ), //i 110 | .RSTD (1'b0 ), //i 111 | .RSTM (1'b0 ), //i 112 | .RSTP (1'b0 ), //i 113 | .RSTALLCARRYIN (1'b0 ), //i 114 | .RSTCTRL (1'b0 ), //i 115 | .RSTINMODE (1'b0 ), //i 116 | .RSTALUMODE (1'b0 ), //i 117 | .A (dsp_A[29:0] ), //i 118 | .B (dsp_B[17:0] ), //i 119 | .C (48'hffffffffffff ), //i 120 | .D (dsp_D[26:0] ), //i 121 | .CARRYIN (1'b0 ), //i 122 | .P (dsp_P[47:0] ), //o 123 | .CARRYOUT (dsp_CARRYOUT[3:0] ), //o 124 | .XOROUT (dsp_XOROUT[7:0] ), //o 125 | .OVERFLOW (dsp_OVERFLOW ), //o 126 | .UNDERFLOW (dsp_UNDERFLOW ), //o 127 | .PATTERNBDETECT (dsp_PATTERNBDETECT), //o 128 | .PATTERNDETECT (dsp_PATTERNDETECT ) //o 129 | ); 130 | assign tmp_1 = zz_dsp_INMODE(1'b0); 131 | always @(*) dsp_INMODE = tmp_1; 132 | assign tmp_OPMODE = 1'b0; 133 | always @(*) begin 134 | dsp_OPMODE[8 : 7] = {tmp_OPMODE,tmp_OPMODE}; 135 | dsp_OPMODE[1 : 0] = {1'b0,1'b1}; 136 | dsp_OPMODE[3 : 2] = {1'b0,1'b1}; 137 | dsp_OPMODE[6 : 4] = {{1'b0,1'b0},1'b0}; 138 | end 139 | 140 | assign dsp_A = tmp_A; 141 | assign dsp_B = {{{3'b000,io_a2},7'h00},io_a1}; 142 | assign dsp_D = {{io_w2[3],io_w2},22'h000000}; 143 | assign sa1w1 = dsp_P[7 : 0]; 144 | assign sa2w1 = dsp_P[18 : 11]; 145 | assign sa1w2 = dsp_P[29 : 22]; 146 | assign sa2w2 = dsp_P[40 : 33]; 147 | assign io_a1w1 = sa1w1; 148 | assign io_a2w1 = sa2w1; 149 | assign io_a1w2 = sa1w2; 150 | assign io_a2w2 = sa2w2; 151 | 152 | endmodule 153 | -------------------------------------------------------------------------------- /verilog/xilinx/DSP48E2Arithmetic/int12_xadd.v: -------------------------------------------------------------------------------- 1 | // Generator : SpinalHDL v1.9.0 git head : 7d30dbacbd3aa1be42fb2a3d4da5675703aae2ae 2 | // Component : int12_xadd 3 | // Git hash : 58987f7f234d2f22202f16d8122fac665f2828cb 4 | 5 | `timescale 1ns/1ps 6 | 7 | module int12_xadd ( 8 | input [0:0] io_aSel, 9 | input [0:0] io_bSel, 10 | input [11:0] io_a_0, 11 | 
input [11:0] io_a_1, 12 | input [11:0] io_a_2, 13 | input [11:0] io_a_3, 14 | input [11:0] io_b_0, 15 | input [11:0] io_b_1, 16 | input [11:0] io_b_2, 17 | input [11:0] io_b_3, 18 | output [11:0] io_ab_0, 19 | output [11:0] io_ab_1, 20 | output [11:0] io_ab_2, 21 | output [11:0] io_ab_3, 22 | input clk 23 | ); 24 | 25 | reg [4:0] dsp_INMODE; 26 | reg [8:0] dsp_OPMODE; 27 | wire [29:0] dsp_A; 28 | wire [17:0] dsp_B; 29 | wire [47:0] dsp_C; 30 | wire [29:0] dsp_ACOUT; 31 | wire [17:0] dsp_BCOUT; 32 | wire [47:0] dsp_PCOUT; 33 | wire [0:0] dsp_CARRYCASCOUT; 34 | wire [0:0] dsp_MULTSIGNOUT; 35 | wire [47:0] dsp_P; 36 | wire [3:0] dsp_CARRYOUT; 37 | wire [7:0] dsp_XOROUT; 38 | wire dsp_OVERFLOW; 39 | wire dsp_UNDERFLOW; 40 | wire dsp_PATTERNBDETECT; 41 | wire dsp_PATTERNDETECT; 42 | wire tmp_OPMODE; 43 | wire tmp_OPMODE_1; 44 | wire tmp_OPMODE_2; 45 | wire [47:0] AB; 46 | function [4:0] zz_dsp_INMODE(input dummy); 47 | begin 48 | zz_dsp_INMODE[1] = 1'b0; 49 | zz_dsp_INMODE[2] = 1'b1; 50 | zz_dsp_INMODE[3] = 1'b0; 51 | zz_dsp_INMODE[0] = 1'b0; 52 | zz_dsp_INMODE[4] = 1'b0; 53 | end 54 | endfunction 55 | wire [4:0] tmp_1; 56 | 57 | DSP48E2 #( 58 | .A_INPUT("DIRECT"), 59 | .B_INPUT("DIRECT"), 60 | .AMULTSEL("A"), 61 | .BMULTSEL("B"), 62 | .PREADDINSEL("A"), 63 | .USE_MULT("NONE"), 64 | .USE_SIMD("FOUR12"), 65 | .AREG(1), 66 | .BREG(1), 67 | .CREG(1), 68 | .DREG(1), 69 | .ADREG(1), 70 | .MREG(0), 71 | .PREG(1), 72 | .ACASCREG(1), 73 | .BCASCREG(1), 74 | .CARRYINREG(1), 75 | .CARRYINSELREG(1), 76 | .INMODEREG(0), 77 | .OPMODEREG(1), 78 | .ALUMODEREG(1) 79 | ) dsp ( 80 | .CLK (clk ), //i 81 | .ALUMODE (4'b0000 ), //i 82 | .INMODE (dsp_INMODE[4:0] ), //i 83 | .OPMODE (dsp_OPMODE[8:0] ), //i 84 | .CARRYINSEL (3'b000 ), //i 85 | .ACIN (30'h00000000 ), //i 86 | .BCIN (18'h00000 ), //i 87 | .PCIN (48'h000000000000 ), //i 88 | .CARRYCASCIN (1'b0 ), //i 89 | .MULTSIGNIN (1'b0 ), //i 90 | .ACOUT (dsp_ACOUT[29:0] ), //o 91 | .BCOUT (dsp_BCOUT[17:0] ), //o 92 | .PCOUT (dsp_PCOUT[47:0] 
), //o 93 | .CARRYCASCOUT (dsp_CARRYCASCOUT ), //o 94 | .MULTSIGNOUT (dsp_MULTSIGNOUT ), //o 95 | .CEA1 (1'b0 ), //i 96 | .CEA2 (1'b1 ), //i 97 | .CEB1 (1'b0 ), //i 98 | .CEB2 (1'b1 ), //i 99 | .CEC (1'b1 ), //i 100 | .CED (1'b0 ), //i 101 | .CEAD (1'b0 ), //i 102 | .CEM (1'b0 ), //i 103 | .CEP (1'b1 ), //i 104 | .CECARRYIN (1'b0 ), //i 105 | .CECTRL (1'b1 ), //i 106 | .CEINMODE (1'b0 ), //i 107 | .CEALUMODE (1'b0 ), //i 108 | .RSTA (1'b0 ), //i 109 | .RSTB (1'b0 ), //i 110 | .RSTC (1'b0 ), //i 111 | .RSTD (1'b0 ), //i 112 | .RSTM (1'b0 ), //i 113 | .RSTP (1'b0 ), //i 114 | .RSTALLCARRYIN (1'b0 ), //i 115 | .RSTCTRL (1'b0 ), //i 116 | .RSTINMODE (1'b0 ), //i 117 | .RSTALUMODE (1'b0 ), //i 118 | .A (dsp_A[29:0] ), //i 119 | .B (dsp_B[17:0] ), //i 120 | .C (dsp_C[47:0] ), //i 121 | .D (27'h7ffffff ), //i 122 | .CARRYIN (1'b0 ), //i 123 | .P (dsp_P[47:0] ), //o 124 | .CARRYOUT (dsp_CARRYOUT[3:0] ), //o 125 | .XOROUT (dsp_XOROUT[7:0] ), //o 126 | .OVERFLOW (dsp_OVERFLOW ), //o 127 | .UNDERFLOW (dsp_UNDERFLOW ), //o 128 | .PATTERNBDETECT (dsp_PATTERNBDETECT), //o 129 | .PATTERNDETECT (dsp_PATTERNDETECT ) //o 130 | ); 131 | assign tmp_1 = zz_dsp_INMODE(1'b0); 132 | always @(*) dsp_INMODE = tmp_1; 133 | assign tmp_OPMODE = io_bSel[0]; 134 | always @(*) begin 135 | dsp_OPMODE[8 : 7] = {tmp_OPMODE,tmp_OPMODE}; 136 | dsp_OPMODE[1 : 0] = {tmp_OPMODE_1,tmp_OPMODE_1}; 137 | dsp_OPMODE[3 : 2] = {tmp_OPMODE_2,tmp_OPMODE_2}; 138 | dsp_OPMODE[6 : 4] = {{1'b0,1'b0},1'b0}; 139 | end 140 | 141 | assign tmp_OPMODE_1 = io_aSel[0]; 142 | assign tmp_OPMODE_2 = 1'b0; 143 | assign AB = {io_a_3,{io_a_2,{io_a_1,io_a_0}}}; 144 | assign dsp_A = AB[47 : 18]; 145 | assign dsp_B = AB[17 : 0]; 146 | assign dsp_C = {io_b_3,{io_b_2,{io_b_1,io_b_0}}}; 147 | assign io_ab_0 = dsp_P[11 : 0]; 148 | assign io_ab_1 = dsp_P[23 : 12]; 149 | assign io_ab_2 = dsp_P[35 : 24]; 150 | assign io_ab_3 = dsp_P[47 : 36]; 151 | 152 | endmodule 153 | 
-------------------------------------------------------------------------------- /verilog/xilinx/DSP48E2Arithmetic/int8_mul.v: -------------------------------------------------------------------------------- 1 | // Generator : SpinalHDL v1.9.0 git head : 7d30dbacbd3aa1be42fb2a3d4da5675703aae2ae 2 | // Component : int8_mul 3 | // Git hash : 58987f7f234d2f22202f16d8122fac665f2828cb 4 | 5 | `timescale 1ns/1ps 6 | 7 | /* int8_mul: a single DSP48E2 that yields two signed 8x8 products sharing the operand io_a. io_b is sign-extended to 30 bits on A, io_a is sign-extended to 18 bits on B, and io_c is sign-extended to 9 bits and placed above 18 zero bits on D. With AMULTSEL = "AD" and INMODE = 5'b00101 the multiplier is fed from the A/D pre-adder path, so the low field of P carries a*b (io_ab = P[15:0]) and the field starting at bit 18 carries a*c (io_ac = P[33:18]). The reset input is declared but not referenced inside this module. */ module int8_mul ( 8 | input [7:0] io_a, 9 | input [7:0] io_b, 10 | input [7:0] io_c, 11 | output [15:0] io_ab, 12 | output [15:0] io_ac, 13 | input clk, 14 | input reset 15 | ); 16 | 17 | reg [4:0] dsp_INMODE; 18 | reg [8:0] dsp_OPMODE; 19 | wire [29:0] dsp_A; 20 | wire [17:0] dsp_B; 21 | wire [47:0] dsp_C; 22 | wire [26:0] dsp_D; 23 | wire [29:0] dsp_ACOUT; 24 | wire [17:0] dsp_BCOUT; 25 | wire [47:0] dsp_PCOUT; 26 | wire [0:0] dsp_CARRYCASCOUT; 27 | wire [0:0] dsp_MULTSIGNOUT; 28 | wire [47:0] dsp_P; 29 | wire [3:0] dsp_CARRYOUT; 30 | wire [7:0] dsp_XOROUT; 31 | wire dsp_OVERFLOW; 32 | wire dsp_UNDERFLOW; 33 | wire dsp_PATTERNBDETECT; 34 | wire dsp_PATTERNDETECT; 35 | wire [29:0] tmp_A; 36 | wire [7:0] tmp_A_1; 37 | wire [17:0] tmp_B; 38 | wire [7:0] tmp_B_1; 39 | wire [8:0] tmp_D_1; 40 | wire [29:0] tmp_io_ac; 41 | wire tmp_OPMODE; 42 | wire abNeg; 43 | reg abNeg_delay_1; 44 | reg abNegReg; 45 | wire [7:0] tmp_D; 46 | /* Constant INMODE value 5'b00101, wrapped in a function by the generator; the dummy argument is unused. */ function [4:0] zz_dsp_INMODE(input dummy); 47 | begin 48 | zz_dsp_INMODE[1] = 1'b0; 49 | zz_dsp_INMODE[2] = 1'b1; 50 | zz_dsp_INMODE[3] = 1'b0; 51 | zz_dsp_INMODE[0] = 1'b1; 52 | zz_dsp_INMODE[4] = 1'b0; 53 | end 54 | endfunction 55 | wire [4:0] tmp_1; 56 | 57 | assign tmp_A_1 = io_b; 58 | assign tmp_A = {{22{tmp_A_1[7]}}, tmp_A_1}; 59 | assign tmp_B_1 = io_a; 60 | assign tmp_B = {{10{tmp_B_1[7]}}, tmp_B_1}; 61 | assign tmp_D_1 = {tmp_D[7],tmp_D}; 62 | assign tmp_io_ac = dsp_P[47 : 18]; 63 | DSP48E2 #( 64 | .A_INPUT("DIRECT"), 65 | .B_INPUT("DIRECT"), 66 | .AMULTSEL("AD"), 67 | .BMULTSEL("B"), 68 | .PREADDINSEL("A"), 69 |
.USE_MULT("MULTIPLY"), 70 | .USE_SIMD("ONE48"), 71 | .AREG(1), 72 | .BREG(2), 73 | .CREG(1), 74 | .DREG(1), 75 | .ADREG(1), 76 | .MREG(1), 77 | .PREG(1), 78 | .ACASCREG(1), 79 | .BCASCREG(1), 80 | .CARRYINREG(1), 81 | .CARRYINSELREG(1), 82 | .INMODEREG(0), 83 | .OPMODEREG(0), 84 | .ALUMODEREG(1) 85 | ) dsp ( 86 | .CLK (clk ), //i 87 | .ALUMODE (4'b0000 ), //i 88 | .INMODE (dsp_INMODE[4:0] ), //i 89 | .OPMODE (dsp_OPMODE[8:0] ), //i 90 | .CARRYINSEL (3'b000 ), //i 91 | .ACIN (30'h00000000 ), //i 92 | .BCIN (18'h00000 ), //i 93 | .PCIN (48'h000000000000 ), //i 94 | .CARRYCASCIN (1'b0 ), //i 95 | .MULTSIGNIN (1'b0 ), //i 96 | .ACOUT (dsp_ACOUT[29:0] ), //o 97 | .BCOUT (dsp_BCOUT[17:0] ), //o 98 | .PCOUT (dsp_PCOUT[47:0] ), //o 99 | .CARRYCASCOUT (dsp_CARRYCASCOUT ), //o 100 | .MULTSIGNOUT (dsp_MULTSIGNOUT ), //o 101 | .CEA1 (1'b1 ), //i 102 | .CEA2 (1'b0 ), //i 103 | .CEB1 (1'b1 ), //i 104 | .CEB2 (1'b1 ), //i 105 | .CEC (1'b1 ), //i 106 | .CED (1'b1 ), //i 107 | .CEAD (1'b1 ), //i 108 | .CEM (1'b1 ), //i 109 | .CEP (1'b1 ), //i 110 | .CECARRYIN (1'b0 ), //i 111 | .CECTRL (1'b0 ), //i 112 | .CEINMODE (1'b0 ), //i 113 | .CEALUMODE (1'b0 ), //i 114 | .RSTA (1'b0 ), //i 115 | .RSTB (1'b0 ), //i 116 | .RSTC (1'b0 ), //i 117 | .RSTD (1'b0 ), //i 118 | .RSTM (1'b0 ), //i 119 | .RSTP (1'b0 ), //i 120 | .RSTALLCARRYIN (1'b0 ), //i 121 | .RSTCTRL (1'b0 ), //i 122 | .RSTINMODE (1'b0 ), //i 123 | .RSTALUMODE (1'b0 ), //i 124 | .A (dsp_A[29:0] ), //i 125 | .B (dsp_B[17:0] ), //i 126 | .C (dsp_C[47:0] ), //i 127 | .D (dsp_D[26:0] ), //i 128 | .CARRYIN (1'b0 ), //i 129 | .P (dsp_P[47:0] ), //o 130 | .CARRYOUT (dsp_CARRYOUT[3:0] ), //o 131 | .XOROUT (dsp_XOROUT[7:0] ), //o 132 | .OVERFLOW (dsp_OVERFLOW ), //o 133 | .UNDERFLOW (dsp_UNDERFLOW ), //o 134 | .PATTERNBDETECT (dsp_PATTERNBDETECT), //o 135 | .PATTERNDETECT (dsp_PATTERNDETECT ) //o 136 | ); 137 | assign tmp_1 = zz_dsp_INMODE(1'b0); 138 | always @(*) dsp_INMODE = tmp_1; 139 | assign tmp_OPMODE = 1'b1; 140 | always @(*) begin
141 | /* Static OPMODE = 9'b11_000_01_01; per the DSP48E2 OPMODE tables this selects W = C, Z = 0 and X, Y = multiplier output, i.e. P = M + C. */ dsp_OPMODE[8 : 7] = {tmp_OPMODE,tmp_OPMODE}; 142 | dsp_OPMODE[1 : 0] = {1'b0,1'b1}; 143 | dsp_OPMODE[3 : 2] = {1'b0,1'b1}; 144 | dsp_OPMODE[6 : 4] = {{1'b0,1'b0},1'b0}; 145 | end 146 | 147 | /* abNeg is high when io_a and io_b are both non-zero and their sign bits differ, i.e. when a*b is negative. After two register stages (abNeg_delay_1, abNegReg) it is driven onto bit 18 of C. NOTE(review): presumably this adds back the 1 that a negative low product borrows from the upper a*c field, and the two-stage delay is meant to line up with the DSP pipeline; confirm against the Scala generator. */ assign abNeg = (((|io_a) && (|io_b)) && (io_a[7] ^ io_b[7])); 148 | assign dsp_A = tmp_A; 149 | assign dsp_B = tmp_B; 150 | assign tmp_D = io_c; 151 | assign dsp_D = {tmp_D_1,18'h00000}; 152 | assign dsp_C = {{29'h00000000,abNegReg},18'h00000}; 153 | assign io_ab = dsp_P[15 : 0]; 154 | assign io_ac = tmp_io_ac[15 : 0]; 155 | always @(posedge clk) begin 156 | abNeg_delay_1 <= abNeg; 157 | abNegReg <= abNeg_delay_1; 158 | end 159 | 160 | 161 | endmodule 162 | -------------------------------------------------------------------------------- /verilog/xilinx/DSP48E2Arithmetic/int24_acc_scale.v: -------------------------------------------------------------------------------- 1 | // Generator : SpinalHDL v1.9.0 git head : 7d30dbacbd3aa1be42fb2a3d4da5675703aae2ae 2 | // Component : int24_acc_scale 3 | // Git hash : 58987f7f234d2f22202f16d8122fac665f2828cb 4 | 5 | `timescale 1ns/1ps 6 | 7 | /* int24_acc_scale: DSP48E2 wrapper whose C input is the concatenation of io_a and io_b. P is exposed whole as io_scaleRes and split into io_aAcc = P[47:24] and io_bAcc = P[23:0]; the same two halves are sign-extended back onto the A and D inputs, and io_scale drives B. INMODE and OPMODE are driven dynamically from io_valid, io_last, io_b[23] and a chain of registered flags with an asynchronous active-high reset. */ module int24_acc_scale ( 8 | input [23:0] io_a, 9 | input [23:0] io_b, 10 | input [17:0] io_scale, 11 | input io_valid, 12 | input io_last, 13 | output [23:0] io_aAcc, 14 | output [23:0] io_bAcc, 15 | output [47:0] io_scaleRes, 16 | input clk, 17 | input reset 18 | ); 19 | 20 | reg [4:0] dsp_INMODE; 21 | reg [8:0] dsp_OPMODE; 22 | wire [29:0] dsp_A; 23 | wire [26:0] dsp_D; 24 | wire [29:0] dsp_ACOUT; 25 | wire [17:0] dsp_BCOUT; 26 | wire [47:0] dsp_PCOUT; 27 | wire [0:0] dsp_CARRYCASCOUT; 28 | wire [0:0] dsp_MULTSIGNOUT; 29 | wire [47:0] dsp_P; 30 | wire [3:0] dsp_CARRYOUT; 31 | wire [7:0] dsp_XOROUT; 32 | wire dsp_OVERFLOW; 33 | wire dsp_UNDERFLOW; 34 | wire dsp_PATTERNBDETECT; 35 | wire dsp_PATTERNDETECT; 36 | wire [29:0] tmp_A; 37 | wire [23:0] tmp_A_1; 38 | wire [26:0] tmp_D; 39 | wire [23:0] tmp_D_1; 40 | reg accValid; 41 | reg io_last_delay_1; 42 | reg ADCe; 43 | reg ADCe_delay_1; 44 | reg
multEnable; 45 | reg muteD; 46 | wire [47:0] ab; 47 | wire [23:0] aAccBits; 48 | wire [23:0] bAccBits; 49 | 50 | /* Feedback path: the upper half of P (aAccBits) is sign-extended to 30 bits for A and the lower half (bAccBits) is sign-extended to 27 bits for D. */ assign tmp_A_1 = aAccBits; 51 | assign tmp_A = {{6{tmp_A_1[23]}}, tmp_A_1}; 52 | assign tmp_D_1 = bAccBits; 53 | assign tmp_D = {{3{tmp_D_1[23]}}, tmp_D_1}; 54 | DSP48E2 #( 55 | .A_INPUT("DIRECT"), 56 | .B_INPUT("DIRECT"), 57 | .AMULTSEL("AD"), 58 | .BMULTSEL("B"), 59 | .PREADDINSEL("A"), 60 | .USE_MULT("MULTIPLY"), 61 | .USE_SIMD("ONE48"), 62 | .AREG(1), 63 | .BREG(2), 64 | .CREG(1), 65 | .DREG(1), 66 | .ADREG(1), 67 | .MREG(1), 68 | .PREG(1), 69 | .ACASCREG(1), 70 | .BCASCREG(1), 71 | .CARRYINREG(1), 72 | .CARRYINSELREG(1), 73 | .INMODEREG(1), 74 | .OPMODEREG(1), 75 | .ALUMODEREG(1) 76 | ) dsp ( 77 | .CLK (clk ), //i 78 | .ALUMODE (4'b0000 ), //i 79 | .INMODE (dsp_INMODE[4:0] ), //i 80 | .OPMODE (dsp_OPMODE[8:0] ), //i 81 | .CARRYINSEL (3'b000 ), //i 82 | .ACIN (30'h00000000 ), //i 83 | .BCIN (18'h00000 ), //i 84 | .PCIN (48'h000000000000 ), //i 85 | .CARRYCASCIN (1'b0 ), //i 86 | .MULTSIGNIN (1'b0 ), //i 87 | .ACOUT (dsp_ACOUT[29:0] ), //o 88 | .BCOUT (dsp_BCOUT[17:0] ), //o 89 | .PCOUT (dsp_PCOUT[47:0] ), //o 90 | .CARRYCASCOUT (dsp_CARRYCASCOUT ), //o 91 | .MULTSIGNOUT (dsp_MULTSIGNOUT ), //o 92 | .CEA1 (ADCe ), //i 93 | .CEA2 (1'b0 ), //i 94 | .CEB1 (1'b1 ), //i 95 | .CEB2 (1'b1 ), //i 96 | .CEC (1'b1 ), //i 97 | .CED (ADCe ), //i 98 | .CEAD (1'b0 ), //i 99 | .CEM (1'b1 ), //i 100 | .CEP (1'b1 ), //i 101 | .CECARRYIN (1'b0 ), //i 102 | .CECTRL (1'b1 ), //i 103 | .CEINMODE (1'b1 ), //i 104 | .CEALUMODE (1'b0 ), //i 105 | .RSTA (1'b0 ), //i 106 | .RSTB (1'b0 ), //i 107 | .RSTC (1'b0 ), //i 108 | .RSTD (1'b0 ), //i 109 | .RSTM (1'b0 ), //i 110 | .RSTP (1'b0 ), //i 111 | .RSTALLCARRYIN (1'b0 ), //i 112 | .RSTCTRL (1'b0 ), //i 113 | .RSTINMODE (1'b0 ), //i 114 | .RSTALUMODE (1'b0 ), //i 115 | .A (dsp_A[29:0] ), //i 116 | .B (io_scale[17:0] ), //i 117 | .C (ab[47:0] ), //i 118 | .D (dsp_D[26:0] ), //i 119 | .CARRYIN (1'b0 ), //i 120 | .P (dsp_P[47:0] ),
//o 121 | .CARRYOUT (dsp_CARRYOUT[3:0] ), //o 122 | .XOROUT (dsp_XOROUT[7:0] ), //o 123 | .OVERFLOW (dsp_OVERFLOW ), //o 124 | .UNDERFLOW (dsp_UNDERFLOW ), //o 125 | .PATTERNBDETECT (dsp_PATTERNBDETECT), //o 126 | .PATTERNDETECT (dsp_PATTERNDETECT ) //o 127 | ); 128 | /* Dynamic INMODE: bit 1 follows ADCe, bit 2 follows muteD, bit 0 is tied high, bits 3 and 4 are tied low. */ always @(*) begin 129 | dsp_INMODE[1] = ADCe; 130 | dsp_INMODE[2] = muteD; 131 | dsp_INMODE[3] = 1'b0; 132 | dsp_INMODE[0] = 1'b1; 133 | dsp_INMODE[4] = 1'b0; 134 | end 135 | 136 | /* Dynamic OPMODE: bit 8 is io_b[23] && io_valid and bit 7 is low; bits 6:4 are 0, accValid, 0; bits 3:2 are io_valid, 1; bits 1:0 both follow multEnable. NOTE(review): the operand selections these encodings map to should be checked against the DSP48E2 OPMODE tables. */ always @(*) begin 137 | dsp_OPMODE[8 : 7] = {(io_b[23] && io_valid),1'b0}; 138 | dsp_OPMODE[1 : 0] = {multEnable,multEnable}; 139 | dsp_OPMODE[3 : 2] = {io_valid,1'b1}; 140 | dsp_OPMODE[6 : 4] = {{1'b0,accValid},1'b0}; 141 | end 142 | 143 | assign ab = {io_a,io_b}; 144 | assign aAccBits = dsp_P[47 : 24]; 145 | assign bAccBits = dsp_P[23 : 0]; 146 | assign dsp_A = tmp_A; 147 | assign dsp_D = tmp_D; 148 | assign io_scaleRes = dsp_P; 149 | assign io_aAcc = aAccBits; 150 | assign io_bAcc = bAccBits; 151 | /* Control flags, asynchronously reset (muteD resets high, all others low). accValid is set by io_valid and cleared by io_last; io_last wins when both are high in the same cycle because it is assigned later. io_last passes through io_last_delay_1 into ADCe and then ADCe_delay_1; multEnable registers ADCe || ADCe_delay_1 and muteD registers the inverse of ADCe. */ always @(posedge clk or posedge reset) begin 152 | if(reset) begin 153 | accValid <= 1'b0; 154 | io_last_delay_1 <= 1'b0; 155 | ADCe <= 1'b0; 156 | ADCe_delay_1 <= 1'b0; 157 | multEnable <= 1'b0; 158 | muteD <= 1'b1; 159 | end else begin 160 | if(io_valid) begin 161 | accValid <= 1'b1; 162 | end 163 | if(io_last) begin 164 | accValid <= 1'b0; 165 | end 166 | io_last_delay_1 <= io_last; 167 | ADCe <= io_last_delay_1; 168 | ADCe_delay_1 <= ADCe; 169 | multEnable <= (ADCe || ADCe_delay_1); 170 | muteD <= (!
ADCe); 171 | end 172 | end 173 | 174 | 175 | endmodule 176 | -------------------------------------------------------------------------------- /src/main/scala/xilinx/DSP48E2/DSP48E2ConfigABCD.scala: --------------------------------------------------------------------------------
package xilinx.DSP48E2

import spinal.core._
import scala.language.postfixOps

/**
 * Configuration helpers for the A, B, C and D input paths of a [[DSP48E2]].
 *
 * Each sub-object pairs `set_*` methods, which write register-depth or routing
 * attributes into a [[DSP48E2Attributes]], with `assign_*` methods, which drive
 * the matching clock-enable, reset, data or INMODE signals on an instantiated
 * [[DSP48E2]]. The caller is responsible for using a matching set/assign pair.
 */
object DSP48E2ConfigABCD {

  /** INMODE register depth and the five individual INMODE bits. */
  object inmode {

    /** Single place where an INMODE bit is driven; `idx` is the bit position. */
    private def drive_bit(dsp: DSP48E2, idx: Int, level: Bool) = dsp.INST.INMODE(idx) := level

    /** INMODEREG = 0. */
    def set_static_inmode_attr(attr: DSP48E2Attributes) = {
      attr.INMODEREG = 0
    }

    /** INMODEREG = 1. */
    def set_dynamic_inmode_attr(attr: DSP48E2Attributes) = {
      attr.INMODEREG = 1
    }

    /** Clears both the reset and the clock enable of the INMODE register. */
    def assign_static_inmode_ctrl(dsp: DSP48E2): Unit = {
      dsp.RSTs.INMODE.clear()
      dsp.CEs.INMODE.clear()
    }

    /** Sets the INMODE clock enable; the INMODE reset is not touched here. */
    def assign_dynamic_inmode_ctrl(dsp: DSP48E2): Unit = dsp.CEs.INMODE.set()

    /** Drives INMODE(0). Going by the parameter name, high selects the A1 register. */
    def assign_inmode_a(dsp: DSP48E2, high4a1: Bool) = drive_bit(dsp, 0, high4a1)

    /** Drives INMODE(4). Going by the parameter name, high selects the B1 register. */
    def assign_inmode_b(dsp: DSP48E2, high4b1: Bool) = drive_bit(dsp, 4, high4b1)

    /** Drives INMODE(1). Going by the parameter name, high gates the A path. */
    def assign_inmode_gate_ab(dsp: DSP48E2, high2gate: Bool) = drive_bit(dsp, 1, high2gate)

    /** Drives INMODE(2). Going by the parameter name, low gates the D path. */
    def assign_inmode_gate_d(dsp: DSP48E2, low2gate: Bool) = drive_bit(dsp, 2, low2gate)

    /** Drives INMODE(3). Going by the parameter name, high makes the pre-adder subtract. */
    def assign_inmode_add_sub(dsp: DSP48E2, high2sub: Bool) = drive_bit(dsp, 3, high2sub)

    /** Defaults for bits 1..3: INMODE(1) = False, INMODE(2) = True, INMODE(3) = False. Bits 0 and 4 are left alone. */
    def assign_default(dsp: DSP48E2) = {
      drive_bit(dsp, 1, False)
      drive_bit(dsp, 2, True)
      drive_bit(dsp, 3, False)
    }
  }

  /** A input path. */
  object a {

    /** AREG = 2 for an A input that is tied off by [[assign_mute_a_ctrl]]. */
    def set_mute_a_attr(attr: DSP48E2Attributes) = {
      attr.AREG = 2
    }

    /** Ties every bit of the A data input high. */
    def assign_mute_a_ctrl(dsp: DSP48E2): Unit = dsp.DATAIN.A.setAll()

    /**
     * Adds the generic IS_RSTA_INVERTED = 1'b1 and drives RSTA from `valid`,
     * dropping any assignment RSTA already had.
     */
    def add_optional_valid_rst(dsp: DSP48E2, valid: Bool) = {
      dsp.addGeneric("IS_RSTA_INVERTED", "1'b1")
      if (dsp.RSTs.A.hasAssignement) {
        dsp.RSTs.A.removeAssignments()
      }
      dsp.RSTs.A := valid
    }

    /** A_INPUT = "CASCADE". */
    def set_a_cascade(attr: DSP48E2Attributes) = {
      attr.A_INPUT = "CASCADE"
    }

    /** Sets the A2 clock enable. */
    def assign_a_cascade(dsp: DSP48E2) = dsp.CEs.A2.set()

    /** AREG = 2 so that A1 and A2 can be loaded independently. */
    def set_a_pingpong(attr: DSP48E2Attributes) = {
      attr.AREG = 2
    }

    /** Drives the A1/A2 clock enables from `ce1`/`ce2` and holds INMODE(0) low. */
    def assign_a_pingpong_ctrl(dsp: DSP48E2, ce1: Bool, ce2: Bool): Unit = {
      dsp.CEs.A1 := ce1
      dsp.CEs.A2 := ce2
      inmode.assign_inmode_a(dsp, high4a1 = False)
    }

    /** AREG = `pipe`. */
    def set_static_a_input_attr(attr: DSP48E2Attributes, pipe: Int) = {
      attr.AREG = pipe
    }

    /**
     * Always-enabled A input.
     *
     *  - `pipe == 2`: both A1 and A2 are enabled, INMODE(0) low.
     *  - otherwise, `isALU`: only A2 is enabled, INMODE(0) low.
     *  - otherwise: only A1 is enabled, INMODE(0) high.
     */
    def assign_static_a_input_ctrl(dsp: DSP48E2, pipe: Int, isALU: Boolean = false) = pipe match {
      case 2 =>
        dsp.CEs.A1.set()
        dsp.CEs.A2.set()
        inmode.assign_inmode_a(dsp, high4a1 = False)
      case _ if isALU =>
        dsp.CEs.A2.set()
        inmode.assign_inmode_a(dsp, high4a1 = False)
      case _ =>
        dsp.CEs.A1.set()
        inmode.assign_inmode_a(dsp, high4a1 = True)
    }

    /** AREG = 1. */
    def set_time_multiplex_a_input_attr(attr: DSP48E2Attributes) = {
      attr.AREG = 1
    }

    /** Caller-controlled A1/A2 clock enables and INMODE(0) selection. */
    def assign_time_multiplex_a_input_ctrl(dsp: DSP48E2, ce1: Bool = True, ce2: Bool = True, selA1: Bool = True): Unit = {
      inmode.assign_inmode_a(dsp, high4a1 = selA1)
      dsp.CEs.A1 := ce1
      dsp.CEs.A2 := ce2
    }
  }

  /** B input path; mirrors [[a]] with INMODE(4) in place of INMODE(0). */
  object b {

    /** BREG = 2 for a B input that is tied off by [[assign_mute_b_ctrl]]. */
    def set_mute_b_attr(attr: DSP48E2Attributes) = {
      attr.BREG = 2
    }

    /** Ties every bit of the B data input high. */
    def assign_mute_b_ctrl(dsp: DSP48E2): Unit = dsp.DATAIN.B.setAll()

    /**
     * Adds the generic IS_RSTB_INVERTED = 1'b1 and drives RSTB from `valid`,
     * dropping any assignment RSTB already had.
     */
    def add_optional_valid_rst(dsp: DSP48E2, valid: Bool) = {
      dsp.addGeneric("IS_RSTB_INVERTED", "1'b1")
      if (dsp.RSTs.B.hasAssignement) {
        dsp.RSTs.B.removeAssignments()
      }
      dsp.RSTs.B := valid
    }

    /** B_INPUT = "CASCADE". */
    def set_b_cascade(attr: DSP48E2Attributes) = {
      attr.B_INPUT = "CASCADE"
    }

    /** Sets the B2 clock enable. */
    def assign_b_cascade(dsp: DSP48E2) = dsp.CEs.B2.set()

    /** BREG = 2 so that B1 and B2 can be loaded independently. */
    def set_b_pingpong(attr: DSP48E2Attributes) = {
      attr.BREG = 2
    }

    /** Drives the B1/B2 clock enables from `ce1`/`ce2` and holds INMODE(4) low. */
    def assign_b_pingpong_ctrl(dsp: DSP48E2, ce1: Bool, ce2: Bool): Unit = {
      dsp.CEs.B1 := ce1
      dsp.CEs.B2 := ce2
      inmode.assign_inmode_b(dsp, high4b1 = False)
    }

    /** BREG = `pipe`. */
    def set_static_b_input_attr(attr: DSP48E2Attributes, pipe: Int) = {
      attr.BREG = pipe
    }

    /**
     * Always-enabled B input.
     *
     *  - `pipe == 2`: both B1 and B2 are enabled, INMODE(4) low.
     *  - otherwise, `isALU`: only B2 is enabled, INMODE(4) low.
     *  - otherwise: only B1 is enabled, INMODE(4) high.
     */
    def assign_static_b_input_ctrl(dsp: DSP48E2, pipe: Int, isALU: Boolean = false) = {
      if (pipe == 2) {
        dsp.CEs.B1.set()
        dsp.CEs.B2.set()
        inmode.assign_inmode_b(dsp, high4b1 = False)
      } else if (isALU) {
        dsp.CEs.B2.set()
        inmode.assign_inmode_b(dsp, high4b1 = False)
      } else {
        dsp.CEs.B1.set()
        inmode.assign_inmode_b(dsp, high4b1 = True)
      }
    }

    /** BREG = 1. */
    def set_time_multiplex_b_input_attr(attr: DSP48E2Attributes) = {
      attr.BREG = 1
    }

    /** Caller-controlled B1/B2 clock enables and INMODE(4) selection. */
    def assign_time_multiplex_b_input_ctrl(dsp: DSP48E2, ce1: Bool = True, ce2: Bool = True, selB1: Bool = True): Unit = {
      inmode.assign_inmode_b(dsp, high4b1 = selB1)
      dsp.CEs.B1 := ce1
      dsp.CEs.B2 := ce2
    }
  }

  /** C input path. */
  object c {

    /** CREG = 1 for a C input that is tied off by [[assign_mute_c_ctrl]]. */
    def set_mute_c_attr(attr: DSP48E2Attributes) = {
      attr.CREG = 1
    }

    /** Ties every bit of the C data input high. */
    def assign_mute_c_ctrl(dsp: DSP48E2): Unit = dsp.DATAIN.C.setAll()

    /**
     * Adds the generic IS_RSTC_INVERTED = 1'b1 and drives RSTC from `valid`,
     * dropping any assignment RSTC already had.
     */
    def add_optional_valid_rst(dsp: DSP48E2, valid: Bool) = {
      dsp.addGeneric("IS_RSTC_INVERTED", "1'b1")
      if (dsp.RSTs.C.hasAssignement) {
        dsp.RSTs.C.removeAssignments()
      }
      dsp.RSTs.C := valid
    }

    /** CREG = 1. */
    def set_c_input_attr(attr: DSP48E2Attributes) = {
      attr.CREG = 1
    }

    /** Drives the C clock enable from `ce` (always enabled by default). */
    def assign_c_input_ctrl(dsp: DSP48E2, ce: Bool = True): Unit = dsp.CEs.C := ce
  }

  /** D input path. */
  object d {

    /** DREG = 1 and ADREG = 1 for a D input that is tied off by [[assign_mute_d_ctrl]]. */
    def set_mute_d_attr(attr: DSP48E2Attributes) = {
      attr.DREG = 1
      attr.ADREG = 1
    }

    /** Ties every bit of the D data input high. */
    def assign_mute_d_ctrl(dsp: DSP48E2): Unit = dsp.DATAIN.D.setAll()

    /**
     * Adds the generic IS_RSTD_INVERTED = 1'b1 and drives RSTD from `valid`,
     * dropping any assignment RSTD already had.
     */
    def add_optional_valid_rst(dsp: DSP48E2, valid: Bool) = {
      dsp.addGeneric("IS_RSTD_INVERTED", "1'b1")
      if (dsp.RSTs.D.hasAssignement) {
        dsp.RSTs.D.removeAssignments()
      }
      dsp.RSTs.D := valid
    }
  }

  /** A and B configured together, for use as one concatenated operand. */
  object ab_concat {

    /** AREG = 1 and BREG = 1. */
    def set_ab_concat_attr(attr: DSP48E2Attributes) = {
      attr.BREG = 1
      attr.AREG = 1
    }

    /** Drives the A2 and B2 clock enables from the same `ce`; INMODE(0) and INMODE(4) are held low. */
    def assign_ab_concat_ctrl(dsp: DSP48E2, ce: Bool = True): Unit = {
      dsp.CEs.A2 := ce
      dsp.CEs.B2 := ce
      inmode.assign_inmode_a(dsp, high4a1 = False)
      inmode.assign_inmode_b(dsp, high4b1 = False)
    }

    /** AREG = 2 and BREG = 2, the same values as `a.set_a_pingpong` and `b.set_b_pingpong`. */
    def set_pingpong_ab_concat_attr(attr: DSP48E2Attributes) = {
      attr.AREG = 2
      attr.BREG = 2
    }

    /** Applies the same `ce1`/`ce2` ping-pong control to both A and B. */
    def assign_pingpong_ab_concat_ctrl(dsp: DSP48E2, ce1: Bool, ce2: Bool): Unit = {
      a.assign_a_pingpong_ctrl(dsp, ce1, ce2)
      b.assign_b_pingpong_ctrl(dsp, ce1, ce2)
    }
  }

  /** A and D routed through the pre-adder (AMULTSEL = "AD"), with helpers for the matching B depth. */
  object ad_pack {

    /**
     * AREG = 1, ADREG = 1, AMULTSEL = "AD", PREADDINSEL = "A"; DREG is 1 when
     * `use_ad` is true and 0 otherwise.
     *
     * NOTE(review): ADREG stays 1 even when `use_ad` is false, while
     * [[assign_ad_pack_ctrl]] only enables CEAD when `use_ad` is true. Confirm
     * that the AD register is meant to remain in the path in that case.
     */
    def set_ad_pack_attr(attr: DSP48E2Attributes, use_ad: Boolean = true) = {
      attr.AMULTSEL = "AD"
      attr.PREADDINSEL = "A"
      attr.AREG = 1
      attr.ADREG = 1
      attr.DREG = if (use_ad) 1 else 0
    }

    /** BREG = 2 when `use_ad` is true, otherwise 1. */
    def set_static_b_attr(attr: DSP48E2Attributes, use_ad: Boolean = true) = {
      attr.BREG = if (use_ad) 2 else 1
    }

    /** BREG = 2, the same value as `b.set_b_pingpong`. */
    def set_pingpong_b_attr(attr: DSP48E2Attributes) = {
      attr.BREG = 2
    }

    /**
     * Drives the A1 and D clock enables from `ce`, holds INMODE(0) high, and
     * sets the AD clock enable only when `use_ad` is true.
     */
    def assign_ad_pack_ctrl(dsp: DSP48E2, use_ad: Boolean = true, ce: Bool = True): Unit = {
      dsp.CEs.A1 := ce
      dsp.CEs.D := ce
      if (use_ad) {
        dsp.CEs.AD.set()
      }
      inmode.assign_inmode_a(dsp, high4a1 = True)
    }

    /** Static B control with depth 2 when `use_ad` is true, otherwise 1. */
    def assign_static_b_ctrl(dsp: DSP48E2, use_ad: Boolean = true): Unit = {
      val depth = if (use_ad) 2 else 1
      b.assign_static_b_input_ctrl(dsp, depth)
    }

    /** Ping-pong B control; forwards to `b.assign_b_pingpong_ctrl`. */
    def assign_pingpong_b_ctrl(dsp: DSP48E2, ce1: Bool, ce2: Bool): Unit = {
      b.assign_b_pingpong_ctrl(dsp, ce1, ce2)
    }
  }
}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS
FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------