├── .envrc ├── .github └── workflows │ └── test.yml ├── .gitignore ├── .gitmodules ├── .mill-version ├── .scalafix.conf ├── .scalafmt.conf ├── .vscode └── settings.json ├── BUILDING.md ├── LICENSE ├── README.md ├── build.sc ├── fpu-wrappers ├── resources │ ├── flopoco │ │ ├── .gitignore │ │ ├── FPCFExp_D1s.v │ │ ├── FPCFExp_D3s.v │ │ ├── FPCFExp_D4s.v │ │ ├── FPCFExp_D5s.v │ │ ├── FPCFExp_D6s.v │ │ ├── FPCFExp_H1s.v │ │ ├── FPCFExp_H2s.v │ │ ├── FPCFExp_H3s.v │ │ ├── FPCFExp_S1s.v │ │ ├── FPCFExp_S2s.v │ │ ├── FPCFExp_S3s.v │ │ ├── FPCFExp_S4s.v │ │ ├── FPCFExp_S5s.v │ │ ├── IEEEFMA_D10s.v │ │ ├── IEEEFMA_D3s.v │ │ ├── IEEEFMA_D5s.v │ │ ├── IEEEFMA_D7s.v │ │ ├── IEEEFMA_D9s.v │ │ ├── IEEEFMA_H1s.v │ │ ├── IEEEFMA_H2s.v │ │ ├── IEEEFMA_H3s.v │ │ ├── IEEEFMA_S1s.v │ │ ├── IEEEFMA_S2s.v │ │ ├── IEEEFMA_S3s.v │ │ ├── IEEEFMA_S4s.v │ │ └── gen.py │ ├── fpnew │ │ ├── .gitignore │ │ ├── FPNewBlackbox.sv │ │ ├── FPNewBlackbox_1s.sv │ │ ├── FPNewBlackbox_2s.sv │ │ ├── FPNewBlackbox_3s.sv │ │ ├── FPNewBlackbox_4s.sv │ │ ├── FPNewBlackbox_5s.sv │ │ ├── FPNewBlackbox_D1l1s.synth.v │ │ ├── FPNewBlackbox_D1l2s.synth.v │ │ ├── FPNewBlackbox_D1l3s.synth.v │ │ ├── FPNewBlackbox_D1l4s.synth.v │ │ ├── FPNewBlackbox_D1l5s.synth.v │ │ ├── FPNewBlackbox_D2l1s.synth.v │ │ ├── FPNewBlackbox_D2l2s.synth.v │ │ ├── FPNewBlackbox_D2l3s.synth.v │ │ ├── FPNewBlackbox_D2l4s.synth.v │ │ ├── FPNewBlackbox_D2l5s.synth.v │ │ ├── FPNewBlackbox_S1l1s.synth.v │ │ ├── FPNewBlackbox_S1l2s.synth.v │ │ ├── FPNewBlackbox_S1l3s.synth.v │ │ ├── FPNewBlackbox_S1l4s.synth.v │ │ ├── FPNewBlackbox_S1l5s.synth.v │ │ ├── FPNewBlackbox_S2l1s.synth.v │ │ ├── FPNewBlackbox_S2l2s.synth.v │ │ ├── FPNewBlackbox_S2l3s.synth.v │ │ ├── FPNewBlackbox_S2l4s.synth.v │ │ ├── FPNewBlackbox_S2l5s.synth.v │ │ ├── Makefile │ │ └── gen.py │ ├── opencores │ │ ├── .gitignore │ │ ├── except.v │ │ ├── fpu.v │ │ ├── post_norm.v │ │ ├── pre_norm.v │ │ ├── pre_norm_fmul.v │ │ └── primitives.v │ └── syn.tcl ├── src │ └── fpuwrapper │ │ ├── 
Mul.scala │ │ ├── bench.scala │ │ ├── common.scala │ │ ├── emit.scala │ │ ├── float.scala │ │ ├── flopoco │ │ ├── FPCFExp.scala │ │ ├── FPCToIEEE.scala │ │ ├── IEEEFExp.scala │ │ ├── IEEEFMA.scala │ │ └── IEEEToFPC.scala │ │ ├── formal │ │ ├── HFRoundtrip.scala │ │ └── IEEEFMAFormal.scala │ │ ├── fpnew │ │ ├── FPNewBlackbox.scala │ │ └── IEEEFPU.scala │ │ ├── fudian │ │ ├── IEEEFAdd.scala │ │ ├── IEEEFDivSqrt.scala │ │ └── IEEEFMA.scala │ │ ├── hardfloat │ │ ├── FMACommon.scala │ │ ├── HFFCmp.scala │ │ ├── HFFDivSqrt.scala │ │ ├── HFFMA.scala │ │ ├── HFFMul.scala │ │ ├── HFToIEEE.scala │ │ ├── IEEEFMA.scala │ │ ├── IEEEToHF.scala │ │ └── MulCommon.scala │ │ ├── opencores │ │ └── IEEEFPU.scala │ │ ├── sifive.scala │ │ └── synthesis.scala └── test │ └── src │ └── fpuwrapper │ ├── common.scala │ ├── flopoco │ ├── FPCFExpTest.scala │ ├── IEEEFExpTest.scala │ └── IEEEFMATest.scala │ ├── fpnew │ └── IEEEFPUTest.scala │ ├── fudian │ ├── IEEEFAddTest.scala │ ├── IEEEFDivSqrtTest.scala │ └── IEEEFMATest.scala │ ├── hardfloat │ ├── HFFCmpTest.scala │ ├── HFFDivSqrtTest.scala │ ├── HFFMATest.scala │ ├── HFMulTest.scala │ ├── IEEEFMATest.scala │ └── IEEEToHFTest.scala │ └── opencores │ └── IEEEFPUTest.scala ├── shell.nix └── synWorkspace ├── .gitignore ├── report.py └── report.yaml /.envrc: -------------------------------------------------------------------------------- 1 | use nix 2 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ '**' ] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-22.04 10 | strategy: 11 | matrix: 12 | scala: [2.13.10] 13 | jvm: [adopt@1.11] 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | with: 18 | submodules: true 19 | - name: Setup Mill 20 | uses: jodersky/setup-mill@master 21 | with: 22 | mill-version: 0.11.5 23 | - name: Setup nix env 24 | 
uses: JRMurr/direnv-nix-action@v4.1.0 25 | - name: Install simulators 26 | run: sudo apt-get install -y verilator iverilog 27 | - name: Install other dependencies 28 | run: sudo apt-get install -y gcc libboost-dev z3 29 | - name: Compile 30 | run: mill fpu-wrappers.compile 31 | - name: Test 32 | run: mill fpu-wrappers.test 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .metals/ 2 | .bsp/ 3 | .bloop/ 4 | .direnv/ 5 | target/ 6 | project/project/ 7 | test_run_dir/ 8 | simWorkspace/ 9 | tmp/ 10 | out/ 11 | 12 | *.v 13 | *.sv 14 | *.smt2 15 | *.anno.json 16 | *.fir 17 | ucli.key 18 | fpuwrapper.Simulator 19 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "berkeley-hardfloat"] 2 | path = thirdparty/berkeley-hardfloat 3 | url = git@github.com:ucb-bar/berkeley-hardfloat.git 4 | [submodule "opencores-fpu"] 5 | path = thirdparty/opencores-fpu 6 | url = git@github.com:jiegec/opencores-fpu.git 7 | [submodule "thirdparty/CNRV-FPU"] 8 | path = thirdparty/CNRV-FPU 9 | url = git@github.com:cnrv/CNRV-FPU.git 10 | [submodule "thirdparty/fpnew"] 11 | path = thirdparty/fpnew 12 | url = git@github.com:pulp-platform/fpnew.git 13 | [submodule "thirdparty/fudian"] 14 | path = thirdparty/fudian 15 | url = git@github.com:OpenXiangShan/fudian.git 16 | -------------------------------------------------------------------------------- /.mill-version: -------------------------------------------------------------------------------- 1 | 0.11.5 2 | -------------------------------------------------------------------------------- /.scalafix.conf: -------------------------------------------------------------------------------- 1 | rules = [ 2 | // builtin 3 | ExplicitResultTypes, 4 | RemoveUnused, 5 | NoAutoTupling, 6 | // 
community 7 | OrganizeImports 8 | ] -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = "3.0.8" 2 | runner.dialect = scala213 -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.watcherExclude": { 3 | "**/target": true 4 | }, 5 | "cSpell.words": [ 6 | "hardfloat" 7 | ] 8 | } -------------------------------------------------------------------------------- /BUILDING.md: -------------------------------------------------------------------------------- 1 | # Berkeley-Hardfloat 2 | 3 | Language: Chisel -> Verilog 4 | 5 | http://www.jhauser.us/arithmetic/HardFloat-1/doc/HardFloat-Verilog.html 6 | 7 | float format: 32 bit -> 33 bit 8 | 9 | # FPNew 10 | 11 | Language: SystemVerilog 12 | 13 | # FloPoCo 14 | 15 | Language: C++ -> VHDL 16 | 17 | float format: 32 bit -> 34 bit 18 | 19 | ## Installation 20 | 21 | ### wcpg 22 | 23 | ```shell 24 | git clone https://scm.gforge.inria.fr/anonscm/git/metalibm/wcpg.git 25 | cd wcpg 26 | sh autogen.sh 27 | ./configure --prefix=$HOME/prefix/wcpg 28 | make install -j 29 | ``` 30 | 31 | ### scalp 32 | 33 | ```shell 34 | git clone https://digidev.digi.e-technik.uni-kassel.de/git/scalp.git 35 | cd scalp 36 | mkdir build 37 | cd build 38 | cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/prefix/scalp -DUSE_LPSOLVE=ON -DLPSOLVE_LIBRARIES=/usr/lib/liblpsolve55_pic.a -DLP_INCLUDE_DIRS=/usr/include 39 | make install -j 40 | ``` 41 | 42 | ### pagsuite 43 | 44 | ```shell 45 | svn co https://digidev.digi.e-technik.uni-kassel.de/home/svn/pagsuite 46 | cd pagsuite/trunk 47 | mkdir build 48 | cd build 49 | cmake .. 
-DCMAKE_INSTALL_PREFIX=$HOME/prefix/pagsuite -DCMAKE_PREFIX_PATH=$HOME/prefix/scalp 50 | make install -j 51 | ``` 52 | 53 | ### flopoco 54 | 55 | ```shell 56 | git clone https://gitlab.inria.fr/fdupont/flopoco.git 57 | cd flopoco 58 | mkdir build 59 | cd build 60 | cmake .. -DCMAKE_PREFIX_PATH="$HOME/prefix/wcpg;$HOME/prefix/pagsuite;$HOME/prefix/scalp" -DCMAKE_INSTALL_PREFIX=$HOME/prefix/flopoco 61 | make -j 62 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jiajie Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fpu-wrappers 2 | 3 | This repo intends to create wrappers for open source FPU hardware implementations currently including: 4 | 5 | 1. [berkeley-hardfloat](https://github.com/ucb-bar/berkeley-hardfloat) 6 | 2. [fpnew](https://github.com/pulp-platform/fpnew) 7 | 3. [flopoco](http://flopoco.gforge.inria.fr/) 8 | 4. [CNRV-FPU](https://github.com/cnrv/CNRV-FPU) 9 | 5. [opencores-fpu](https://github.com/jiegec/opencores-fpu) 10 | 6. [fudian](https://github.com/OpenXiangShan/fudian) 11 | 7. [vfloat](https://github.com/jiegec/vfloat) 12 | 13 | | op | berkeley-hardfloat | fpnew | flopoco | CNRV-FPU | opencores-fpu | fudian | vfloat | 14 | | ------ | ------------------ | ----- | ------- | -------- | ------------- | ------ | ------ | 15 | | add | Y | Y | Y | Y | Y | Y | Y | 16 | | mul | Y | Y | Y | Y | Y | Y | Y | 17 | | fma | Y | Y | Y | Y | | Y | Y | 18 | | cmp | Y | Y | Y | | | Y | | 19 | | div | Y | Y | Y | Y | Y | Y | Y | 20 | | sqrt | Y | Y | Y | Y | | Y | Y | 21 | | fp2int | Y | Y | Y | Y | Y | Y | Y | 22 | | int2fp | Y | Y | Y | Y | Y | Y | Y | 23 | | fp2fp | Y | Y | Y | | | Y | | 24 | | pow | | | Y | | | | | 25 | | log | | | Y | | | | | 26 | | exp | | | Y | | | | | 27 | | custom | Y | | Y | | | | | 28 | 29 | `custom` means custom floating point format. 30 | 31 | This repo also compares their performance. 32 | 33 | Module naming convention: 34 | 35 | 1. Type 1: Floating point format + operator(FAdd/FMA/FExp) 36 | 2. Type 2: Floating point format `To` Floating point format 37 | 3. Type 3: Floating point format + FPU(many operations) 38 | 39 | Possible floating point formats: 40 | 41 | 1. HF: berkeley-hardfloat +1 bit 42 | 2. IEEE: IEEE 754 43 | 3. FPC: flopoco +2 bits 44 | 45 | IEEE754 FMA: 46 | 47 | 1. Area: fpnew = flopoco < hardfloat < fudian 48 | 2. 
Frequency: hardfloat = flopoco > fpnew > fudian 49 | 3. Power: fpnew < hardfloat < flopoco < fudian -------------------------------------------------------------------------------- /build.sc: -------------------------------------------------------------------------------- 1 | import mill._ 2 | import mill.scalalib.publish._ 3 | import scalalib._ 4 | import scalafmt._ 5 | import coursier.maven.MavenRepository 6 | 7 | // learned from https://github.com/OpenXiangShan/fudian/blob/main/build.sc 8 | val defaultVersions = Map( 9 | "chisel" -> ("org.chipsalliance", "6.2.0", false), 10 | "chisel-plugin" -> ("org.chipsalliance", "6.2.0", true), 11 | "scalatest" -> ("org.scalatest", "3.2.10", false), 12 | "spinalhdl-core" -> ("com.github.spinalhdl", "1.10.1", false), 13 | "spinalhdl-lib" -> ("com.github.spinalhdl", "1.10.1", false), 14 | "spinalhdl-idsl-plugin" -> ("com.github.spinalhdl", "1.10.1", false) 15 | ) 16 | 17 | val commonScalaVersion = "2.13.10" 18 | 19 | def getVersion(dep: String) = { 20 | val (org, ver, cross) = defaultVersions(dep) 21 | val version = sys.env.getOrElse(dep + "Version", ver) 22 | if (cross) 23 | ivy"$org:::$dep:$version" 24 | else 25 | ivy"$org::$dep:$version" 26 | } 27 | 28 | trait CommonModule extends ScalaModule { 29 | def scalaVersion = commonScalaVersion 30 | 31 | // for snapshot dependencies 32 | override def repositoriesTask = T.task { 33 | super.repositoriesTask() ++ Seq( 34 | MavenRepository("https://oss.sonatype.org/content/repositories/snapshots") 35 | ) 36 | } 37 | 38 | override def scalacOptions = 39 | Seq("-deprecation", "-feature", "-language:reflectiveCalls") 40 | } 41 | 42 | object hardfloat extends SbtModule with PublishModule { 43 | override def scalaVersion = commonScalaVersion 44 | override def millSourcePath = 45 | os.pwd / "thirdparty" / "berkeley-hardfloat" / "hardfloat" 46 | 47 | override def ivyDeps = super.ivyDeps() ++ Agg( 48 | getVersion("chisel") 49 | ) 50 | 51 | override def scalacPluginIvyDeps = 
super.scalacPluginIvyDeps() ++ Agg( 52 | getVersion("chisel-plugin") 53 | ) 54 | 55 | // publish 56 | def publishVersion = "1.5-SNAPSHOT" 57 | def pomSettings = PomSettings( 58 | description = artifactName(), 59 | organization = "edu.berkeley.cs", 60 | url = "http://chisel.eecs.berkeley.edu", 61 | licenses = Seq(License.`BSD-3-Clause`), 62 | versionControl = VersionControl.github("ucb-bar", "berkeley-hardfloat"), 63 | developers = Seq( 64 | Developer( 65 | "jhauser-ucberkeley", 66 | "John Hauser", 67 | "https://www.colorado.edu/faculty/hauser/about/" 68 | ), 69 | Developer( 70 | "aswaterman", 71 | "Andrew Waterman", 72 | "https://aspire.eecs.berkeley.edu/author/waterman/" 73 | ), 74 | Developer( 75 | "yunsup", 76 | "Yunsup Lee", 77 | "https://aspire.eecs.berkeley.edu/author/yunsup/" 78 | ) 79 | ) 80 | ) 81 | } 82 | 83 | object fudian extends CommonModule with PublishModule { 84 | override def ivyDeps = super.ivyDeps() ++ Agg( 85 | getVersion("chisel"), 86 | getVersion("scalatest") 87 | ) 88 | 89 | override def millSourcePath = os.pwd / "thirdparty" / "fudian" 90 | 91 | override def scalacPluginIvyDeps = super.scalacPluginIvyDeps() ++ Agg( 92 | getVersion("chisel-plugin") 93 | ) 94 | 95 | // publish 96 | def publishVersion = "1.0-SNAPSHOT" 97 | def pomSettings = PomSettings( 98 | description = artifactName(), 99 | organization = "cn.cas.ict", 100 | url = "https://github.com/openxiangshan/fudian", 101 | licenses = Seq(License.MIT), // Mulan PSL v2 is not included in Mill 102 | versionControl = VersionControl.github("openxiangshan", "fudian"), 103 | developers = Seq() 104 | ) 105 | } 106 | 107 | object `fpu-wrappers` 108 | extends CommonModule 109 | with PublishModule 110 | with ScalafmtModule { 111 | override def ivyDeps = super.ivyDeps() ++ Agg( 112 | getVersion("chisel"), 113 | getVersion("spinalhdl-core"), 114 | getVersion("spinalhdl-lib") 115 | ) 116 | 117 | override def scalacPluginIvyDeps = super.scalacPluginIvyDeps() ++ Agg( 118 | 
getVersion("spinalhdl-idsl-plugin"), 119 | getVersion("chisel-plugin") 120 | ) 121 | 122 | override def moduleDeps = super.moduleDeps ++ Seq(hardfloat, fudian) 123 | 124 | object test extends ScalaTests with TestModule.ScalaTest { 125 | override def ivyDeps = super.ivyDeps() ++ Agg( 126 | getVersion("scalatest") 127 | ) 128 | } 129 | 130 | // publish 131 | def publishVersion = "1.0-SNAPSHOT" 132 | def pomSettings = PomSettings( 133 | description = artifactName(), 134 | organization = "je.jia", 135 | url = "https://github.com/jiegec/fpu-wrappers", 136 | licenses = Seq(License.MIT), 137 | versionControl = VersionControl.github("jiegec", "fpu-wrappers"), 138 | developers = Seq() 139 | ) 140 | } 141 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/flopoco/.gitignore: -------------------------------------------------------------------------------- 1 | !*.v 2 | *.vhdl 3 | *.cache 4 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/flopoco/gen.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | 4 | tasks = [{ 5 | 'type': 'H', 6 | 'exp': 5, 7 | 'frac': 10 8 | }, { 9 | 'type': 'S', 10 | 'exp': 8, 11 | 'frac': 23 12 | }, { 13 | 'type': 'D', 14 | 'exp': 11, 15 | 'frac': 52 16 | }] 17 | 18 | home = os.getenv('HOME') 19 | flopoco = home + "/flopoco/build/flopoco" 20 | 21 | def gen_fma(frequency, task): 22 | # generate vhdl 23 | out = subprocess.check_output( 24 | [flopoco, "IEEEFMA", f"wE={task['exp']}", f"wF={task['frac']}", 25 | f"name=IEEEFMA_{task['type']}", f"frequency={frequency}"], 26 | stderr=subprocess.STDOUT).decode('utf-8') 27 | 28 | # parse stages from output 29 | stages = 0 30 | for line in out.splitlines(): 31 | if 'Pipeline depth' in line: 32 | stages = int(line.split(' ')[-1]) 33 | 34 | # save vhdl 35 | name = f"IEEEFMA_{task['type']}{stages}s" 36 | file = f"{name}.vhdl" 37 | 
file_vhdl08 = f"{name}_vhdl08.vhdl" 38 | os.rename('flopoco.vhdl', file) 39 | 40 | # vhdl08 41 | os.system(f"sed -e 's/std_logic_arith/numeric_std/g' -e 's/std_logic_unsigned/numeric_std_unsigned/g' {file} > {file_vhdl08}") 42 | 43 | # synthesize to verilog 44 | os.system(f"sudo docker run -it --rm -t -v $PWD:/src -w /src hdlc/ghdl:yosys yosys -m ghdl -p 'ghdl --std=08 {name}_vhdl08.vhdl -e IEEEFMA_{task['type']}; write_verilog {name}.v'") 45 | 46 | def gen_exp(frequency, task): 47 | # generate vhdl 48 | out = subprocess.check_output( 49 | [flopoco, "FPExp", f"wE={task['exp']}", f"wF={task['frac']}", 50 | f"name=FPCFExp_{task['type']}", f"plainVHDL=1", f"frequency={frequency}"], 51 | stderr=subprocess.STDOUT).decode('utf-8') 52 | 53 | # parse stages from output 54 | stages = 0 55 | for line in out.splitlines(): 56 | if 'Pipeline depth' in line: 57 | stages = int(line.split(' ')[-1]) 58 | 59 | # save vhdl 60 | name = f"FPCFExp_{task['type']}{stages}s" 61 | file = f"{name}.vhdl" 62 | os.rename('flopoco.vhdl', file) 63 | 64 | # synthesize to verilog 65 | os.system(f"sudo docker run -it --rm -t -v $PWD:/src -w /src hdlc/ghdl:yosys yosys -m ghdl -p 'ghdl -fsynopsys -fexplicit {name}.vhdl -e FPCFExp_{task['type']}; write_verilog {name}.v'") 66 | 67 | for task in tasks: 68 | for frequency in [100, 150, 200, 250, 300]: 69 | gen_fma(frequency, task) 70 | gen_exp(frequency, task) 71 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/.gitignore: -------------------------------------------------------------------------------- 1 | !FPNewBlackbox.sv 2 | !*.synth.v 3 | *.preprocessed.sv 4 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/FPNewBlackbox.sv: -------------------------------------------------------------------------------- 1 | module FPNewBlackbox #( 2 | // fpu features 3 | parameter FLEN = __FLEN__, 4 | parameter ENABLE_VECTORS = 1, 5 | 
parameter ENABLE_NAN_BOX = 1, 6 | parameter ENABLE_FP32 = __FP32__, 7 | parameter ENABLE_FP64 = __FP64__, 8 | parameter ENABLE_FP16 = 0, 9 | parameter ENABLE_FP8 = 0, 10 | parameter ENABLE_FP16ALT = 0, 11 | parameter ENABLE_INT8 = 0, 12 | parameter ENABLE_INT16 = 0, 13 | parameter ENABLE_INT32 = 0, 14 | parameter ENABLE_INT64 = 0, 15 | // fpu implementation 16 | parameter PIPELINE_STAGES = __STAGES__, 17 | // tag type: logic array 18 | parameter TAG_WIDTH = 2, 19 | // Do not change, follow fp-new definition 20 | localparam int unsigned WIDTH = FLEN, 21 | localparam int unsigned NUM_OPERANDS = 3, 22 | localparam type TagType = logic [TAG_WIDTH-1:0] 23 | 24 | ) ( 25 | // Copied from fpnew_top 26 | input logic clk_i, 27 | input logic rst_ni, 28 | // Input signals 29 | input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, 30 | input fpnew_pkg::roundmode_e rnd_mode_i, 31 | input fpnew_pkg::operation_e op_i, 32 | input logic op_mod_i, 33 | input fpnew_pkg::fp_format_e src_fmt_i, 34 | input fpnew_pkg::fp_format_e dst_fmt_i, 35 | input fpnew_pkg::int_format_e int_fmt_i, 36 | input logic vectorial_op_i, 37 | input TagType tag_i, 38 | // Input Handshake 39 | input logic in_valid_i, 40 | output logic in_ready_o, 41 | input logic flush_i, 42 | // Output signals 43 | output logic [WIDTH-1:0] result_o, 44 | output fpnew_pkg::status_t status_o, 45 | output TagType tag_o, 46 | // Output handshake 47 | output logic out_valid_o, 48 | input logic out_ready_i, 49 | // Indication of valid data in flight 50 | output logic busy_o 51 | ); 52 | 53 | localparam fpnew_pkg::fpu_features_t Features = '{ 54 | Width: int'(FLEN), 55 | EnableVectors: int'(ENABLE_VECTORS), 56 | EnableNanBox: int'(ENABLE_NAN_BOX), 57 | FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0), 58 | IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0) 59 | }; 60 | 
61 | // only pipeline regs is customized 62 | localparam fpnew_pkg::fpu_implementation_t Implementation = '{ 63 | PipeRegs: '{'{default: PIPELINE_STAGES}, 64 | '{default: PIPELINE_STAGES}, 65 | '{default: PIPELINE_STAGES}, 66 | '{default: PIPELINE_STAGES}}, 67 | UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL 68 | '{default: fpnew_pkg::DISABLED}, // DIVSQRT 69 | '{default: fpnew_pkg::DISABLED}, // NONCOMP 70 | '{default: fpnew_pkg::DISABLED}}, // CONV 71 | PipeConfig: fpnew_pkg::DISTRIBUTED 72 | }; 73 | 74 | fpnew_top #( 75 | .Features(Features), 76 | .Implementation(Implementation), 77 | .TagType(TagType) 78 | ) inst ( 79 | .clk_i(clk_i), 80 | .rst_ni(rst_ni), 81 | 82 | .operands_i(operands_i), 83 | .rnd_mode_i(rnd_mode_i), 84 | .op_i(op_i), 85 | .op_mod_i(op_mod_i), 86 | .src_fmt_i(src_fmt_i), 87 | .dst_fmt_i(dst_fmt_i), 88 | .int_fmt_i(int_fmt_i), 89 | .vectorial_op_i(vectorial_op_i), 90 | .tag_i(tag_i), 91 | 92 | .in_valid_i(in_valid_i), 93 | .in_ready_o(in_ready_o), 94 | .flush_i(flush_i), 95 | 96 | .result_o(result_o), 97 | .status_o(status_o), 98 | .tag_o(tag_o), 99 | 100 | .out_valid_o(out_valid_o), 101 | .out_ready_i(out_ready_i), 102 | 103 | .busy_o(busy_o) 104 | ); 105 | 106 | 107 | endmodule 108 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/FPNewBlackbox_1s.sv: -------------------------------------------------------------------------------- 1 | module FPNewBlackbox #( 2 | // fpu features 3 | parameter FLEN = 64, 4 | parameter ENABLE_VECTORS = 1, 5 | parameter ENABLE_NAN_BOX = 1, 6 | parameter ENABLE_FP32 = 1, 7 | parameter ENABLE_FP64 = 0, 8 | parameter ENABLE_FP16 = 0, 9 | parameter ENABLE_FP8 = 0, 10 | parameter ENABLE_FP16ALT = 0, 11 | parameter ENABLE_INT8 = 0, 12 | parameter ENABLE_INT16 = 0, 13 | parameter ENABLE_INT32 = 0, 14 | parameter ENABLE_INT64 = 0, 15 | // fpu implementation 16 | parameter PIPELINE_STAGES = 1, 17 | // tag type: logic array 18 | parameter TAG_WIDTH = 
2, 19 | // Do not change, follow fp-new definition 20 | localparam int unsigned WIDTH = FLEN, 21 | localparam int unsigned NUM_OPERANDS = 3, 22 | localparam type TagType = logic [TAG_WIDTH-1:0] 23 | 24 | ) ( 25 | // Copied from fpnew_top 26 | input logic clk_i, 27 | input logic rst_ni, 28 | // Input signals 29 | input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, 30 | input fpnew_pkg::roundmode_e rnd_mode_i, 31 | input fpnew_pkg::operation_e op_i, 32 | input logic op_mod_i, 33 | input fpnew_pkg::fp_format_e src_fmt_i, 34 | input fpnew_pkg::fp_format_e dst_fmt_i, 35 | input fpnew_pkg::int_format_e int_fmt_i, 36 | input logic vectorial_op_i, 37 | input TagType tag_i, 38 | // Input Handshake 39 | input logic in_valid_i, 40 | output logic in_ready_o, 41 | input logic flush_i, 42 | // Output signals 43 | output logic [WIDTH-1:0] result_o, 44 | output fpnew_pkg::status_t status_o, 45 | output TagType tag_o, 46 | // Output handshake 47 | output logic out_valid_o, 48 | input logic out_ready_i, 49 | // Indication of valid data in flight 50 | output logic busy_o 51 | ); 52 | 53 | localparam fpnew_pkg::fpu_features_t Features = '{ 54 | Width: int'(FLEN), 55 | EnableVectors: int'(ENABLE_VECTORS), 56 | EnableNanBox: int'(ENABLE_NAN_BOX), 57 | FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0), 58 | IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0) 59 | }; 60 | 61 | // only pipeline regs is customized 62 | localparam fpnew_pkg::fpu_implementation_t Implementation = '{ 63 | PipeRegs: '{'{default: PIPELINE_STAGES}, 64 | '{default: PIPELINE_STAGES}, 65 | '{default: PIPELINE_STAGES}, 66 | '{default: PIPELINE_STAGES}}, 67 | UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL 68 | '{default: fpnew_pkg::MERGED}, // DIVSQRT 69 | '{default: fpnew_pkg::PARALLEL}, // NONCOMP 70 | '{default: fpnew_pkg::MERGED}}, // CONV 
71 | PipeConfig: fpnew_pkg::BEFORE 72 | }; 73 | 74 | fpnew_top #( 75 | .Features(Features), 76 | .Implementation(Implementation), 77 | .TagType(TagType) 78 | ) inst ( 79 | .clk_i(clk_i), 80 | .rst_ni(rst_ni), 81 | 82 | .operands_i(operands_i), 83 | .rnd_mode_i(rnd_mode_i), 84 | .op_i(op_i), 85 | .op_mod_i(op_mod_i), 86 | .src_fmt_i(src_fmt_i), 87 | .dst_fmt_i(dst_fmt_i), 88 | .int_fmt_i(int_fmt_i), 89 | .vectorial_op_i(vectorial_op_i), 90 | .tag_i(tag_i), 91 | 92 | .in_valid_i(in_valid_i), 93 | .in_ready_o(in_ready_o), 94 | .flush_i(flush_i), 95 | 96 | .result_o(result_o), 97 | .status_o(status_o), 98 | .tag_o(tag_o), 99 | 100 | .out_valid_o(out_valid_o), 101 | .out_ready_i(out_ready_i), 102 | 103 | .busy_o(busy_o) 104 | ); 105 | 106 | 107 | endmodule 108 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/FPNewBlackbox_2s.sv: -------------------------------------------------------------------------------- 1 | module FPNewBlackbox #( 2 | // fpu features 3 | parameter FLEN = 64, 4 | parameter ENABLE_VECTORS = 1, 5 | parameter ENABLE_NAN_BOX = 1, 6 | parameter ENABLE_FP32 = 1, 7 | parameter ENABLE_FP64 = 0, 8 | parameter ENABLE_FP16 = 0, 9 | parameter ENABLE_FP8 = 0, 10 | parameter ENABLE_FP16ALT = 0, 11 | parameter ENABLE_INT8 = 0, 12 | parameter ENABLE_INT16 = 0, 13 | parameter ENABLE_INT32 = 0, 14 | parameter ENABLE_INT64 = 0, 15 | // fpu implementation 16 | parameter PIPELINE_STAGES = 2, 17 | // tag type: logic array 18 | parameter TAG_WIDTH = 2, 19 | // Do not change, follow fp-new definition 20 | localparam int unsigned WIDTH = FLEN, 21 | localparam int unsigned NUM_OPERANDS = 3, 22 | localparam type TagType = logic [TAG_WIDTH-1:0] 23 | 24 | ) ( 25 | // Copied from fpnew_top 26 | input logic clk_i, 27 | input logic rst_ni, 28 | // Input signals 29 | input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, 30 | input fpnew_pkg::roundmode_e rnd_mode_i, 31 | input fpnew_pkg::operation_e op_i, 32 | input 
logic op_mod_i, 33 | input fpnew_pkg::fp_format_e src_fmt_i, 34 | input fpnew_pkg::fp_format_e dst_fmt_i, 35 | input fpnew_pkg::int_format_e int_fmt_i, 36 | input logic vectorial_op_i, 37 | input TagType tag_i, 38 | // Input Handshake 39 | input logic in_valid_i, 40 | output logic in_ready_o, 41 | input logic flush_i, 42 | // Output signals 43 | output logic [WIDTH-1:0] result_o, 44 | output fpnew_pkg::status_t status_o, 45 | output TagType tag_o, 46 | // Output handshake 47 | output logic out_valid_o, 48 | input logic out_ready_i, 49 | // Indication of valid data in flight 50 | output logic busy_o 51 | ); 52 | 53 | localparam fpnew_pkg::fpu_features_t Features = '{ 54 | Width: int'(FLEN), 55 | EnableVectors: int'(ENABLE_VECTORS), 56 | EnableNanBox: int'(ENABLE_NAN_BOX), 57 | FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0), 58 | IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0) 59 | }; 60 | 61 | // only pipeline regs is customized 62 | localparam fpnew_pkg::fpu_implementation_t Implementation = '{ 63 | PipeRegs: '{'{default: PIPELINE_STAGES}, 64 | '{default: PIPELINE_STAGES}, 65 | '{default: PIPELINE_STAGES}, 66 | '{default: PIPELINE_STAGES}}, 67 | UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL 68 | '{default: fpnew_pkg::MERGED}, // DIVSQRT 69 | '{default: fpnew_pkg::PARALLEL}, // NONCOMP 70 | '{default: fpnew_pkg::MERGED}}, // CONV 71 | PipeConfig: fpnew_pkg::BEFORE 72 | }; 73 | 74 | fpnew_top #( 75 | .Features(Features), 76 | .Implementation(Implementation), 77 | .TagType(TagType) 78 | ) inst ( 79 | .clk_i(clk_i), 80 | .rst_ni(rst_ni), 81 | 82 | .operands_i(operands_i), 83 | .rnd_mode_i(rnd_mode_i), 84 | .op_i(op_i), 85 | .op_mod_i(op_mod_i), 86 | .src_fmt_i(src_fmt_i), 87 | .dst_fmt_i(dst_fmt_i), 88 | .int_fmt_i(int_fmt_i), 89 | .vectorial_op_i(vectorial_op_i), 90 | .tag_i(tag_i), 91 | 
92 | .in_valid_i(in_valid_i), 93 | .in_ready_o(in_ready_o), 94 | .flush_i(flush_i), 95 | 96 | .result_o(result_o), 97 | .status_o(status_o), 98 | .tag_o(tag_o), 99 | 100 | .out_valid_o(out_valid_o), 101 | .out_ready_i(out_ready_i), 102 | 103 | .busy_o(busy_o) 104 | ); 105 | 106 | 107 | endmodule 108 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/FPNewBlackbox_3s.sv: -------------------------------------------------------------------------------- 1 | module FPNewBlackbox #( 2 | // fpu features 3 | parameter FLEN = 64, 4 | parameter ENABLE_VECTORS = 1, 5 | parameter ENABLE_NAN_BOX = 1, 6 | parameter ENABLE_FP32 = 1, 7 | parameter ENABLE_FP64 = 0, 8 | parameter ENABLE_FP16 = 0, 9 | parameter ENABLE_FP8 = 0, 10 | parameter ENABLE_FP16ALT = 0, 11 | parameter ENABLE_INT8 = 0, 12 | parameter ENABLE_INT16 = 0, 13 | parameter ENABLE_INT32 = 0, 14 | parameter ENABLE_INT64 = 0, 15 | // fpu implementation 16 | parameter PIPELINE_STAGES = 3, 17 | // tag type: logic array 18 | parameter TAG_WIDTH = 2, 19 | // Do not change, follow fp-new definition 20 | localparam int unsigned WIDTH = FLEN, 21 | localparam int unsigned NUM_OPERANDS = 3, 22 | localparam type TagType = logic [TAG_WIDTH-1:0] 23 | 24 | ) ( 25 | // Copied from fpnew_top 26 | input logic clk_i, 27 | input logic rst_ni, 28 | // Input signals 29 | input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, 30 | input fpnew_pkg::roundmode_e rnd_mode_i, 31 | input fpnew_pkg::operation_e op_i, 32 | input logic op_mod_i, 33 | input fpnew_pkg::fp_format_e src_fmt_i, 34 | input fpnew_pkg::fp_format_e dst_fmt_i, 35 | input fpnew_pkg::int_format_e int_fmt_i, 36 | input logic vectorial_op_i, 37 | input TagType tag_i, 38 | // Input Handshake 39 | input logic in_valid_i, 40 | output logic in_ready_o, 41 | input logic flush_i, 42 | // Output signals 43 | output logic [WIDTH-1:0] result_o, 44 | output fpnew_pkg::status_t status_o, 45 | output TagType tag_o, 46 | // Output 
handshake 47 | output logic out_valid_o, 48 | input logic out_ready_i, 49 | // Indication of valid data in flight 50 | output logic busy_o 51 | ); 52 | 53 | localparam fpnew_pkg::fpu_features_t Features = '{ 54 | Width: int'(FLEN), 55 | EnableVectors: int'(ENABLE_VECTORS), 56 | EnableNanBox: int'(ENABLE_NAN_BOX), 57 | FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0), 58 | IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0) 59 | }; 60 | 61 | // only pipeline regs is customized 62 | localparam fpnew_pkg::fpu_implementation_t Implementation = '{ 63 | PipeRegs: '{'{default: PIPELINE_STAGES}, 64 | '{default: PIPELINE_STAGES}, 65 | '{default: PIPELINE_STAGES}, 66 | '{default: PIPELINE_STAGES}}, 67 | UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL 68 | '{default: fpnew_pkg::MERGED}, // DIVSQRT 69 | '{default: fpnew_pkg::PARALLEL}, // NONCOMP 70 | '{default: fpnew_pkg::MERGED}}, // CONV 71 | PipeConfig: fpnew_pkg::BEFORE 72 | }; 73 | 74 | fpnew_top #( 75 | .Features(Features), 76 | .Implementation(Implementation), 77 | .TagType(TagType) 78 | ) inst ( 79 | .clk_i(clk_i), 80 | .rst_ni(rst_ni), 81 | 82 | .operands_i(operands_i), 83 | .rnd_mode_i(rnd_mode_i), 84 | .op_i(op_i), 85 | .op_mod_i(op_mod_i), 86 | .src_fmt_i(src_fmt_i), 87 | .dst_fmt_i(dst_fmt_i), 88 | .int_fmt_i(int_fmt_i), 89 | .vectorial_op_i(vectorial_op_i), 90 | .tag_i(tag_i), 91 | 92 | .in_valid_i(in_valid_i), 93 | .in_ready_o(in_ready_o), 94 | .flush_i(flush_i), 95 | 96 | .result_o(result_o), 97 | .status_o(status_o), 98 | .tag_o(tag_o), 99 | 100 | .out_valid_o(out_valid_o), 101 | .out_ready_i(out_ready_i), 102 | 103 | .busy_o(busy_o) 104 | ); 105 | 106 | 107 | endmodule 108 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/FPNewBlackbox_4s.sv: 
-------------------------------------------------------------------------------- 1 | module FPNewBlackbox #( 2 | // fpu features, packed into the Features localparam below (FpFmtMask bits 4..0: FP32, FP64, FP16, FP8, FP16ALT; IntFmtMask bits 3..0: INT8, INT16, INT32, INT64) 3 | parameter FLEN = 64, 4 | parameter ENABLE_VECTORS = 1, 5 | parameter ENABLE_NAN_BOX = 1, 6 | parameter ENABLE_FP32 = 1, 7 | parameter ENABLE_FP64 = 0, 8 | parameter ENABLE_FP16 = 0, 9 | parameter ENABLE_FP8 = 0, 10 | parameter ENABLE_FP16ALT = 0, 11 | parameter ENABLE_INT8 = 0, 12 | parameter ENABLE_INT16 = 0, 13 | parameter ENABLE_INT32 = 0, 14 | parameter ENABLE_INT64 = 0, 15 | // fpu implementation: value used for every PipeRegs entry of the Implementation localparam below 16 | parameter PIPELINE_STAGES = 4, 17 | // tag type: logic [TAG_WIDTH-1:0] (see TagType below) 18 | parameter TAG_WIDTH = 2, 19 | // Do not change, follow fp-new definition 20 | localparam int unsigned WIDTH = FLEN, 21 | localparam int unsigned NUM_OPERANDS = 3, 22 | localparam type TagType = logic [TAG_WIDTH-1:0] 23 | 24 | ) ( 25 | // Port list copied from fpnew_top; each port is wired to the same-named port of the fpnew_top instance below 26 | input logic clk_i, 27 | input logic rst_ni, 28 | // Input signals 29 | input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, 30 | input fpnew_pkg::roundmode_e rnd_mode_i, 31 | input fpnew_pkg::operation_e op_i, 32 | input logic op_mod_i, 33 | input fpnew_pkg::fp_format_e src_fmt_i, 34 | input fpnew_pkg::fp_format_e dst_fmt_i, 35 | input fpnew_pkg::int_format_e int_fmt_i, 36 | input logic vectorial_op_i, 37 | input TagType tag_i, 38 | // Input Handshake 39 | input logic in_valid_i, 40 | output logic in_ready_o, 41 | input logic flush_i, 42 | // Output signals 43 | output logic [WIDTH-1:0] result_o, 44 | output fpnew_pkg::status_t status_o, 45 | output TagType tag_o, 46 | // Output handshake 47 | output logic out_valid_o, 48 | input logic out_ready_i, 49 | // Indication of valid data in flight 50 | output logic busy_o 51 | ); 52 | 53 | localparam fpnew_pkg::fpu_features_t Features = '{ 54 | Width: int'(FLEN), 55 | EnableVectors: int'(ENABLE_VECTORS), 56 | EnableNanBox: int'(ENABLE_NAN_BOX), 57 | FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1)
| (int'(ENABLE_FP16ALT) << 0), 58 | IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0) 59 | }; 60 | 61 | // Only PipeRegs is customized (every entry is PIPELINE_STAGES); UnitTypes and PipeConfig are fixed 62 | localparam fpnew_pkg::fpu_implementation_t Implementation = '{ 63 | PipeRegs: '{'{default: PIPELINE_STAGES}, 64 | '{default: PIPELINE_STAGES}, 65 | '{default: PIPELINE_STAGES}, 66 | '{default: PIPELINE_STAGES}}, 67 | UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL 68 | '{default: fpnew_pkg::MERGED}, // DIVSQRT 69 | '{default: fpnew_pkg::PARALLEL}, // NONCOMP 70 | '{default: fpnew_pkg::MERGED}}, // CONV 71 | PipeConfig: fpnew_pkg::BEFORE 72 | }; 73 | 74 | fpnew_top #( 75 | .Features(Features), 76 | .Implementation(Implementation), 77 | .TagType(TagType) 78 | ) inst ( 79 | .clk_i(clk_i), 80 | .rst_ni(rst_ni), 81 | 82 | .operands_i(operands_i), 83 | .rnd_mode_i(rnd_mode_i), 84 | .op_i(op_i), 85 | .op_mod_i(op_mod_i), 86 | .src_fmt_i(src_fmt_i), 87 | .dst_fmt_i(dst_fmt_i), 88 | .int_fmt_i(int_fmt_i), 89 | .vectorial_op_i(vectorial_op_i), 90 | .tag_i(tag_i), 91 | 92 | .in_valid_i(in_valid_i), 93 | .in_ready_o(in_ready_o), 94 | .flush_i(flush_i), 95 | 96 | .result_o(result_o), 97 | .status_o(status_o), 98 | .tag_o(tag_o), 99 | 100 | .out_valid_o(out_valid_o), 101 | .out_ready_i(out_ready_i), 102 | 103 | .busy_o(busy_o) 104 | ); 105 | 106 | 107 | endmodule 108 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/FPNewBlackbox_5s.sv: -------------------------------------------------------------------------------- 1 | module FPNewBlackbox #( 2 | // fpu features, packed into the Features localparam below (FpFmtMask bits 4..0: FP32, FP64, FP16, FP8, FP16ALT; IntFmtMask bits 3..0: INT8, INT16, INT32, INT64) 3 | parameter FLEN = 64, 4 | parameter ENABLE_VECTORS = 1, 5 | parameter ENABLE_NAN_BOX = 1, 6 | parameter ENABLE_FP32 = 1, 7 | parameter ENABLE_FP64 = 0, 8 | parameter ENABLE_FP16 = 0, 9 | parameter ENABLE_FP8 = 0, 10 | parameter ENABLE_FP16ALT = 0, 11 | parameter ENABLE_INT8 = 0, 12 | parameter ENABLE_INT16 = 0, 13 | parameter
ENABLE_INT32 = 0, 14 | parameter ENABLE_INT64 = 0, 15 | // fpu implementation: value used for every PipeRegs entry of the Implementation localparam below 16 | parameter PIPELINE_STAGES = 5, 17 | // tag type: logic [TAG_WIDTH-1:0] (see TagType below) 18 | parameter TAG_WIDTH = 2, 19 | // Do not change, follow fp-new definition 20 | localparam int unsigned WIDTH = FLEN, 21 | localparam int unsigned NUM_OPERANDS = 3, 22 | localparam type TagType = logic [TAG_WIDTH-1:0] 23 | 24 | ) ( 25 | // Port list copied from fpnew_top; each port is wired to the same-named port of the fpnew_top instance below 26 | input logic clk_i, 27 | input logic rst_ni, 28 | // Input signals 29 | input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, 30 | input fpnew_pkg::roundmode_e rnd_mode_i, 31 | input fpnew_pkg::operation_e op_i, 32 | input logic op_mod_i, 33 | input fpnew_pkg::fp_format_e src_fmt_i, 34 | input fpnew_pkg::fp_format_e dst_fmt_i, 35 | input fpnew_pkg::int_format_e int_fmt_i, 36 | input logic vectorial_op_i, 37 | input TagType tag_i, 38 | // Input Handshake 39 | input logic in_valid_i, 40 | output logic in_ready_o, 41 | input logic flush_i, 42 | // Output signals 43 | output logic [WIDTH-1:0] result_o, 44 | output fpnew_pkg::status_t status_o, 45 | output TagType tag_o, 46 | // Output handshake 47 | output logic out_valid_o, 48 | input logic out_ready_i, 49 | // Indication of valid data in flight 50 | output logic busy_o 51 | ); 52 | 53 | localparam fpnew_pkg::fpu_features_t Features = '{ 54 | Width: int'(FLEN), 55 | EnableVectors: int'(ENABLE_VECTORS), 56 | EnableNanBox: int'(ENABLE_NAN_BOX), 57 | FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0), 58 | IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0) 59 | }; 60 | 61 | // Only PipeRegs is customized (every entry is PIPELINE_STAGES); UnitTypes and PipeConfig are fixed 62 | localparam fpnew_pkg::fpu_implementation_t Implementation = '{ 63 | PipeRegs: '{'{default: PIPELINE_STAGES}, 64 | '{default: PIPELINE_STAGES}, 65 | '{default: PIPELINE_STAGES}, 66 | '{default: PIPELINE_STAGES}}, 67 | UnitTypes: '{'{default:
fpnew_pkg::PARALLEL}, // ADDMUL 68 | '{default: fpnew_pkg::MERGED}, // DIVSQRT 69 | '{default: fpnew_pkg::PARALLEL}, // NONCOMP 70 | '{default: fpnew_pkg::MERGED}}, // CONV 71 | PipeConfig: fpnew_pkg::BEFORE 72 | }; 73 | 74 | fpnew_top #( 75 | .Features(Features), 76 | .Implementation(Implementation), 77 | .TagType(TagType) 78 | ) inst ( 79 | .clk_i(clk_i), 80 | .rst_ni(rst_ni), 81 | 82 | .operands_i(operands_i), 83 | .rnd_mode_i(rnd_mode_i), 84 | .op_i(op_i), 85 | .op_mod_i(op_mod_i), 86 | .src_fmt_i(src_fmt_i), 87 | .dst_fmt_i(dst_fmt_i), 88 | .int_fmt_i(int_fmt_i), 89 | .vectorial_op_i(vectorial_op_i), 90 | .tag_i(tag_i), 91 | 92 | .in_valid_i(in_valid_i), 93 | .in_ready_o(in_ready_o), 94 | .flush_i(flush_i), 95 | 96 | .result_o(result_o), 97 | .status_o(status_o), 98 | .tag_o(tag_o), 99 | 100 | .out_valid_o(out_valid_o), 101 | .out_ready_i(out_ready_i), 102 | 103 | .busy_o(busy_o) 104 | ); 105 | 106 | 107 | endmodule 108 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/Makefile: -------------------------------------------------------------------------------- 1 | ROOT = ../../../thirdparty 2 | VSRCS = $(ROOT)/fpnew/src/common_cells/src/rr_arb_tree.sv \ 3 | $(ROOT)/fpnew/src/common_cells/src/cf_math_pkg.sv \ 4 | $(ROOT)/fpnew/src/common_cells/src/lzc.sv \ 5 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ 6 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \ 7 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \ 8 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \ 9 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \ 10 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \ 11 | $(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv 12 | VSRCS += $(ROOT)/fpnew/src/fpnew_pkg.sv \ 13 | $(ROOT)/fpnew/src/fpnew_cast_multi.sv \ 14 | $(ROOT)/fpnew/src/fpnew_classifier.sv \ 15 | $(ROOT)/fpnew/src/fpnew_divsqrt_multi.sv \ 16 |
$(ROOT)/fpnew/src/fpnew_fma.sv \ 17 | $(ROOT)/fpnew/src/fpnew_fma_multi.sv \ 18 | $(ROOT)/fpnew/src/fpnew_noncomp.sv \ 19 | $(ROOT)/fpnew/src/fpnew_opgroup_block.sv \ 20 | $(ROOT)/fpnew/src/fpnew_opgroup_fmt_slice.sv \ 21 | $(ROOT)/fpnew/src/fpnew_opgroup_multifmt_slice.sv \ 22 | $(ROOT)/fpnew/src/fpnew_rounding.sv \ 23 | $(ROOT)/fpnew/src/fpnew_top.sv 24 | VSRCS += FPNewBlackbox$(SUFFIX).sv 25 | 26 | VERILATOR_OPTS = -E +incdir+$(ROOT)/fpnew/src/common_cells/include 27 | 28 | # yosys read_verilog must be given -defer here, otherwise it can fail with TAG_WIDTH=0 29 | FPNewBlackbox$(SUFFIX).synth.v: FPNewBlackbox$(SUFFIX).preprocessed.v 30 | yosys -p 'read_verilog -defer $^' -p 'hierarchy -top FPNewBlackbox' -p 'proc' -p 'opt' -p 'write_verilog -noattr $@' 31 | 32 | FPNewBlackbox$(SUFFIX).preprocessed.v: FPNewBlackbox$(SUFFIX).preprocessed.sv 33 | ~/sv2v/bin/sv2v $^ > $@ 34 | sed -i '/\$$fatal/d' $@ 35 | 36 | FPNewBlackbox$(SUFFIX).preprocessed.sv: Makefile $(VSRCS) 37 | cat $(VSRCS) > cat.sv 38 | verilator --cc --exe $(VERILATOR_OPTS) cat.sv --top-module FPNewBlackbox > $@ 39 | sed -i '/^`line/d' $@ 40 | rm cat.sv 41 | 42 | clean: 43 | rm -f FPNewBlackbox*.synth.v FPNewBlackbox*.preprocessed.v FPNewBlackbox*.preprocessed.sv 44 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/fpnew/gen.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | width = { # bits per lane for each format letter 4 | 'S': 32, # FP32 (sets __FP32__ to 1 below) 5 | 'D': 64 # FP64 (sets __FP64__ to 1 below) 6 | } 7 | 8 | for stage in range(1, 6): # pipeline depths 1 through 5 9 | for format in ['S', 'D']: 10 | for lane in range(1, 3): # 1 or 2 lanes 11 | suffix = f'_{format}{lane}l{stage}s' # e.g. _S1l3s 12 | os.system(f"cp FPNewBlackbox.sv FPNewBlackbox{suffix}.sv") # start from the template, then fill in its __...__ placeholders with sed 13 | os.system(f"sed -i 's/__FLEN__/{width[format]*lane}/' FPNewBlackbox{suffix}.sv") # FLEN = per-lane width times lane count 14 | fp32 = int(format == "S") 15 | os.system(f"sed -i 's/__FP32__/{fp32}/' FPNewBlackbox{suffix}.sv") 16 | fp64 = int(format == "D") 17 | os.system(f"sed -i 's/__FP64__/{fp64}/' FPNewBlackbox{suffix}.sv") 18 | os.system(f"sed
-i 's/__STAGES__/{stage}/' FPNewBlackbox{suffix}.sv") 19 | os.system(f"make SUFFIX={suffix}") 20 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/opencores/.gitignore: -------------------------------------------------------------------------------- 1 | !*.v 2 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/opencores/except.v: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////// 2 | //// //// 3 | //// EXCEPT //// 4 | //// Floating Point Exception/Special Numbers Unit //// 5 | //// //// 6 | //// Author: Rudolf Usselmann //// 7 | //// rudi@asics.ws //// 8 | //// //// 9 | ///////////////////////////////////////////////////////////////////// 10 | //// //// 11 | //// Copyright (C) 2000 Rudolf Usselmann //// 12 | //// rudi@asics.ws //// 13 | //// //// 14 | //// This source file may be used and distributed without //// 15 | //// restriction provided that this copyright statement is not //// 16 | //// removed from the file and that any derivative work contains //// 17 | //// the original copyright notice and the associated disclaimer.//// 18 | //// //// 19 | //// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// 22 | //// FOR A PARTICULAR PURPOSE.
IN NO EVENT SHALL THE AUTHOR //// 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// 28 | //// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// 31 | //// POSSIBILITY OF SUCH DAMAGE. //// 32 | //// //// 33 | ///////////////////////////////////////////////////////////////////// 34 | 35 | 36 | `timescale 1ns / 100ps 37 | 38 | 39 | module except( clk, opa, opb, inf, ind, qnan, snan, opa_nan, opb_nan, 40 | opa_00, opb_00, opa_inf, opb_inf, opa_dn, opb_dn); 41 | input clk; // every output of this module is a register clocked on posedge clk 42 | input [31:0] opa, opb; // operands: bits [30:23] are used as exponent, bits [22:0] as fraction (see Aliases) 43 | output inf, ind, qnan, snan, opa_nan, opb_nan; 44 | output opa_00, opb_00; 45 | output opa_inf, opb_inf; 46 | output opa_dn; 47 | output opb_dn; 48 | 49 | //////////////////////////////////////////////////////////////////////// 50 | // 51 | // Local Wires and registers 52 | // 53 | 54 | wire [7:0] expa, expb; // alias to opX exponent 55 | wire [22:0] fracta, fractb; // alias to opX fraction 56 | reg expa_ff, infa_f_r, qnan_r_a, snan_r_a; // first-stage flags registered from opa 57 | reg expb_ff, infb_f_r, qnan_r_b, snan_r_b; // first-stage flags registered from opb 58 | reg inf, ind, qnan, snan; // Output registers (built from the first-stage flags, so two clock edges after opa/opb) 59 | reg opa_nan, opb_nan; 60 | reg expa_00, expb_00, fracta_00, fractb_00; 61 | reg opa_00, opb_00; 62 | reg opa_inf, opb_inf; 63 | reg opa_dn, opb_dn; 64 | 65 | //////////////////////////////////////////////////////////////////////// 66 | // 67 | // Aliases 68 | // 69 | 70 | assign expa = opa[30:23]; 71 | assign expb = opb[30:23]; 72 | assign fracta = opa[22:0]; 73 | assign fractb = opb[22:0]; 74 | 75 | //////////////////////////////////////////////////////////////////////// 76 | // 77 |
// Determine if any of the input operands is an INF or NAN or any other special number 78 | // 79 | 80 | always @(posedge clk) 81 | expa_ff <= &expa; // opa exponent is all ones 82 | 83 | always @(posedge clk) 84 | expb_ff <= &expb; // opb exponent is all ones 85 | 86 | always @(posedge clk) 87 | infa_f_r <= !(|fracta); // opa fraction is all zeros 88 | 89 | always @(posedge clk) 90 | infb_f_r <= !(|fractb); // opb fraction is all zeros 91 | 92 | always @(posedge clk) 93 | qnan_r_a <= fracta[22]; // opa top fraction bit is set 94 | 95 | always @(posedge clk) 96 | snan_r_a <= !fracta[22] & |fracta[21:0]; // opa top fraction bit clear and some lower fraction bit set 97 | 98 | always @(posedge clk) 99 | qnan_r_b <= fractb[22]; // opb top fraction bit is set 100 | 101 | always @(posedge clk) 102 | snan_r_b <= !fractb[22] & |fractb[21:0]; // opb top fraction bit clear and some lower fraction bit set 103 | 104 | always @(posedge clk) 105 | ind <= (expa_ff & infa_f_r) & (expb_ff & infb_f_r); // both operands are infinity 106 | 107 | always @(posedge clk) 108 | inf <= (expa_ff & infa_f_r) | (expb_ff & infb_f_r); // at least one operand is infinity 109 | 110 | always @(posedge clk) 111 | qnan <= (expa_ff & qnan_r_a) | (expb_ff & qnan_r_b); // at least one operand has all-ones exponent and top fraction bit set 112 | 113 | always @(posedge clk) 114 | snan <= (expa_ff & snan_r_a) | (expb_ff & snan_r_b); // at least one operand has all-ones exponent, top fraction bit clear, nonzero fraction 115 | 116 | always @(posedge clk) 117 | opa_nan <= &expa & (|fracta[22:0]); // computed straight from opa (one clock edge), unlike the flags built from expa_ff 118 | 119 | always @(posedge clk) 120 | opb_nan <= &expb & (|fractb[22:0]); // computed straight from opb (one clock edge), unlike the flags built from expb_ff 121 | 122 | always @(posedge clk) 123 | opa_inf <= (expa_ff & infa_f_r); // opa: exponent all ones, fraction zero 124 | 125 | always @(posedge clk) 126 | opb_inf <= (expb_ff & infb_f_r); // opb: exponent all ones, fraction zero 127 | 128 | always @(posedge clk) 129 | expa_00 <= !(|expa); // opa exponent is zero 130 | 131 | always @(posedge clk) 132 | expb_00 <= !(|expb); // opb exponent is zero 133 | 134 | always @(posedge clk) 135 | fracta_00 <= !(|fracta); // opa fraction is zero 136 | 137 | always @(posedge clk) 138 | fractb_00 <= !(|fractb); // opb fraction is zero 139 | 140 | always @(posedge clk) 141 | opa_00 <= expa_00 & fracta_00; // opa exponent and fraction are both zero 142 | 143 | always @(posedge clk) 144 | opb_00 <= expb_00 & fractb_00; // opb exponent and fraction are both zero 145 | 146 | always @(posedge clk) 147 | opa_dn <= expa_00; // opa exponent is zero; the fraction is not checked, so this is also set when opa is zero 148 | 149 | always @(posedge clk) 150 | opb_dn <= expb_00; // opb exponent is zero; the fraction is not checked, so this is also set when opb is zero 151 | 152 | endmodule 153 | 154 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/opencores/fpu.v:
-------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////// 2 | //// //// 3 | //// FPU //// 4 | //// Floating Point Unit (Single precision) //// 5 | //// //// 6 | //// Author: Rudolf Usselmann //// 7 | //// rudi@asics.ws //// 8 | //// //// 9 | ///////////////////////////////////////////////////////////////////// 10 | //// //// 11 | //// Copyright (C) 2000 Rudolf Usselmann //// 12 | //// rudi@asics.ws //// 13 | //// //// 14 | //// This source file may be used and distributed without //// 15 | //// restriction provided that this copyright statement is not //// 16 | //// removed from the file and that any derivative work contains //// 17 | //// the original copyright notice and the associated disclaimer.//// 18 | //// //// 19 | //// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// 22 | //// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //// 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// 28 | //// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// 31 | //// POSSIBILITY OF SUCH DAMAGE. 
//// 32 | //// //// 33 | ///////////////////////////////////////////////////////////////////// 34 | 35 | `timescale 1ns / 100ps 36 | 37 | /* 38 | 39 | FPU Operations (fpu_op): 40 | ======================== 41 | 42 | 0 = add 43 | 1 = sub 44 | 2 = mul 45 | 3 = div 46 | 4 = 47 | 5 = 48 | 6 = 49 | 7 = 50 | 51 | Rounding Modes (rmode): 52 | ======================= 53 | 54 | 0 = round_nearest_even 55 | 1 = round_to_zero 56 | 2 = round_up 57 | 3 = round_down 58 | 59 | */ 60 | 61 | 62 | module fpu( clk, rmode, fpu_op, opa, opb, out, inf, snan, qnan, ine, overflow, underflow, zero, div_by_zero); 63 | input clk; 64 | input [1:0] rmode; 65 | input [2:0] fpu_op; 66 | input [31:0] opa, opb; 67 | output [31:0] out; 68 | output inf, snan, qnan; 69 | output ine; 70 | output overflow, underflow; 71 | output zero; 72 | output div_by_zero; 73 | 74 | parameter INF = 31'h7f800000, 75 | QNAN = 31'h7fc00001, 76 | SNAN = 31'h7f800001; 77 | 78 | //////////////////////////////////////////////////////////////////////// 79 | // 80 | // Local Wires 81 | // 82 | reg zero; 83 | reg [31:0] opa_r, opb_r; // Input operand registers 84 | reg [31:0] out; // Output register 85 | reg div_by_zero; // Divide by zero output register 86 | wire signa, signb; // alias to opX sign 87 | wire sign_fasu; // sign output 88 | wire [26:0] fracta, fractb; // Fraction Outputs from EQU block 89 | wire [7:0] exp_fasu; // Exponent output from EQU block 90 | reg [7:0] exp_r; // Exponent output (registerd) 91 | wire [26:0] fract_out_d; // fraction output 92 | wire co_d; // carry output 93 | reg [27:0] fract_out_q; // fraction output (registerd) 94 | wire [30:0] out_d; // Intermediate final result output 95 | wire overflow_d, underflow_d;// Overflow/Underflow Indicators 96 | reg overflow, underflow; // Output registers for Overflow & Underflow 97 | reg inf, snan, qnan; // Output Registers for INF, SNAN and QNAN 98 | reg ine; // Output Registers for INE 99 | reg [1:0] rmode_r1, rmode_r2, // Pipeline registers for rounding 
mode 100 | rmode_r3; 101 | reg [2:0] fpu_op_r1, fpu_op_r2, // Pipeline registers for fp opration 102 | fpu_op_r3; 103 | wire mul_inf, div_inf; 104 | wire mul_00, div_00; 105 | 106 | //////////////////////////////////////////////////////////////////////// 107 | // 108 | // Input Registers 109 | // 110 | 111 | always @(posedge clk) 112 | opa_r <= opa; 113 | 114 | always @(posedge clk) 115 | opb_r <= opb; 116 | 117 | always @(posedge clk) 118 | rmode_r1 <= rmode; 119 | 120 | always @(posedge clk) 121 | rmode_r2 <= rmode_r1; 122 | 123 | always @(posedge clk) 124 | rmode_r3 <= rmode_r2; 125 | 126 | always @(posedge clk) 127 | fpu_op_r1 <= fpu_op; 128 | 129 | always @(posedge clk) 130 | fpu_op_r2 <= fpu_op_r1; 131 | 132 | always @(posedge clk) 133 | fpu_op_r3 <= fpu_op_r2; 134 | 135 | //////////////////////////////////////////////////////////////////////// 136 | // 137 | // Exceptions block 138 | // 139 | wire inf_d, ind_d, qnan_d, snan_d, opa_nan, opb_nan; 140 | wire opa_00, opb_00; 141 | wire opa_inf, opb_inf; 142 | wire opa_dn, opb_dn; 143 | 144 | except u0( .clk(clk), 145 | .opa(opa_r), .opb(opb_r), 146 | .inf(inf_d), .ind(ind_d), 147 | .qnan(qnan_d), .snan(snan_d), 148 | .opa_nan(opa_nan), .opb_nan(opb_nan), 149 | .opa_00(opa_00), .opb_00(opb_00), 150 | .opa_inf(opa_inf), .opb_inf(opb_inf), 151 | .opa_dn(opa_dn), .opb_dn(opb_dn) 152 | ); 153 | 154 | //////////////////////////////////////////////////////////////////////// 155 | // 156 | // Pre-Normalize block 157 | // - Adjusts the numbers to equal exponents and sorts them 158 | // - determine result sign 159 | // - determine actual operation to perform (add or sub) 160 | // 161 | 162 | wire fasu_op; 163 | wire nan_sign_d, result_zero_sign_d; 164 | reg sign_fasu_r; 165 | wire [7:0] exp_mul; 166 | wire sign_mul; 167 | reg sign_mul_r; 168 | wire [23:0] fracta_mul, fractb_mul; 169 | wire inf_mul; 170 | reg inf_mul_r; 171 | wire [1:0] exp_ovf; 172 | reg [1:0] exp_ovf_r; 173 | wire sign_exe; 174 | reg sign_exe_r; 175 | 
wire [2:0] underflow_fmul_d; 176 | 177 | 178 | pre_norm u1(.clk(clk), // System Clock 179 | .rmode(rmode_r2), // Roundin Mode 180 | .add(!fpu_op_r1[0]), // Add/Sub Input 181 | .opa(opa_r), .opb(opb_r), // Registered OP Inputs 182 | .opa_nan(opa_nan), // OpA is a NAN indicator 183 | .opb_nan(opb_nan), // OpB is a NAN indicator 184 | .fracta_out(fracta), // Equalized and sorted fraction 185 | .fractb_out(fractb), // outputs (Registered) 186 | .exp_dn_out(exp_fasu), // Selected exponent output (registered); 187 | .sign(sign_fasu), // Encoded output Sign (registered) 188 | .nan_sign(nan_sign_d), // Output Sign for NANs (registered) 189 | .result_zero_sign(result_zero_sign_d), // Output Sign for zero result (registered) 190 | .fasu_op(fasu_op) // Actual fasu operation output (registered) 191 | ); 192 | 193 | always @(posedge clk) 194 | sign_fasu_r <= sign_fasu; 195 | 196 | pre_norm_fmul u2( 197 | .clk(clk), 198 | .fpu_op(fpu_op_r1), 199 | .opa(opa_r), .opb(opb_r), 200 | .fracta(fracta_mul), 201 | .fractb(fractb_mul), 202 | .exp_out(exp_mul), // FMUL exponent output (registered) 203 | .sign(sign_mul), // FMUL sign output (registered) 204 | .sign_exe(sign_exe), // FMUL exception sign output (registered) 205 | .inf(inf_mul), // FMUL inf output (registered) 206 | .exp_ovf(exp_ovf), // FMUL exponnent overflow output (registered) 207 | .underflow(underflow_fmul_d) 208 | ); 209 | 210 | 211 | always @(posedge clk) 212 | sign_mul_r <= sign_mul; 213 | 214 | always @(posedge clk) 215 | sign_exe_r <= sign_exe; 216 | 217 | always @(posedge clk) 218 | inf_mul_r <= inf_mul; 219 | 220 | always @(posedge clk) 221 | exp_ovf_r <= exp_ovf; 222 | 223 | 224 | //////////////////////////////////////////////////////////////////////// 225 | // 226 | // Add/Sub 227 | // 228 | 229 | add_sub27 u3( 230 | .add(fasu_op), // Add/Sub 231 | .opa(fracta), // Fraction A input 232 | .opb(fractb), // Fraction B Input 233 | .sum(fract_out_d), // SUM output 234 | .co(co_d) ); // Carry Output 235 | 236 | always 
@(posedge clk) 237 | fract_out_q <= {co_d, fract_out_d}; 238 | 239 | //////////////////////////////////////////////////////////////////////// 240 | // 241 | // Mul 242 | // 243 | wire [47:0] prod; 244 | 245 | mul_r2 u5(.clk(clk), .opa(fracta_mul), .opb(fractb_mul), .prod(prod)); 246 | 247 | //////////////////////////////////////////////////////////////////////// 248 | // 249 | // Divide 250 | // 251 | wire [49:0] quo; 252 | wire [49:0] fdiv_opa; 253 | wire [49:0] remainder; 254 | wire remainder_00; 255 | reg [4:0] div_opa_ldz_d, div_opa_ldz_r1, div_opa_ldz_r2; 256 | 257 | always @(fracta_mul) 258 | casez(fracta_mul[22:0]) 259 | 23'b1??????????????????????: div_opa_ldz_d = 1; 260 | 23'b01?????????????????????: div_opa_ldz_d = 2; 261 | 23'b001????????????????????: div_opa_ldz_d = 3; 262 | 23'b0001???????????????????: div_opa_ldz_d = 4; 263 | 23'b00001??????????????????: div_opa_ldz_d = 5; 264 | 23'b000001?????????????????: div_opa_ldz_d = 6; 265 | 23'b0000001????????????????: div_opa_ldz_d = 7; 266 | 23'b00000001???????????????: div_opa_ldz_d = 8; 267 | 23'b000000001??????????????: div_opa_ldz_d = 9; 268 | 23'b0000000001?????????????: div_opa_ldz_d = 10; 269 | 23'b00000000001????????????: div_opa_ldz_d = 11; 270 | 23'b000000000001???????????: div_opa_ldz_d = 12; 271 | 23'b0000000000001??????????: div_opa_ldz_d = 13; 272 | 23'b00000000000001?????????: div_opa_ldz_d = 14; 273 | 23'b000000000000001????????: div_opa_ldz_d = 15; 274 | 23'b0000000000000001???????: div_opa_ldz_d = 16; 275 | 23'b00000000000000001??????: div_opa_ldz_d = 17; 276 | 23'b000000000000000001?????: div_opa_ldz_d = 18; 277 | 23'b0000000000000000001????: div_opa_ldz_d = 19; 278 | 23'b00000000000000000001???: div_opa_ldz_d = 20; 279 | 23'b000000000000000000001??: div_opa_ldz_d = 21; 280 | 23'b0000000000000000000001?: div_opa_ldz_d = 22; 281 | 23'b0000000000000000000000?: div_opa_ldz_d = 23; 282 | endcase 283 | 284 | assign fdiv_opa = !(|opa_r[30:23]) ? 
{(fracta_mul< expb; // expa is larger than expb 104 | 105 | // --------------------------------------------------------------------- 106 | // Normalize 107 | 108 | assign expa_dn = !(|expa); // opa denormalized 109 | assign expb_dn = !(|expb); // opb denormalized 110 | 111 | // --------------------------------------------------------------------- 112 | // Calculate the difference between the smaller and larger exponent 113 | 114 | wire [7:0] exp_diff1, exp_diff1a, exp_diff2; 115 | 116 | assign exp_small = expa_lt_expb ? expb : expa; 117 | assign exp_large = expa_lt_expb ? expa : expb; 118 | assign exp_diff1 = exp_large - exp_small; 119 | assign exp_diff1a = exp_diff1-1; 120 | assign exp_diff2 = (expa_dn | expb_dn) ? exp_diff1a : exp_diff1; 121 | assign exp_diff = (expa_dn & expb_dn) ? 8'h0 : exp_diff2; 122 | 123 | always @(posedge clk) // If numbers are equal we should return zero 124 | exp_dn_out <= (!add_d & expa==expb & fracta==fractb) ? 8'h0 : exp_large; 125 | 126 | // --------------------------------------------------------------------- 127 | // Adjust the smaller fraction 128 | 129 | 130 | assign op_dn = expa_lt_expb ? expb_dn : expa_dn; 131 | assign adj_op = expa_lt_expb ? fractb : fracta; 132 | assign adj_op_tmp = { ~op_dn, adj_op, 3'b0 }; // recover hidden bit (op_dn) 133 | 134 | // adj_op_out is 27 bits wide, so can only be shifted 27 bits to the right 135 | assign exp_lt_27 = exp_diff > 8'd27; 136 | assign exp_diff_sft = exp_lt_27 ? 
5'd27 : exp_diff[4:0]; 137 | assign adj_op_out_sft = adj_op_tmp >> exp_diff_sft; 138 | assign adj_op_out = {adj_op_out_sft[26:1], adj_op_out_sft[0] | sticky }; 139 | 140 | // --------------------------------------------------------------------- 141 | // Get truncated portion (sticky bit) 142 | 143 | always @(exp_diff_sft or adj_op_tmp) 144 | case(exp_diff_sft) // synopsys full_case parallel_case 145 | 00: sticky = 1'h0; 146 | 01: sticky = adj_op_tmp[0]; 147 | 02: sticky = |adj_op_tmp[01:0]; 148 | 03: sticky = |adj_op_tmp[02:0]; 149 | 04: sticky = |adj_op_tmp[03:0]; 150 | 05: sticky = |adj_op_tmp[04:0]; 151 | 06: sticky = |adj_op_tmp[05:0]; 152 | 07: sticky = |adj_op_tmp[06:0]; 153 | 08: sticky = |adj_op_tmp[07:0]; 154 | 09: sticky = |adj_op_tmp[08:0]; 155 | 10: sticky = |adj_op_tmp[09:0]; 156 | 11: sticky = |adj_op_tmp[10:0]; 157 | 12: sticky = |adj_op_tmp[11:0]; 158 | 13: sticky = |adj_op_tmp[12:0]; 159 | 14: sticky = |adj_op_tmp[13:0]; 160 | 15: sticky = |adj_op_tmp[14:0]; 161 | 16: sticky = |adj_op_tmp[15:0]; 162 | 17: sticky = |adj_op_tmp[16:0]; 163 | 18: sticky = |adj_op_tmp[17:0]; 164 | 19: sticky = |adj_op_tmp[18:0]; 165 | 20: sticky = |adj_op_tmp[19:0]; 166 | 21: sticky = |adj_op_tmp[20:0]; 167 | 22: sticky = |adj_op_tmp[21:0]; 168 | 23: sticky = |adj_op_tmp[22:0]; 169 | 24: sticky = |adj_op_tmp[23:0]; 170 | 25: sticky = |adj_op_tmp[24:0]; 171 | 26: sticky = |adj_op_tmp[25:0]; 172 | 27: sticky = |adj_op_tmp[26:0]; 173 | endcase 174 | 175 | // --------------------------------------------------------------------- 176 | // Select operands for add/sub (recover hidden bit) 177 | 178 | assign fracta_n = expa_lt_expb ? {~expa_dn, fracta, 3'b0} : adj_op_out; 179 | assign fractb_n = expa_lt_expb ? 
adj_op_out : {~expb_dn, fractb, 3'b0}; 180 | 181 | // --------------------------------------------------------------------- 182 | // Sort operands (for sub only) 183 | 184 | assign fractb_lt_fracta = fractb_n > fracta_n; // fractb is larger than fracta 185 | assign fracta_s = fractb_lt_fracta ? fractb_n : fracta_n; 186 | assign fractb_s = fractb_lt_fracta ? fracta_n : fractb_n; 187 | 188 | always @(posedge clk) 189 | fracta_out <= fracta_s; 190 | 191 | always @(posedge clk) 192 | fractb_out <= fractb_s; 193 | 194 | // --------------------------------------------------------------------- 195 | // Determine sign for the output 196 | 197 | // sign: 0=Positive Number; 1=Negative Number 198 | always @(signa or signb or add or fractb_lt_fracta) 199 | case({signa, signb, add}) // synopsys full_case parallel_case 200 | 201 | // Add 202 | 3'b0_0_1: sign_d = 0; 203 | 3'b0_1_1: sign_d = fractb_lt_fracta; 204 | 3'b1_0_1: sign_d = !fractb_lt_fracta; 205 | 3'b1_1_1: sign_d = 1; 206 | 207 | // Sub 208 | 3'b0_0_0: sign_d = fractb_lt_fracta; 209 | 3'b0_1_0: sign_d = 0; 210 | 3'b1_0_0: sign_d = 1; 211 | 3'b1_1_0: sign_d = !fractb_lt_fracta; 212 | endcase 213 | 214 | always @(posedge clk) 215 | sign <= sign_d; 216 | 217 | // Fix sign for ZERO result 218 | always @(posedge clk) 219 | signa_r <= signa; 220 | 221 | always @(posedge clk) 222 | signb_r <= signb; 223 | 224 | always @(posedge clk) 225 | add_r <= add; 226 | 227 | always @(posedge clk) 228 | result_zero_sign <= ( add_r & signa_r & signb_r) | 229 | (!add_r & signa_r & !signb_r) | 230 | ( add_r & (signa_r | signb_r) & (rmode==3)) | 231 | (!add_r & (signa_r == signb_r) & (rmode==3)); 232 | 233 | // Fix sign for NAN result 234 | always @(posedge clk) 235 | fracta_lt_fractb <= fracta < fractb; 236 | 237 | always @(posedge clk) 238 | fracta_eq_fractb <= fracta == fractb; 239 | 240 | assign nan_sign1 = fracta_eq_fractb ? (signa_r & signb_r) : fracta_lt_fractb ? 
signb_r : signa_r; 241 | 242 | always @(posedge clk) 243 | nan_sign <= (opa_nan & opb_nan) ? nan_sign1 : opb_nan ? signb_r : signa_r; 244 | 245 | //////////////////////////////////////////////////////////////////////// 246 | // 247 | // Decode Add/Sub operation 248 | // 249 | 250 | // add: 1=Add; 0=Subtract 251 | always @(signa or signb or add) 252 | case({signa, signb, add}) // synopsys full_case parallel_case 253 | 254 | // Add 255 | 3'b0_0_1: add_d = 1; 256 | 3'b0_1_1: add_d = 0; 257 | 3'b1_0_1: add_d = 0; 258 | 3'b1_1_1: add_d = 1; 259 | 260 | // Sub 261 | 3'b0_0_0: add_d = 0; 262 | 3'b0_1_0: add_d = 1; 263 | 3'b1_0_0: add_d = 1; 264 | 3'b1_1_0: add_d = 0; 265 | endcase 266 | 267 | always @(posedge clk) 268 | fasu_op <= add_d; 269 | 270 | endmodule 271 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/opencores/pre_norm_fmul.v: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////// 2 | //// //// 3 | //// Pre Normalize //// 4 | //// Floating Point Pre Normalization Unit for FMUL //// 5 | //// //// 6 | //// Author: Rudolf Usselmann //// 7 | //// rudi@asics.ws //// 8 | //// //// 9 | ///////////////////////////////////////////////////////////////////// 10 | //// //// 11 | //// Copyright (C) 2000 Rudolf Usselmann //// 12 | //// rudi@asics.ws //// 13 | //// //// 14 | //// This source file may be used and distributed without //// 15 | //// restriction provided that this copyright statement is not //// 16 | //// removed from the file and that any derivative work contains //// 17 | //// the original copyright notice and the associated disclaimer.//// 18 | //// //// 19 | //// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// 22 | //// FOR A PARTICULAR PURPOSE. 
IN NO EVENT SHALL THE AUTHOR //// 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// 28 | //// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// 31 | //// POSSIBILITY OF SUCH DAMAGE. //// 32 | //// //// 33 | ///////////////////////////////////////////////////////////////////// 34 | 35 | `timescale 1ns / 100ps 36 | // pre_norm_fmul: derives exponent, sign and exception flags from opa/opb; exp_out, sign, sign_exe, inf, exp_ovf and underflow are registered on posedge clk, fracta/fractb are combinational. 37 | module pre_norm_fmul(clk, fpu_op, opa, opb, fracta, fractb, exp_out, sign, 38 | sign_exe, inf, exp_ovf, underflow); 39 | input clk; 40 | input [2:0] fpu_op; 41 | input [31:0] opa, opb; 42 | output [23:0] fracta, fractb; 43 | output [7:0] exp_out; 44 | output sign, sign_exe; 45 | output inf; 46 | output [1:0] exp_ovf; 47 | output [2:0] underflow; 48 | 49 | //////////////////////////////////////////////////////////////////////// 50 | // 51 | // Local Wires and registers 52 | // 53 | 54 | reg [7:0] exp_out; 55 | wire signa, signb; 56 | reg sign, sign_d; 57 | reg sign_exe; 58 | reg inf; 59 | wire [1:0] exp_ovf_d; 60 | reg [1:0] exp_ovf; 61 | wire [7:0] expa, expb; 62 | wire [7:0] exp_tmp1, exp_tmp2; 63 | wire co1, co2; 64 | wire expa_dn, expb_dn; 65 | wire [7:0] exp_out_a; 66 | wire opa_00, opb_00, fracta_00, fractb_00; 67 | wire [7:0] exp_tmp3, exp_tmp4, exp_tmp5; 68 | wire [2:0] underflow_d; 69 | reg [2:0] underflow; 70 | wire op_div = (fpu_op == 3'b011); // when set, the exponents are subtracted instead of added below 71 | wire [7:0] exp_out_mul, exp_out_div; 72 | 73 | //////////////////////////////////////////////////////////////////////// 74 | // 75 | // Aliases 76 | // 77 | 78 | assign signa = opa[31]; 79 | assign signb = opb[31]; 80 | assign expa = opa[30:23]; 81 
| assign expb = opb[30:23]; 82 | 83 | //////////////////////////////////////////////////////////////////////// 84 | // 85 | // Calculate Exponent 86 | // 87 | 88 | assign expa_dn = !(|expa); // exponent field of opa is all zeros 89 | assign expb_dn = !(|expb); // exponent field of opb is all zeros 90 | assign opa_00 = !(|opa[30:0]); // every bit of opa below the sign bit is zero 91 | assign opb_00 = !(|opb[30:0]); // every bit of opb below the sign bit is zero 92 | assign fracta_00 = !(|opa[22:0]); 93 | assign fractb_00 = !(|opb[22:0]); 94 | 95 | assign fracta = {!expa_dn,opa[22:0]}; // Recover hidden bit 96 | assign fractb = {!expb_dn,opb[22:0]}; // Recover hidden bit 97 | 98 | assign {co1,exp_tmp1} = op_div ? (expa - expb) : (expa + expb); // co1 captures the ninth (carry/borrow) bit 99 | assign {co2,exp_tmp2} = op_div ? ({co1,exp_tmp1} + 8'h7f) : ({co1,exp_tmp1} - 8'h7f); // add 8'h7f for divide, subtract it for multiply 100 | 101 | assign exp_tmp3 = exp_tmp2 + 1; 102 | assign exp_tmp4 = 8'h7f - exp_tmp1; 103 | assign exp_tmp5 = op_div ? (exp_tmp4+1) : (exp_tmp4-1); 104 | 105 | 106 | always@(posedge clk) 107 | exp_out <= op_div ? exp_out_div : exp_out_mul; 108 | 109 | assign exp_out_div = (expa_dn | expb_dn) ? (co2 ? exp_tmp5 : exp_tmp3 ) : co2 ? exp_tmp4 : exp_tmp2; 110 | assign exp_out_mul = exp_ovf_d[1] ? exp_out_a : (expa_dn | expb_dn) ? exp_tmp3 : exp_tmp2; 111 | assign exp_out_a = (expa_dn | expb_dn) ? exp_tmp5 : exp_tmp4; 112 | assign exp_ovf_d[0] = op_div ? (expa[7] & !expb[7]) : (co2 & expa[7] & expb[7]); 113 | assign exp_ovf_d[1] = op_div ? co2 : ((!expa[7] & !expb[7] & exp_tmp2[7]) | co2); 114 | 115 | always @(posedge clk) 116 | exp_ovf <= exp_ovf_d; 117 | 118 | assign underflow_d[0] = (exp_tmp1 < 8'h7f) & !co1 & !(opa_00 | opb_00 | expa_dn | expb_dn); 119 | assign underflow_d[1] = ((expa[7] | expb[7]) & !opa_00 & !opb_00) | 120 | (expa_dn & !fracta_00) | (expb_dn & !fractb_00); 121 | assign underflow_d[2] = !opa_00 & !opb_00 & (exp_tmp1 == 8'h7f); 122 | 123 | always @(posedge clk) 124 | underflow <= underflow_d; 125 | 126 | always @(posedge clk) 127 | inf <= op_div ? 
(expb_dn & !expa[7]) : ({co1,exp_tmp1} > 9'h17e) ; 128 | 129 | 130 | //////////////////////////////////////////////////////////////////////// 131 | // 132 | // Determine sign for the output 133 | // 134 | 135 | // sign: 0=Positive Number; 1=Negative Number (sign_d below is the XOR of signa and signb) 136 | always @(signa or signb) 137 | case({signa, signb}) // synopsys full_case parallel_case 138 | 2'b0_0: sign_d = 0; 139 | 2'b0_1: sign_d = 1; 140 | 2'b1_0: sign_d = 1; 141 | 2'b1_1: sign_d = 0; 142 | endcase 143 | 144 | always @(posedge clk) 145 | sign <= sign_d; 146 | 147 | always @(posedge clk) 148 | sign_exe <= signa & signb; // registered flag: both operand sign bits are set 149 | 150 | endmodule -------------------------------------------------------------------------------- /fpu-wrappers/resources/opencores/primitives.v: -------------------------------------------------------------------------------- 1 | ///////////////////////////////////////////////////////////////////// 2 | //// //// 3 | //// Primitives //// 4 | //// FPU Primitives //// 5 | //// //// 6 | //// Author: Rudolf Usselmann //// 7 | //// rudi@asics.ws //// 8 | //// //// 9 | ///////////////////////////////////////////////////////////////////// 10 | //// //// 11 | //// Copyright (C) 2000 Rudolf Usselmann //// 12 | //// rudi@asics.ws //// 13 | //// //// 14 | //// This source file may be used and distributed without //// 15 | //// restriction provided that this copyright statement is not //// 16 | //// removed from the file and that any derivative work contains //// 17 | //// the original copyright notice and the associated disclaimer.//// 18 | //// //// 19 | //// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// 22 | //// FOR A PARTICULAR PURPOSE. 
IN NO EVENT SHALL THE AUTHOR //// 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// 28 | //// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// 31 | //// POSSIBILITY OF SUCH DAMAGE. //// 32 | //// //// 33 | ///////////////////////////////////////////////////////////////////// 34 | 35 | 36 | `timescale 1ns / 100ps 37 | 38 | 39 | //////////////////////////////////////////////////////////////////////// 40 | // 41 | // Add/Sub 42 | // 43 | 44 | module add_sub27(add, opa, opb, sum, co); 45 | input add; 46 | input [26:0] opa, opb; 47 | output [26:0] sum; 48 | output co; 49 | 50 | 51 | 52 | assign {co, sum} = add ? 
(opa + opb) : (opa - opb); 53 | 54 | endmodule 55 | 56 | //////////////////////////////////////////////////////////////////////// 57 | // 58 | // Multiply 59 | // 60 | 61 | module mul_r2(clk, opa, opb, prod); 62 | input clk; 63 | input [23:0] opa, opb; 64 | output [47:0] prod; 65 | 66 | reg [47:0] prod1, prod; 67 | 68 | always @(posedge clk) 69 | prod1 <= opa * opb; 70 | 71 | always @(posedge clk) 72 | prod <= prod1; 73 | 74 | endmodule 75 | 76 | //////////////////////////////////////////////////////////////////////// 77 | // 78 | // Divide 79 | // 80 | 81 | module div_r2(clk, opa, opb, quo, rem); 82 | input clk; 83 | input [49:0] opa; 84 | input [23:0] opb; 85 | output [49:0] quo, rem; 86 | 87 | reg [49:0] quo, rem, quo1, remainder; 88 | 89 | always @(posedge clk) 90 | quo1 <= opa / opb; 91 | 92 | always @(posedge clk) 93 | quo <= quo1; 94 | 95 | always @(posedge clk) 96 | remainder <= opa % opb; 97 | 98 | always @(posedge clk) 99 | rem <= remainder; 100 | 101 | endmodule 102 | 103 | 104 | -------------------------------------------------------------------------------- /fpu-wrappers/resources/syn.tcl: -------------------------------------------------------------------------------- 1 | # usage: dc_shell -f syn.tcl 2 | 3 | # load library if config exists 4 | set rc [file exist ~/library.tcl] 5 | if {$rc == 1} { 6 | source ~/library.tcl 7 | } 8 | 9 | # args 10 | set input_verilog [list INPUT_VERILOG] 11 | set input_systemverilog [list INPUT_SYSTEMVERILOG] 12 | set input_vhdl [list INPUT_VHDL] 13 | set toplevel_name TOPLEVEL_NAME 14 | 15 | # load design 16 | read_file -format verilog $input_verilog 17 | read_file -format sverilog $input_systemverilog 18 | read_file -format vhdl $input_vhdl 19 | # check module exists 20 | set rc [llength [get_designs -exact $toplevel_name]] 21 | if {$rc == 0} { 22 | quit 23 | } 24 | current_design $toplevel_name 25 | 26 | # setup 27 | set_host_options -max_cores 16 28 | 29 | # timing 30 | # 1GHz clock 31 | create_clock clock -period 
1.0000 32 | create_clock clk -period 1.0000 33 | # dff clock to output: 0.14ns 34 | # assume all input comes from output of dff 35 | set_input_delay 0.14 -clock clock [all_inputs] 36 | set_input_delay 0.14 -clock clk [all_inputs] 37 | # dff setup time: 0.02ns 38 | # assume all output goes to dff 39 | set_output_delay 0.02 -clock clock [all_outputs] 40 | set_output_delay 0.02 -clock clk [all_outputs] 41 | 42 | # synthesis flow 43 | link 44 | uniquify 45 | ungroup -flatten -all 46 | set_optimize_registers 47 | compile_ultra 48 | 49 | # export 50 | write -format ddc -hierarchy -output [format "%s%s" $toplevel_name ".ddc"] 51 | write_sdf -version 1.0 [format "%s%s" $toplevel_name ".sdf"] 52 | write -format verilog -hierarchy -output [format "%s%s" $toplevel_name ".syn.v"] 53 | write_sdc [format "%s%s" $toplevel_name ".sdc"] 54 | 55 | # reports 56 | check_timing > ${toplevel_name}_check_timing.txt 57 | check_design > ${toplevel_name}_check_design.txt 58 | report_design > ${toplevel_name}_report_design.txt 59 | report_area -hierarchy > ${toplevel_name}_report_area.txt 60 | report_power -hierarchy > ${toplevel_name}_report_power.txt 61 | report_cell > ${toplevel_name}_report_cell.txt 62 | report_timing -delay_type max -max_paths 5 > ${toplevel_name}_report_timing_setup.txt 63 | report_timing -delay_type min -max_paths 5 > ${toplevel_name}_report_timing_hold.txt 64 | report_constraint -all_violators > ${toplevel_name}_report_constraint.txt 65 | report_qor > ${toplevel_name}_report_qor.txt 66 | 67 | # quit 68 | quit 69 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/Mul.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import spinal.core._ 4 | import spinal.lib._ 5 | 6 | /** An integer multiplier 7 | * 8 | * @param bitWidth 9 | * the bit width of integer 10 | * @param stages 11 | * pipeline stages 12 | */ 13 | class Mul(bitWidth: Int, stages: 
Int) extends Component { 14 | val a = in(UInt(bitWidth bits)) 15 | val b = in(UInt(bitWidth bits)) 16 | val c = out(UInt(2 * bitWidth bits)) 17 | 18 | c := Delay(a * b, stages) 19 | } 20 | 21 | /** Generate Mul module 22 | */ 23 | object Mul extends SpinalEmitVerilog { 24 | for (width <- Seq(8, 16, 32)) { 25 | for (stages <- Seq(0, 1, 2)) { 26 | work(new Mul(width, stages), s"Mul_${width}w${stages}s") 27 | } 28 | } 29 | } 30 | 31 | /** Synthesize Mul 32 | */ 33 | object MulSynth extends SpinalEmitVerilog { 34 | for (width <- Seq(11, 24, 53)) { 35 | for (stages <- Seq(0, 1, 2)) { 36 | work(new Mul(width, stages), s"Mul_${width}w${stages}s") 37 | val name = s"Mul_${width}w${stages}s" 38 | Synthesis.build( 39 | Seq( 40 | s"${name}.v" 41 | ), 42 | s"Mul", 43 | name 44 | ) 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/bench.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import spinal.core._ 4 | import spinal.lib.eda.bench._ 5 | import spinal.lib.eda.xilinx.VivadoFlow 6 | 7 | import scala.collection.mutable.ArrayBuffer 8 | 9 | /** Benchmark with Vivado 10 | */ 11 | trait VivadoBench extends App { 12 | def bench(name: String, paths: Seq[String], topModuleName: String) = { 13 | val targets = ArrayBuffer[Target]() 14 | val vivadoPath = "/opt/Xilinx/Vivado/2020.2/bin" 15 | 16 | for ( 17 | (family, device) <- Seq( 18 | ("Kintex 7", "xc7k325tffg900-3"), 19 | ("Virtex UltraScale+", "xcvu37p-fsvh2892-3-e") 20 | ) 21 | ) { 22 | for ( 23 | (freq, name) <- Seq( 24 | (50 MHz, "area"), 25 | (400 MHz, "fmax") 26 | ) 27 | ) { 28 | targets += new Target { 29 | override def getFamilyName(): String = family 30 | override def synthesise(rtl: Rtl, workspace: String): Report = { 31 | VivadoFlow( 32 | frequencyTarget = freq, 33 | vivadoPath = vivadoPath, 34 | workspacePath = s"${workspace}_${name}", 35 | rtl = rtl, 36 | family = 
getFamilyName(), 37 | device = device 38 | ) 39 | } 40 | } 41 | } 42 | } 43 | 44 | Bench( 45 | Seq(new Rtl { 46 | override def getName(): String = name 47 | override def getRtlPaths(): Seq[String] = paths 48 | override def getTopModuleName(): String = topModuleName 49 | }), 50 | targets, 51 | "/tmp/" 52 | ) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/common.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import java.nio.file.Files 4 | import java.nio.file.Paths 5 | import java.nio.file.StandardCopyOption 6 | 7 | /** Helper that copies a classpath resource into <java.io.tmpdir>/<user.name>/resource and returns the absolute path of the copy (an existing copy is replaced) 8 | */ 9 | object Resource { 10 | def path(name: String) = { 11 | val tmp = Paths.get( 12 | System.getProperty("java.io.tmpdir"), 13 | System.getProperty("user.name"), 14 | "resource" 15 | ); 16 | tmp.toFile().mkdirs() 17 | val path = tmp.resolve(Paths.get(name).getFileName()) 18 | 19 | val is = getClass().getResourceAsStream(name) 20 | // getResourceAsStream returns null for a missing resource; fail with a clear message instead of an NPE inside Files.copy 21 | require(is != null, s"resource ${name} not found") 22 | try { 23 | Files.copy(is, path, StandardCopyOption.REPLACE_EXISTING) 24 | } finally { 25 | // Files.copy does not close its input stream, so release it here 26 | is.close() 27 | } 28 | path.toFile().getAbsolutePath() 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/emit.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import chisel3._ 4 | import chisel3.stage.ChiselGeneratorAnnotation 5 | import chisel3.experimental.ChiselAnnotation 6 | import circt.stage.FirtoolOption 7 | import circt.stage.ChiselStage 8 | import _root_.sifive.enterprise.firrtl.NestedPrefixModulesAnnotation 9 | import chisel3.experimental.annotate 10 | 11 | /** Helper that attaches a NestedPrefixModulesAnnotation (carrying `prefix` and `inclusive`) to `module`; does nothing when `prefix` is null or empty 12 | */ 13 | object AddPrefix { 14 | def apply(module: Module, prefix: String, inclusive: Boolean = true) = { 15 | if (prefix != null && prefix != "") { 16 | annotate(new ChiselAnnotation { 17 | def toFirrtl = 18 | new NestedPrefixModulesAnnotation(module.toTarget, 
prefix, inclusive) 19 | }) 20 | } 21 | } 22 | } 23 | 24 | /** Emit Verilog from Chisel module 25 | */ 26 | trait ChiselEmitVerilog extends App { 27 | def emit(genModule: () => RawModule, name: String) = { 28 | ChiselStage.emitSystemVerilogFile( 29 | genModule(), 30 | Array(), 31 | Array("-o", s"${name}.sv") 32 | ) 33 | } 34 | } 35 | 36 | /** Helper to generate Chisel modules 37 | */ 38 | trait EmitChiselModule extends ChiselEmitVerilog { 39 | def emitChisel( 40 | genModule: (FloatType, Int, Int, String) => RawModule, 41 | name: String, 42 | library: String, 43 | allStages: Seq[Int] = Seq(1, 2, 3), 44 | floatTypes: Seq[FloatType] = Seq(FloatH, FloatS, FloatD), 45 | lanes: Seq[Int] = Seq(1, 2, 4) 46 | ) = { 47 | for (floatType <- floatTypes) { 48 | val floatName = floatType.kind().toString() 49 | for (lanes <- lanes) { 50 | for (stages <- allStages) { 51 | val moduleName = s"${name}_${floatName}${lanes}l${stages}s_${library}" 52 | val prefix = s"${moduleName}_" 53 | emit( 54 | () => genModule(floatType, lanes, stages, prefix), 55 | moduleName 56 | ) 57 | } 58 | } 59 | } 60 | } 61 | } 62 | 63 | /** Generate Verilog from SpinalHDL module 64 | */ 65 | trait SpinalEmitVerilog extends App { 66 | def work[T <: spinal.core.Component]( 67 | gen: => T, 68 | netlistName: String = null 69 | ): Unit = { 70 | // verilog 71 | val verilog = spinal.core.SpinalConfig( 72 | netlistFileName = netlistName match { 73 | case null => null 74 | case s => s"$s.v" 75 | } 76 | ) 77 | verilog.generateVerilog(gen) 78 | } 79 | } 80 | 81 | /** Helper to generate SpinalHDL modules 82 | */ 83 | trait EmitSpinalModule extends SpinalEmitVerilog { 84 | def emitFlopoco[T <: spinal.core.Component]( 85 | stages: Int, 86 | genModule: (FloatType, Int, Int) => T, 87 | name: String 88 | ) = { 89 | for (kind <- Seq(FloatH, FloatS, FloatD)) { 90 | val floatName = kind.kind().toString() 91 | for (lanes <- Seq(1, 2, 4, 8)) { 92 | work( 93 | genModule(kind, lanes, stages), 94 | 
s"${name}_${floatName}${lanes}l${stages}s" 95 | ) 96 | } 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/float.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import _root_.hardfloat.fNFromRecFN 4 | import _root_.hardfloat.recFNFromFN 5 | import chisel3._ 6 | 7 | /** Trait for floating point type 8 | */ 9 | trait FloatType { 10 | // must implement 11 | // exp bits 12 | def exp(): Int 13 | // (total - exp) bits 14 | def sig(): Int 15 | def kind(): FpKind.FpKind 16 | 17 | // auto implemented 18 | // total bits 19 | def width(): Int = exp() + sig() 20 | // HF width in bits 21 | def widthHardfloat(): Int = width() + 1 22 | // FloPoCo width in bits 23 | def widthFlopoco(): Int = width() + 2 24 | 25 | // conversion to hardfloat internal representation 26 | def toHardfloat(n: UInt) = recFNFromFN(exp(), sig(), n) 27 | def fromHardfloat(n: UInt) = fNFromRecFN(exp(), sig(), n) 28 | // extract one element from packed 29 | def extract(data: UInt, offset: Int) = 30 | data((offset + 1) * width() - 1, offset * width()) 31 | def extractHardfloat(data: UInt, offset: Int) = 32 | data((offset + 1) * widthHardfloat() - 1, offset * widthHardfloat()) 33 | // generate the representation of 1.0 34 | def oneBigInt() = (((BigInt(1) << (exp() - 1)) - 1) << (sig() - 1)) 35 | // chisel 36 | def oneChisel() = 37 | (((BigInt(1) << (exp() - 1)) - 1) << (sig() - 1)).U(width().W) 38 | def oneHardfloatChisel() = 39 | (BigInt(1) << (exp() + sig() - 1)).U(widthHardfloat().W) 40 | } 41 | 42 | /** Enum of floating point types 43 | */ 44 | object FpKind extends Enumeration { 45 | type FpKind = Value 46 | // Double, Single, Half precision 47 | val D, S, H = Value 48 | } 49 | 50 | /** 64-bit Double 51 | */ 52 | object FloatD extends FloatType { 53 | def exp() = 11 54 | def sig() = 53 55 | def kind() = FpKind.D 56 | } 57 | 58 | /** 32-bit Float 59 | 
*/ 60 | object FloatS extends FloatType { 61 | def exp() = 8 62 | def sig() = 24 63 | def kind() = FpKind.S 64 | } 65 | 66 | /** 16-bit Half Float 67 | */ 68 | object FloatH extends FloatType { 69 | def exp() = 5 70 | def sig() = 11 71 | def kind() = FpKind.H 72 | } 73 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/flopoco/FPCFExp.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import fpuwrapper.EmitSpinalModule 4 | import fpuwrapper.FloatType 5 | import fpuwrapper.Resource 6 | import spinal.core._ 7 | import spinal.lib._ 8 | 9 | class FPCFExpRequest(val floatType: FloatType, val lanes: Int) extends Bundle { 10 | val a = Vec(UInt(floatType.widthFlopoco() bits), lanes) 11 | } 12 | 13 | class FPCFExpResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 14 | // result 15 | val res = Vec(UInt(floatType.widthFlopoco() bits), lanes) 16 | } 17 | 18 | class FPCFExp(floatType: FloatType, lanes: Int, stages: Int) extends Component { 19 | val io = new Bundle { 20 | val req = slave(Flow(new FPCFExpRequest(floatType, lanes))) 21 | val resp = master(Flow(new FPCFExpResponse(floatType, lanes))) 22 | } 23 | 24 | for (i <- 0 until lanes) { 25 | val fma = new FPCFExpBlackBox(floatType, stages) 26 | fma.X := io.req.a(i).asBits 27 | io.resp.res(i) := fma.R.asUInt 28 | } 29 | 30 | io.resp.valid := Delay(io.req.valid, stages) 31 | } 32 | 33 | class FPCFExpBlackBox(floatType: FloatType, stages: Int) extends BlackBox { 34 | val clk = in(Bool()) 35 | val X = in(Bits(floatType.widthFlopoco() bits)) 36 | val R = out(Bits(floatType.widthFlopoco() bits)) 37 | 38 | setDefinitionName(s"FPCFExp_${floatType.kind().toString()}") 39 | 40 | // Map the clk 41 | mapCurrentClockDomain( 42 | clock = clk 43 | ) 44 | 45 | val fileName = s"FPCFExp_${floatType.kind().toString()}${stages}s.v" 46 | assert( 47 | 
getClass().getResource(s"/flopoco/${fileName}") != null, 48 | s"file ${fileName} not found" 49 | ) 50 | addRTLPath(Resource.path(s"/flopoco/${fileName}")) 51 | } 52 | 53 | object FPCFExp extends EmitSpinalModule { 54 | emitFlopoco( 55 | 3, 56 | (floatType, lanes, stages) => new FPCFExp(floatType, lanes, stages), 57 | "FlopocoFPCFExp" 58 | ) 59 | } 60 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/flopoco/FPCToIEEE.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import fpuwrapper._ 4 | import spinal.core._ 5 | import spinal.lib._ 6 | 7 | class FPCToIEEEInner(floatType: FloatType) extends Component { 8 | val io = new Bundle { 9 | val req = in(UInt(floatType.widthFlopoco() bits)) 10 | val resp = out(UInt(floatType.width() bits)) 11 | } 12 | 13 | val fpc = io.req 14 | val fracX = fpc(floatType.sig() - 2 downto 0) 15 | val expX = fpc(floatType.width() - 2 downto floatType.sig() - 1) 16 | val sX = Bool() 17 | val exnX = fpc(floatType.width() + 1 downto floatType.width()) 18 | when(exnX === 1 || exnX === 2 || exnX === 0) { 19 | sX := fpc(floatType.width() - 1) 20 | } otherwise { 21 | sX := False 22 | } 23 | 24 | val expZero = expX === 0 25 | 26 | val ieee = UInt(floatType.width() bits) 27 | io.resp := ieee 28 | 29 | val fracR = UInt(floatType.sig() - 1 bits) 30 | val expR = UInt(floatType.exp() bits) 31 | val sR = Bool() 32 | ieee := Cat(sR, expR, fracR).asUInt 33 | 34 | switch(exnX) { 35 | is(0) { 36 | // zero 37 | fracR := 0 38 | expR := 0 39 | sR := sX 40 | } 41 | is(1) { 42 | // normal 43 | when(expZero) { 44 | fracR := Cat(True, fracX(floatType.sig() - 2 downto 1)).asUInt 45 | }.otherwise { 46 | fracR := fracX 47 | } 48 | expR := expX 49 | sR := sX 50 | } 51 | is(2) { 52 | // inf 53 | fracR := exnX(0).asUInt.resized 54 | expR.setAllTo(True) 55 | sR := sX 56 | } 57 | default { 58 | // nan 59 | fracR := exnX(0).asUInt.resized 
60 | expR.setAllTo(True) 61 | sR := False 62 | } 63 | } 64 | } 65 | 66 | /** Implementation of OutputIEEE operator of Flopoco 67 | * 68 | * @param floatType 69 | * @param lanes 70 | */ 71 | class FPCToIEEE(floatType: FloatType, lanes: Int, stages: Int) 72 | extends Component { 73 | val io = new Bundle { 74 | val req = slave(Flow(Vec(UInt(floatType.widthFlopoco() bits), lanes))) 75 | val resp = master(Flow(Vec(UInt(floatType.width() bits), lanes))) 76 | } 77 | 78 | io.resp.valid := Delay(io.req.valid, stages) 79 | 80 | for (i <- 0 until lanes) { 81 | val inner = new FPCToIEEEInner(floatType) 82 | inner.io.req := io.req.payload(i) 83 | io.resp.payload(i) := Delay(inner.io.resp, stages) 84 | } 85 | } 86 | 87 | object FPCToIEEE extends EmitSpinalModule { 88 | emitFlopoco( 89 | 0, 90 | (floatType, lanes, stages) => new FPCToIEEE(floatType, lanes, stages), 91 | "FlopocoFPCToIEEE" 92 | ) 93 | } 94 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/flopoco/IEEEFExp.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import fpuwrapper.EmitSpinalModule 4 | import fpuwrapper.FloatS 5 | import fpuwrapper.FloatType 6 | import fpuwrapper.SpinalEmitVerilog 7 | import fpuwrapper.Synthesis 8 | import spinal.core._ 9 | import spinal.lib._ 10 | 11 | class IEEEFExpRequest(val floatType: FloatType, val lanes: Int) extends Bundle { 12 | val a = Vec(UInt(floatType.width() bits), lanes) 13 | } 14 | 15 | class IEEEFExpResponse(val floatType: FloatType, val lanes: Int) 16 | extends Bundle { 17 | // result 18 | val res = Vec(UInt(floatType.width() bits), lanes) 19 | } 20 | 21 | class IEEEFExp(floatType: FloatType, lanes: Int, stages: Int) 22 | extends Component { 23 | val io = new Bundle { 24 | val req = slave(Flow(new IEEEFExpRequest(floatType, lanes))) 25 | val resp = master(Flow(new IEEEFExpResponse(floatType, lanes))) 26 | } 27 | 28 | for (i <- 0 until 
lanes) { 29 | val ieee2fpc = new IEEEToFPCInner(floatType) 30 | ieee2fpc.io.req := io.req.a(i) 31 | 32 | val fma = new FPCFExpBlackBox(floatType, stages) 33 | fma.X := ieee2fpc.io.resp.asBits 34 | 35 | val fpc2ieee = new FPCToIEEEInner(floatType) 36 | fpc2ieee.io.req := fma.R.asUInt 37 | io.resp.res(i) := fpc2ieee.io.resp 38 | } 39 | 40 | io.resp.valid := Delay(io.req.valid, stages) 41 | } 42 | 43 | object IEEEFExp extends EmitSpinalModule { 44 | emitFlopoco( 45 | 3, 46 | (floatType, lanes, stages) => new IEEEFExp(floatType, lanes, stages), 47 | "FlopocoIEEEFExp" 48 | ) 49 | } 50 | 51 | object IEEEFExpSynth extends SpinalEmitVerilog { 52 | for (floatType <- Seq(FloatS)) { 53 | val floatName = floatType.kind().toString() 54 | for (stages <- Seq(3)) { 55 | val lanes = 1 56 | val name = s"IEEEFExp_${floatName}${lanes}l${stages}s" 57 | work( 58 | new IEEEFExp(floatType, lanes, stages), 59 | name 60 | ) 61 | 62 | val fileName = s"FPCFExp_${floatName}${stages}s.v" 63 | Synthesis.build( 64 | Seq( 65 | s"${name}.v", 66 | s"./fpu-wrappers/resources/flopoco/${fileName}" 67 | ), 68 | s"IEEEFExp", 69 | s"${name}_flopoco" 70 | ) 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/flopoco/IEEEFMA.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import fpuwrapper._ 4 | import spinal.core._ 5 | import spinal.lib._ 6 | 7 | object IEEEFMAOp extends SpinalEnum { 8 | // 1 * op[1] + op[2] 9 | val FADD = newElement() 10 | // 1 * op[1] - op[2] 11 | val FSUB = newElement() 12 | // op[0] * op[1] + 0 13 | val FMUL = newElement() 14 | // op[0] * op[1] + op[2] 15 | val FMADD = newElement() 16 | // op[0] * op[1] - op[2] 17 | val FMSUB = newElement() 18 | // -(op[0] * op[1] - op[2]) 19 | val FNMSUB = newElement() 20 | // -(op[0] * op[1] + op[2]) 21 | val FNMADD = newElement() 22 | 23 | val NOP = FADD 24 | } 25 | 26 | class 
IEEEFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { 27 | val op = IEEEFMAOp() 28 | val operands = Vec(Vec(UInt(floatType.width() bits), lanes), 3) 29 | } 30 | 31 | class IEEEFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 32 | // result 33 | val res = Vec(UInt(floatType.width() bits), lanes) 34 | } 35 | 36 | class IEEEFMA(floatType: FloatType, lanes: Int, stages: Int) extends Component { 37 | val io = new Bundle { 38 | val req = slave(Flow(new IEEEFMARequest(floatType, lanes))) 39 | val resp = master(Flow(new IEEEFMAResponse(floatType, lanes))) 40 | } 41 | 42 | val negateAB = False 43 | val negateC = False 44 | val op1 = Vec(UInt(floatType.width() bits), lanes) 45 | val op2 = Vec(UInt(floatType.width() bits), lanes) 46 | val op3 = Vec(UInt(floatType.width() bits), lanes) 47 | op1 := io.req.operands(0) 48 | op2 := io.req.operands(1) 49 | op3 := io.req.operands(2) 50 | 51 | val one = Vec(UInt(floatType.width() bits), lanes) 52 | val zero = Vec(UInt(floatType.width() bits), lanes) 53 | for (i <- 0 until lanes) { 54 | one(i) := floatType.oneBigInt() 55 | zero(i) := 0 56 | } 57 | 58 | switch(io.req.op) { 59 | is(IEEEFMAOp.FADD) { 60 | op1 := one 61 | } 62 | is(IEEEFMAOp.FSUB) { 63 | op1 := one 64 | negateC := True 65 | } 66 | is(IEEEFMAOp.FMUL) { 67 | op3 := zero 68 | } 69 | is(IEEEFMAOp.FMADD) { 70 | // do nothing 71 | } 72 | is(IEEEFMAOp.FMSUB) { 73 | negateC := True 74 | } 75 | is(IEEEFMAOp.FNMSUB) { 76 | negateAB := True 77 | } 78 | is(IEEEFMAOp.FNMADD) { 79 | negateAB := True 80 | negateC := True 81 | } 82 | } 83 | 84 | for (i <- 0 until lanes) { 85 | val fma = new IEEEFMABlackBox(floatType, stages) 86 | fma.A := op1(i).asBits 87 | fma.B := op2(i).asBits 88 | fma.C := op3(i).asBits 89 | fma.negateAB := negateAB 90 | fma.negateC := negateC 91 | fma.RndMode := 0 92 | io.resp.res(i) := fma.R.asUInt 93 | } 94 | 95 | io.resp.valid := Delay(io.req.valid, stages) 96 | } 97 | 98 | class IEEEFMABlackBox(floatType: FloatType, 
stages: Int) extends BlackBox { 99 | val clk = in(Bool()) 100 | val A = in(Bits(floatType.width() bits)) 101 | val B = in(Bits(floatType.width() bits)) 102 | val C = in(Bits(floatType.width() bits)) 103 | val negateAB = in(Bool()) 104 | val negateC = in(Bool()) 105 | val RndMode = in(Bits(2 bits)) 106 | val R = out(Bits(floatType.width() bits)) 107 | 108 | setDefinitionName(s"IEEEFMA_${floatType.kind().toString()}") 109 | 110 | // Map the clk 111 | mapCurrentClockDomain( 112 | clock = clk 113 | ) 114 | 115 | val fileName = s"IEEEFMA_${floatType.kind().toString()}${stages}s.v" 116 | assert( 117 | getClass().getResource(s"/flopoco/${fileName}") != null, 118 | s"file ${fileName} not found" 119 | ) 120 | addRTLPath(Resource.path(s"/flopoco/${fileName}")) 121 | } 122 | 123 | object IEEEFMA extends EmitSpinalModule { 124 | emitFlopoco( 125 | 3, 126 | (floatType, lanes, stages) => new IEEEFMA(floatType, lanes, stages), 127 | "FlopocoIEEEFMA" 128 | ) 129 | } 130 | 131 | object IEEEFMASynth extends SpinalEmitVerilog { 132 | for (floatType <- Seq(FloatS)) { 133 | val floatName = floatType.kind().toString() 134 | for (stages <- Seq(4)) { 135 | val lanes = 1 136 | val name = s"IEEEFMA_${floatName}${lanes}l${stages}s" 137 | work( 138 | new IEEEFMA(floatType, lanes, stages), 139 | name 140 | ) 141 | 142 | val fileName = s"IEEEFMA_${floatName}${stages}s.v" 143 | Synthesis.build( 144 | Seq( 145 | s"${name}.v", 146 | s"./fpu-wrappers/resources/flopoco/${fileName}" 147 | ), 148 | s"IEEEFMA", 149 | s"${name}_flopoco" 150 | ) 151 | } 152 | } 153 | } 154 | 155 | object IEEEFMABench extends SpinalEmitVerilog with VivadoBench { 156 | for (floatType <- Seq(FloatS)) { 157 | val floatName = floatType.kind().toString() 158 | for (stages <- Seq(3)) { 159 | val lanes = 1 160 | val name = s"IEEEFMA_${floatName}${lanes}l${stages}s" 161 | work( 162 | new IEEEFMA(floatType, lanes, stages), 163 | name 164 | ) 165 | 166 | val fileName = s"IEEEFMA_${floatName}${stages}s.v" 167 | bench( 168 | 
s"${name}_flopoco", 169 | Seq(s"${name}.v", s"./fpu-wrappers/resources/flopoco/${fileName}"), 170 | s"IEEEFMA" 171 | ) 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/flopoco/IEEEToFPC.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import fpuwrapper._ 4 | import spinal.core._ 5 | import spinal.lib._ 6 | 7 | class IEEEToFPCInner(floatType: FloatType) extends Component { 8 | val io = new Bundle { 9 | val req = in(UInt(floatType.width() bits)) 10 | val resp = out(UInt(floatType.widthFlopoco() bits)) 11 | } 12 | 13 | val ieee = io.req 14 | val fracX = ieee(floatType.sig() - 2 downto 0) 15 | val expX = ieee(floatType.width() - 2 downto floatType.sig() - 1) 16 | val sX = ieee(floatType.width() - 1) 17 | 18 | val expZero = expX === 0 19 | val expInfty = expX.andR 20 | val fracZero = fracX === 0 21 | val reprSubNormal = fracX(floatType.sig() - 2) 22 | 23 | // representable subnormal numbers 24 | val sfracX = UInt(floatType.sig() - 1 bits) 25 | when(expZero && reprSubNormal) { 26 | sfracX := fracX(floatType.sig() - 3 downto 0) << 1 27 | } otherwise { 28 | sfracX := fracX 29 | } 30 | 31 | val fpc = UInt(floatType.widthFlopoco() bits) 32 | io.resp := fpc 33 | 34 | val fracR = UInt(floatType.sig() - 1 bits) 35 | val expR = UInt(floatType.exp() bits) 36 | val sR = Bool() 37 | val exnR = UInt(2 bits) 38 | fpc := Cat(exnR, sR, expR, fracR).asUInt 39 | 40 | sR := sX 41 | expR := expX 42 | fracR := sfracX 43 | 44 | when(expInfty) { 45 | when(fracZero) { 46 | // inf 47 | exnR := 2 48 | } otherwise { 49 | // nan 50 | exnR := 3 51 | } 52 | } elsewhen (expZero && !reprSubNormal) { 53 | // zero 54 | exnR := 0 55 | } otherwise { 56 | // normal and representable subnormal 57 | exnR := 1 58 | } 59 | } 60 | 61 | /** Conversion from IEEE for flopoco format 62 | * 63 | * @param floatType 64 | * @param lanes 65 | */ 66 | 
class IEEEToFPC(floatType: FloatType, lanes: Int, stages: Int) 67 | extends Component { 68 | val io = new Bundle { 69 | val req = slave(Flow(Vec(UInt(floatType.width() bits), lanes))) 70 | val resp = master(Flow(Vec(UInt(floatType.widthFlopoco() bits), lanes))) 71 | } 72 | 73 | io.resp.valid := Delay(io.req.valid, stages) 74 | 75 | for (i <- 0 until lanes) { 76 | val inner = new IEEEToFPCInner(floatType) 77 | inner.io.req := io.req.payload(i) 78 | io.resp.payload(i) := Delay(inner.io.resp, stages) 79 | } 80 | } 81 | 82 | object IEEEToFPC extends EmitSpinalModule { 83 | emitFlopoco( 84 | 0, 85 | (floatType, lanes, stages) => new IEEEToFPC(floatType, lanes, stages), 86 | "FlopocoIEEEToFPC" 87 | ) 88 | } 89 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/formal/HFRoundtrip.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.formal 2 | 3 | import circt.stage.ChiselStage 4 | import chisel3._ 5 | import fpuwrapper.FloatS 6 | import fpuwrapper.FloatType 7 | import fpuwrapper.hardfloat.HFToIEEE 8 | import fpuwrapper.hardfloat.IEEEToHF 9 | 10 | import scala.sys.process._ 11 | 12 | class HFRoundtrip(floatType: FloatType) extends Module { 13 | val io = IO(new Bundle { 14 | val req = Input(UInt(floatType.width().W)) 15 | }) 16 | 17 | val ieee2hf = Module( 18 | new IEEEToHF(floatType, 1, 0) 19 | ) 20 | ieee2hf.io.float.valid := true.B 21 | ieee2hf.io.float.bits(0) := io.req 22 | 23 | val hf2ieee = Module( 24 | new HFToIEEE(floatType, 1, 0) 25 | ) 26 | hf2ieee.io.hardfloat.valid := true.B 27 | hf2ieee.io.hardfloat.bits(0) := ieee2hf.io.hardfloat.bits(0) 28 | 29 | chisel3.assert(hf2ieee.io.float.bits(0) === io.req) 30 | } 31 | 32 | object HFRoundtrip extends App { 33 | ChiselStage.emitSystemVerilog( 34 | new HFRoundtrip(FloatS), 35 | Array("-o", "HFRoundtrip") 36 | ) 37 | Seq( 38 | "yosys", 39 | "-v2", 40 | "-p", 41 | "read_verilog -formal HFRoundtrip.sv", 42 
| "-p", 43 | "prep", 44 | "-p", 45 | "write_smt2 -wires HFRoundtrip.smt2" 46 | ).! 47 | Seq( 48 | "yosys-smtbmc", 49 | "-s", 50 | "z3", 51 | "-t", 52 | "30", 53 | "--dump-vcd", 54 | "test.vcd", 55 | "-m", 56 | "HFRoundtrip", 57 | "HFRoundtrip.smt2" 58 | ).! 59 | } 60 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/formal/IEEEFMAFormal.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.formal 2 | 3 | import circt.stage.ChiselStage 4 | import chisel3._ 5 | import chisel3.util._ 6 | import fpuwrapper.ChiselEmitVerilog 7 | import fpuwrapper.FloatH 8 | import fpuwrapper.FloatType 9 | 10 | class FMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { 11 | val a = Vec(lanes, UInt(floatType.width().W)) 12 | val b = Vec(lanes, UInt(floatType.width().W)) 13 | val c = Vec(lanes, UInt(floatType.width().W)) 14 | } 15 | 16 | class FMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 17 | // result 18 | val res = Vec(lanes, UInt(floatType.width().W)) 19 | // exception status 20 | val exc = Vec(lanes, Bits(5.W)) 21 | } 22 | 23 | class IEEEFMAFormal(floatType: FloatType, lanes: Int, stages: Int) 24 | extends Module { 25 | val io = IO(new Bundle { 26 | val req = Flipped(Valid(new FMARequest(floatType, lanes))) 27 | }) 28 | 29 | val zeros = WireInit(VecInit.fill(lanes)(0.U(floatType.width().W))) 30 | 31 | val hardfloat = Module( 32 | new fpuwrapper.hardfloat.IEEEFMA(floatType, lanes, stages) 33 | ) 34 | hardfloat.io.req.valid := io.req.valid 35 | hardfloat.io.req.bits.op := fpuwrapper.hardfloat.FMAOp.FMADD 36 | hardfloat.io.req.bits.operands(0) := zeros 37 | hardfloat.io.req.bits.operands(1) := io.req.bits.b 38 | hardfloat.io.req.bits.operands(2) := io.req.bits.c 39 | 40 | val fudian = Module( 41 | new fpuwrapper.fudian.IEEEFMA(floatType, lanes, stages) 42 | ) 43 | fudian.io.req.valid := io.req.valid 44 | 
fudian.io.req.bits.operands(0) := zeros 45 | fudian.io.req.bits.operands(1) := io.req.bits.b 46 | fudian.io.req.bits.operands(2) := io.req.bits.c 47 | 48 | chisel3.assert( 49 | hardfloat.io.resp.valid === fudian.io.resp.valid 50 | ) 51 | when(hardfloat.io.resp.valid) { 52 | for (i <- 0 until lanes) { 53 | chisel3.assert( 54 | hardfloat.io.resp.bits.res(i) === fudian.io.resp.bits.res(i) 55 | ) 56 | } 57 | } 58 | } 59 | 60 | object IEEEFMAFormal extends App { 61 | ChiselStage.emitSystemVerilogFile( 62 | new IEEEFMAFormal(FloatH, 1, 1) 63 | ) 64 | } 65 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/fpnew/FPNewBlackbox.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fpnew 2 | 3 | import chisel3._ 4 | import chisel3.util.HasBlackBoxResource 5 | import fpuwrapper.FloatType 6 | 7 | class FPNewBlackbox( 8 | floatType: FloatType, 9 | lanes: Int, 10 | stages: Int, 11 | tagWidth: Int 12 | ) extends BlackBox( 13 | Map() 14 | ) 15 | with HasBlackBoxResource { 16 | val fLen = floatType.width() * lanes 17 | val io = IO(new Bundle { 18 | val clk_i = Input(Clock()) 19 | val rst_ni = Input(Bool()) 20 | val operands_i = Input(UInt((fLen * 3).W)) 21 | val rnd_mode_i = Input(UInt(3.W)) 22 | val op_i = Input(UInt(4.W)) 23 | val op_mod_i = Input(Bool()) 24 | val src_fmt_i = Input(UInt(3.W)) 25 | val dst_fmt_i = Input(UInt(3.W)) 26 | val int_fmt_i = Input(UInt(2.W)) 27 | val vectorial_op_i = Input(Bool()) 28 | val tag_i = Input(UInt(tagWidth.W)) 29 | val in_valid_i = Input(Bool()) 30 | val in_ready_o = Output(Bool()) 31 | val flush_i = Input(Bool()) 32 | val result_o = Output(UInt(fLen.W)) 33 | val status_o = Output(UInt(5.W)) 34 | val tag_o = Output(UInt(tagWidth.W)) 35 | val out_valid_o = Output(Bool()) 36 | val out_ready_i = Input(Bool()) 37 | val busy_o = Output(Bool()) 38 | }).suggestName("io") 39 | 40 | addResource( 41 | 
s"/fpnew/FPNewBlackbox_${floatType.kind().toString()}${lanes}l${stages}s.synth.v" 42 | ) 43 | } 44 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/fpnew/IEEEFPU.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fpnew 2 | 3 | import chisel3._ 4 | import chisel3.util.Decoupled 5 | import fpuwrapper.EmitChiselModule 6 | import fpuwrapper.FloatType 7 | import fpuwrapper.FloatS 8 | import fpuwrapper.FloatD 9 | import fpuwrapper.Synthesis 10 | 11 | class FPConfig() 12 | 13 | object FPFloatFormat extends ChiselEnum { 14 | val Fp32, Fp64, Fp16, Fp8, Fp16Alt = Value 15 | } 16 | 17 | object FPIntFormat extends ChiselEnum { 18 | val Int8, Int16, Int32, Int64 = Value 19 | } 20 | 21 | object FPOperation extends ChiselEnum { 22 | val FMADD, FNMSUB, ADD, MUL, DIV, SQRT, SGNJ, MINMAX, CMP, CLASSIFY, F2F, F2I, 23 | I2F, CPKAB, CPKCD = Value 24 | } 25 | 26 | object FPRoundingMode extends ChiselEnum { 27 | val RNE, RTZ, RDN, RUP, RMM, DYN = Value 28 | } 29 | 30 | class FPRequest(val fLen: Int) extends Bundle { 31 | val operands = Vec(3, UInt(fLen.W)) 32 | val roundingMode = FPRoundingMode() 33 | val op = FPOperation() 34 | val opModifier = Bool() 35 | val srcFormat = FPFloatFormat() 36 | val dstFormat = FPFloatFormat() 37 | val intFormat = FPIntFormat() 38 | } 39 | 40 | class FPStatus extends Bundle { 41 | val NV = Bool() // Invalid 42 | val DZ = Bool() // Divide by zero 43 | val OF = Bool() // Overflow 44 | val UF = Bool() // Underflow 45 | val NX = Bool() // Inexact 46 | } 47 | 48 | class FPResponse(val fLen: Int) extends Bundle { 49 | val result = UInt(fLen.W) 50 | val status = new FPStatus() 51 | } 52 | 53 | /** FPNew IO port. 
For meanings of ports, visit 54 | * https://github.com/pulp-platform/fpnew/blob/develop/docs/README.md 55 | */ 56 | class FPIO(val fLen: Int) extends Bundle { 57 | val req = Flipped(Decoupled(new FPRequest(fLen))) 58 | val resp = Decoupled(new FPResponse(fLen)) 59 | val flush = Input(Bool()) 60 | val busy = Output(Bool()) 61 | } 62 | // Chisel front end for FPNewBlackbox: forwards the FPIO request/response handshake, flush and busy to the blackbox ports 63 | class IEEEFPU( 64 | val floatType: FloatType, 65 | val lanes: Int, 66 | val stages: Int 67 | ) extends Module { 68 | // all lanes are packed into one fLen-bit operand/result word 69 | val fLen = floatType.width() * lanes 70 | val io = IO(new FPIO(fLen)) 71 | 72 | val blackbox = Module( 73 | new FPNewBlackbox( 74 | floatType, 75 | lanes, 76 | stages, 77 | tagWidth = 0 78 | ) 79 | ) 80 | 81 | // clock & reset (rst_ni is driven with the inverted module reset) 82 | blackbox.io.clk_i := clock 83 | blackbox.io.rst_ni := ~reset.asBool 84 | 85 | // request (vectorial_op_i is tied to 1 and tag_i to 0; tagWidth is 0) 86 | blackbox.io.operands_i := io.req.bits.operands.asUInt 87 | blackbox.io.rnd_mode_i := io.req.bits.roundingMode.asUInt 88 | blackbox.io.op_i := io.req.bits.op.asUInt 89 | blackbox.io.op_mod_i := io.req.bits.opModifier 90 | blackbox.io.src_fmt_i := io.req.bits.srcFormat.asUInt 91 | blackbox.io.dst_fmt_i := io.req.bits.dstFormat.asUInt 92 | blackbox.io.int_fmt_i := io.req.bits.intFormat.asUInt 93 | blackbox.io.vectorial_op_i := 1.B 94 | blackbox.io.tag_i := 0.B 95 | blackbox.io.in_valid_i := io.req.valid 96 | io.req.ready := blackbox.io.in_ready_o 97 | 98 | // response 99 | io.resp.bits.result := blackbox.io.result_o 100 | io.resp.bits.status := blackbox.io.status_o.asTypeOf(io.resp.bits.status) 101 | io.resp.valid := blackbox.io.out_valid_o 102 | blackbox.io.out_ready_i := io.resp.ready 103 | 104 | // flush & busy 105 | blackbox.io.flush_i := io.flush 106 | io.busy := blackbox.io.busy_o 107 | } 108 | 109 | object IEEEFPU extends EmitChiselModule { 110 | emitChisel( 111 | (floatType, lanes, stages, _) => new IEEEFPU(floatType, lanes, stages), 112 | "IEEEFPU", 113 | "fpnew" 114 | ) 115 | } 116 | 117 | object IEEEFPUSynth extends EmitChiselModule { 118 | for (floatType <- Seq(FloatS, FloatD)) { 
119 | val floatName = floatType.kind().toString() 120 | for (stages <- Seq(2, 3)) { 121 | for (lanes <- Seq(1)) { 122 | emitChisel( 123 | ( 124 | floatType, 125 | lanes, 126 | stages, 127 | _ 128 | ) => new IEEEFPU(floatType, lanes, stages), 129 | "IEEEFPU", 130 | "fpnew", 131 | allStages = Seq(stages), 132 | floatTypes = Seq(floatType), 133 | lanes = Seq(lanes) 134 | ) 135 | val name = s"IEEEFPU_${floatName}${lanes}l${stages}s_fpnew" 136 | 137 | val fileName = 138 | s"FPNewBlackbox_${floatType.kind().toString()}${lanes}l${stages}s.synth.v" 139 | Synthesis.build( 140 | Seq(s"${name}.v", s"./fpu-wrappers/resources/fpnew/${fileName}"), 141 | s"${name}_IEEEFPU", 142 | s"${name}" 143 | ) 144 | } 145 | } 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/fudian/IEEEFAdd.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fudian 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import fpuwrapper.EmitChiselModule 6 | import fpuwrapper.FloatD 7 | import fpuwrapper.FloatType 8 | import fpuwrapper.Synthesis 9 | 10 | class IEEEFAddRequest(val floatType: FloatType, val lanes: Int) extends Bundle { 11 | val a = Vec(lanes, UInt(floatType.width().W)) 12 | val b = Vec(lanes, UInt(floatType.width().W)) 13 | } 14 | 15 | class IEEEFAddResponse(val floatType: FloatType, val lanes: Int) 16 | extends Bundle { 17 | // result 18 | val res = Vec(lanes, UInt(floatType.width().W)) 19 | // exception status 20 | val exc = Vec(lanes, Bits(5.W)) 21 | } 22 | 23 | class IEEEFAdd(floatType: FloatType, lanes: Int, stages: Int) extends Module { 24 | val io = IO(new Bundle { 25 | val req = Flipped(Valid(new IEEEFAddRequest(floatType, lanes))) 26 | val resp = Valid(new IEEEFAddResponse(floatType, lanes)) 27 | }) 28 | 29 | val inputStages = stages / 2 30 | val outputStages = stages - inputStages 31 | 32 | val reqValid = io.req.valid 33 | val results = for (i <- 
0 until lanes) yield { 34 | val fma = Module( 35 | new fudian.FADD( 36 | floatType.exp(), 37 | floatType.sig() 38 | ) 39 | ) 40 | fma.suggestName(s"fadd_${floatType.kind()}_${i}") 41 | fma.io.a := Pipe( 42 | reqValid, 43 | io.req.bits.a(i), 44 | inputStages 45 | ).bits 46 | fma.io.b := Pipe( 47 | reqValid, 48 | io.req.bits.b(i), 49 | inputStages 50 | ).bits 51 | 52 | // TODO 53 | fma.io.rm := 0.U 54 | 55 | val res = 56 | Pipe(true.B, fma.io.result, outputStages).bits 57 | val exc = Pipe(true.B, fma.io.fflags, outputStages).bits 58 | (res, exc) 59 | } 60 | 61 | // collect result 62 | val res = results.map(_._1) 63 | // exception flags 64 | val exc = results.map(_._2) 65 | 66 | val resValid = ShiftRegister(reqValid, stages) 67 | 68 | io.resp.valid := resValid 69 | io.resp.bits.res := res 70 | io.resp.bits.exc := exc 71 | } 72 | 73 | object IEEEFAdd extends EmitChiselModule { 74 | emitChisel( 75 | (floatType, lanes, stages, _) => new IEEEFAdd(floatType, lanes, stages), 76 | "IEEEFAdd", 77 | "fudian" 78 | ) 79 | } 80 | 81 | object IEEEFAddSynth extends EmitChiselModule { 82 | for (floatType <- Seq(FloatD)) { 83 | val floatName = floatType.kind().toString() 84 | for (stages <- Seq(4)) { 85 | emitChisel( 86 | (floatType, lanes, stages, _) => new IEEEFAdd(floatType, lanes, stages), 87 | "IEEEFAdd", 88 | "fudian", 89 | allStages = Seq(stages), 90 | floatTypes = Seq(floatType), 91 | lanes = Seq(1) 92 | ) 93 | val name = s"Fudian_IEEEFAdd_${floatName}1l${stages}s" 94 | Synthesis.build( 95 | Seq(s"${name}.v"), 96 | s"${name}_IEEEFAdd", 97 | s"fudian_${name}" 98 | ) 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/fudian/IEEEFDivSqrt.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fudian 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import fpuwrapper.FloatType 6 | 7 | object IEEEFDivSqrtOp extends ChiselEnum { 8 | val 
DIV = Value 9 | val SQRT = Value 10 | 11 | val NOP = DIV 12 | } 13 | 14 | class IEEEFDivSqrtRequest(val floatType: FloatType, val lanes: Int) 15 | extends Bundle { 16 | val op = IEEEFDivSqrtOp() 17 | val a = Vec(lanes, UInt(floatType.width().W)) 18 | val b = Vec(lanes, UInt(floatType.width().W)) 19 | } 20 | 21 | class IEEEFDivSqrtResponse(val floatType: FloatType, val lanes: Int) 22 | extends Bundle { 23 | // result 24 | val res = Vec(lanes, UInt(floatType.width().W)) 25 | // exception status 26 | val exc = Vec(lanes, Bits(5.W)) 27 | } 28 | 29 | class IEEEFDivSqrt(val floatType: FloatType, val lanes: Int) extends Module { 30 | val io = IO(new Bundle { 31 | val req = Flipped(Decoupled(new IEEEFDivSqrtRequest(floatType, lanes))) 32 | val resp = Valid(new IEEEFDivSqrtResponse(floatType, lanes)) 33 | }) 34 | 35 | // replicate small units for higher throughput 36 | val valid = io.req.valid 37 | val results = for (i <- 0 until lanes) yield { 38 | val div_sqrt = Module( 39 | new fudian.FDIV( 40 | floatType.exp(), 41 | floatType.sig() 42 | ) 43 | ) 44 | div_sqrt.suggestName(s"div_sqrt${floatType.kind()}_${i}") 45 | div_sqrt.io.a := io.req.bits.a(i) 46 | div_sqrt.io.b := io.req.bits.b(i) 47 | div_sqrt.io.specialIO.in_valid := io.req.valid 48 | div_sqrt.io.specialIO.kill := false.B 49 | 50 | // TODO 51 | div_sqrt.io.rm := 0.U 52 | 53 | val result = div_sqrt.io.result 54 | val exception = Wire(UInt(5.W)) 55 | exception := div_sqrt.io.fflags 56 | div_sqrt.io.specialIO.isSqrt := io.req.bits.op === IEEEFDivSqrtOp.SQRT 57 | 58 | // lanes might not complete in the same cycle 59 | val resultReg = Reg(UInt(floatType.width().W)) 60 | val exceptionReg = Reg(UInt(5.W)) 61 | val resultValidReg = RegInit(false.B) 62 | val done = Wire(Bool()) 63 | div_sqrt.io.specialIO.out_ready := true.B 64 | when(div_sqrt.io.specialIO.out_valid) { 65 | resultReg := result 66 | exceptionReg := exception 67 | resultValidReg := true.B 68 | } 69 | when(done) { 70 | resultValidReg := false.B 71 | } 72 | 73 | 
( 74 | resultReg, 75 | exceptionReg, 76 | resultValidReg, 77 | done, 78 | div_sqrt.io.specialIO.in_ready 79 | ) 80 | } 81 | 82 | io.req.ready := results.map(_._5).reduce(_ & _) 83 | 84 | // collect result 85 | val res = results.map(_._1) 86 | // exception flags 87 | val exc = results.map(_._2) 88 | 89 | val resValid = results.map(_._3).reduce(_ & _) 90 | // all done 91 | for (lane <- results) { 92 | lane._4 := resValid 93 | } 94 | 95 | io.resp.valid := resValid 96 | io.resp.bits.res := res 97 | io.resp.bits.exc := exc 98 | } 99 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/fudian/IEEEFMA.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fudian 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import fpuwrapper.EmitChiselModule 6 | import fpuwrapper.FloatH 7 | import fpuwrapper.FloatS 8 | import fpuwrapper.FloatD 9 | import fpuwrapper.AddPrefix 10 | import fpuwrapper.FloatType 11 | import fpuwrapper.Synthesis 12 | 13 | class IEEEFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { 14 | val operands = Vec(3, Vec(lanes, UInt(floatType.width().W))) 15 | } 16 | 17 | class IEEEFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 18 | // result 19 | val res = Vec(lanes, UInt(floatType.width().W)) 20 | // exception status 21 | val exc = Vec(lanes, Bits(5.W)) 22 | } 23 | 24 | // adapted from fudian.FCMA 25 | // insert pipeline stages between FMUL and FCMA_ADD 26 | class FCMAPipe(val expWidth: Int, val precision: Int, val stages: Int) 27 | extends Module { 28 | val io = IO(new Bundle() { 29 | val a, b, c = Input(UInt((expWidth + precision).W)) 30 | val rm = Input(UInt(3.W)) 31 | val result = Output(UInt((expWidth + precision).W)) 32 | val fflags = Output(UInt(5.W)) 33 | }) 34 | 35 | val fmul = Module(new fudian.FMUL(expWidth, precision)) 36 | val fadd = Module(new fudian.FCMA_ADD(expWidth, 2 * precision, 
precision)) 37 | 38 | fmul.io.a := io.a 39 | fmul.io.b := io.b 40 | fmul.io.rm := io.rm 41 | 42 | val mul_to_fadd = ShiftRegister(fmul.io.to_fadd, stages) 43 | fadd.io.a := ShiftRegister(Cat(io.c, 0.U(precision.W)), stages) 44 | fadd.io.b := mul_to_fadd.fp_prod.asUInt 45 | fadd.io.b_inter_valid := true.B 46 | fadd.io.b_inter_flags := mul_to_fadd.inter_flags 47 | fadd.io.rm := ShiftRegister(io.rm, stages) 48 | 49 | io.result := fadd.io.result 50 | io.fflags := fadd.io.fflags 51 | } 52 | // Lane-parallel IEEE fused multiply-add on top of fudian; the `stages` registers are split into input, internal (inside FCMAPipe) and output stages 53 | class IEEEFMA( 54 | floatType: FloatType, 55 | lanes: Int, 56 | stages: Int, 57 | prefix: String = "" 58 | ) extends Module { 59 | AddPrefix(this, prefix) 60 | val io = IO(new Bundle { 61 | val req = Flipped(Valid(new IEEEFMARequest(floatType, lanes))) 62 | val resp = Valid(new IEEEFMAResponse(floatType, lanes)) 63 | }) 64 | // one register stage between multiplier and adder once stages > 1; the remainder is split between the inputs and the outputs 65 | val internalStages = if (stages > 1) 1 else 0 66 | val inputStages = (stages - internalStages) / 2 67 | val outputStages = stages - internalStages - inputStages 68 | // FCMAPipe is instantiated below so that the internalStages registers really exist on the data path; with fudian.FCMA (presumably register-free, confirm against fudian) the data would be one cycle ahead of resValid 69 | val reqValid = io.req.valid 70 | val results = for (i <- 0 until lanes) yield { 71 | val fma = Module( 72 | new FCMAPipe( 73 | floatType.exp(), 74 | floatType.sig(), 75 | internalStages) 76 | ) 77 | fma.suggestName(s"fma_${floatType.kind()}_${i}") 78 | fma.io.a := Pipe( 79 | reqValid, 80 | io.req.bits.operands(0)(i), 81 | inputStages 82 | ).bits 83 | fma.io.b := Pipe( 84 | reqValid, 85 | io.req.bits.operands(1)(i), 86 | inputStages 87 | ).bits 88 | fma.io.c := Pipe( 89 | reqValid, 90 | io.req.bits.operands(2)(i), 91 | inputStages 92 | ).bits 93 | 94 | // TODO: rounding mode is hard-wired to 0 95 | fma.io.rm := 0.U 96 | 97 | val res = 98 | Pipe(true.B, fma.io.result, outputStages).bits 99 | val exc = Pipe(true.B, fma.io.fflags, outputStages).bits 100 | (res, exc) 101 | } 102 | 103 | // collect result 104 | val res = results.map(_._1) 105 | // exception flags 106 | val exc = results.map(_._2) 107 | // valid travels through `stages` registers = inputStages + internalStages + outputStages on the data path 108 | val resValid = ShiftRegister(reqValid, stages) 109 | 110 | io.resp.valid := resValid 111 | io.resp.bits.res := res 112 | 
io.resp.bits.exc := exc 113 | } 114 | 115 | object IEEEFMA extends EmitChiselModule { 116 | emitChisel( 117 | (floatType, lanes, stages, _) => new IEEEFMA(floatType, lanes, stages), 118 | "IEEEFMA", 119 | "fudian" 120 | ) 121 | } 122 | 123 | object IEEEFMASynth extends EmitChiselModule { 124 | for (floatType <- Seq(FloatH, FloatS, FloatD)) { 125 | val floatName = floatType.kind().toString() 126 | for (stages <- Seq(2, 3, 4)) { 127 | emitChisel( 128 | (floatType, lanes, stages, prefix) => 129 | new IEEEFMA(floatType, lanes, stages, prefix), 130 | "IEEEFMA", 131 | "fudian", 132 | allStages = Seq(stages), 133 | floatTypes = Seq(floatType), 134 | lanes = Seq(1) 135 | ) 136 | val name = s"IEEEFMA_${floatName}1l${stages}s_fudian" 137 | Synthesis.build( 138 | Seq(s"${name}.sv"), 139 | s"${name}_IEEEFMA", 140 | s"${name}" 141 | ) 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/FMACommon.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | object FMAOp extends ChiselEnum { 7 | // 1 * op[1] + op[2] 8 | val FADD = Value 9 | // 1 * op[1] - op[2] 10 | val FSUB = Value 11 | // op[0] * op[1] + 0 12 | val FMUL = Value 13 | // op[0] * op[1] + op[2] 14 | val FMADD = Value 15 | // op[0] * op[1] - op[2] 16 | val FMSUB = Value 17 | // -(op[0] * op[1] - op[2]) 18 | val FNMSUB = Value 19 | // -(op[0] * op[1] + op[2]) 20 | val FNMADD = Value 21 | 22 | val NOP = FADD 23 | } 24 | 25 | // https://github.com/chipsalliance/rocket-chip/blob/master/src/main/scala/tile/FPU.scala 26 | // with modifications of extra stages 27 | class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int) 28 | extends Module { 29 | require(latency <= 2) 30 | 31 | val io = IO(new Bundle { 32 | val validin = Input(Bool()) 33 | val op = Input(Bits(2.W)) 34 | val a = Input(Bits((expWidth + 
sigWidth + 1).W)) 35 | val b = Input(Bits((expWidth + sigWidth + 1).W)) 36 | val c = Input(Bits((expWidth + sigWidth + 1).W)) 37 | val roundingMode = Input(UInt(3.W)) 38 | val detectTininess = Input(UInt(1.W)) 39 | val out = Output(Bits((expWidth + sigWidth + 1).W)) 40 | val exceptionFlags = Output(Bits(5.W)) 41 | val validout = Output(Bool()) 42 | }) 43 | 44 | // ------------------------------------------------------------------------ 45 | // ------------------------------------------------------------------------ 46 | 47 | val mulAddRecFNToRaw_preMul = Module( 48 | new _root_.hardfloat.MulAddRecFNToRaw_preMul(expWidth, sigWidth) 49 | ) 50 | val mulAddRecFNToRaw_postMul = Module( 51 | new _root_.hardfloat.MulAddRecFNToRaw_postMul(expWidth, sigWidth) 52 | ) 53 | 54 | mulAddRecFNToRaw_preMul.io.op := io.op 55 | mulAddRecFNToRaw_preMul.io.a := io.a 56 | mulAddRecFNToRaw_preMul.io.b := io.b 57 | mulAddRecFNToRaw_preMul.io.c := io.c 58 | 59 | val mulAddResult = 60 | (mulAddRecFNToRaw_preMul.io.mulAddA * 61 | mulAddRecFNToRaw_preMul.io.mulAddB) +& 62 | mulAddRecFNToRaw_preMul.io.mulAddC 63 | 64 | val valid_stage0 = Wire(Bool()) 65 | val roundingMode_stage0 = Wire(UInt(3.W)) 66 | val detectTininess_stage0 = Wire(UInt(1.W)) 67 | 68 | val postmul_regs = if (latency > 0) 1 else 0 69 | mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe( 70 | io.validin, 71 | mulAddRecFNToRaw_preMul.io.toPostMul, 72 | postmul_regs 73 | ).bits 74 | mulAddRecFNToRaw_postMul.io.mulAddResult := Pipe( 75 | io.validin, 76 | mulAddResult, 77 | postmul_regs 78 | ).bits 79 | mulAddRecFNToRaw_postMul.io.roundingMode := Pipe( 80 | io.validin, 81 | io.roundingMode, 82 | postmul_regs 83 | ).bits 84 | roundingMode_stage0 := Pipe(io.validin, io.roundingMode, postmul_regs).bits 85 | detectTininess_stage0 := Pipe( 86 | io.validin, 87 | io.detectTininess, 88 | postmul_regs 89 | ).bits 90 | valid_stage0 := Pipe(io.validin, false.B, postmul_regs).valid 91 | 92 | // 
------------------------------------------------------------------------ 93 | // ------------------------------------------------------------------------ 94 | 95 | val roundRawFNToRecFN = Module( 96 | new _root_.hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0) 97 | ) 98 | 99 | val round_regs = if (latency == 2) 1 else 0 100 | roundRawFNToRecFN.io.invalidExc := Pipe( 101 | valid_stage0, 102 | mulAddRecFNToRaw_postMul.io.invalidExc, 103 | round_regs 104 | ).bits 105 | roundRawFNToRecFN.io.in := Pipe( 106 | valid_stage0, 107 | mulAddRecFNToRaw_postMul.io.rawOut, 108 | round_regs 109 | ).bits 110 | roundRawFNToRecFN.io.roundingMode := Pipe( 111 | valid_stage0, 112 | roundingMode_stage0, 113 | round_regs 114 | ).bits 115 | roundRawFNToRecFN.io.detectTininess := Pipe( 116 | valid_stage0, 117 | detectTininess_stage0, 118 | round_regs 119 | ).bits 120 | io.validout := Pipe(valid_stage0, false.B, round_regs).valid 121 | 122 | roundRawFNToRecFN.io.infiniteExc := false.B 123 | 124 | io.out := roundRawFNToRecFN.io.out 125 | io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags 126 | } 127 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/HFFCmp.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import _root_.hardfloat.CompareRecFN 4 | import chisel3._ 5 | import chisel3.util._ 6 | import fpuwrapper._ 7 | 8 | object HFFCmpOp extends ChiselEnum { 9 | val EQ = Value 10 | val NE = Value 11 | val LT = Value 12 | val LE = Value 13 | val GT = Value 14 | val GE = Value 15 | 16 | val NOP = EQ 17 | } 18 | 19 | class HFFCmpRequest(val floatType: FloatType, val lanes: Int) extends Bundle { 20 | val op = HFFCmpOp() 21 | val r1 = Vec(lanes, UInt(floatType.widthHardfloat().W)) 22 | val r2 = Vec(lanes, UInt(floatType.widthHardfloat().W)) 23 | } 24 | 25 | class HFFCmpResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 26 | // 
result 27 | val res = Vec(lanes, UInt(floatType.width().W)) 28 | // exception status 29 | val exc = Vec(lanes, Bits(5.W)) 30 | } 31 | 32 | class HFFCmp(floatType: FloatType, lanes: Int, stages: Int) extends Module { 33 | val io = IO(new Bundle { 34 | val req = Flipped(Valid(new HFFCmpRequest(floatType, lanes))) 35 | val resp = Valid(new HFFCmpResponse(floatType, lanes)) 36 | }) 37 | 38 | // replicate small units for higher throughput 39 | val valid = io.req.valid 40 | val results = for (i <- 0 until lanes) yield { 41 | val cmp = Module( 42 | new CompareRecFN( 43 | floatType.exp(), 44 | floatType.sig() 45 | ) 46 | ) 47 | cmp.suggestName(s"cmp${floatType.kind()}_${i}") 48 | cmp.io.a := io.req.bits.r1(i) 49 | cmp.io.b := io.req.bits.r2(i) 50 | cmp.io.signaling := true.B 51 | 52 | val result = Wire(UInt(floatType.width().W)) 53 | val exception = Wire(UInt(5.W)) 54 | exception := cmp.io.exceptionFlags 55 | result := 0.U 56 | switch(io.req.bits.op) { 57 | is(HFFCmpOp.EQ) { 58 | when(cmp.io.eq) { 59 | result := 1.U 60 | } 61 | } 62 | is(HFFCmpOp.NE) { 63 | when(!cmp.io.eq) { 64 | result := 1.U 65 | } 66 | } 67 | is(HFFCmpOp.GE) { 68 | when(cmp.io.gt || cmp.io.eq) { 69 | result := 1.U 70 | } 71 | } 72 | is(HFFCmpOp.LE) { 73 | when(cmp.io.lt || cmp.io.eq) { 74 | result := 1.U 75 | } 76 | } 77 | is(HFFCmpOp.GT) { 78 | when(cmp.io.gt) { 79 | result := 1.U 80 | } 81 | } 82 | is(HFFCmpOp.LT) { 83 | when(cmp.io.lt) { 84 | result := 1.U 85 | } 86 | } 87 | } 88 | 89 | // stages 90 | val res = Pipe( 91 | valid, 92 | result, 93 | stages 94 | ).bits 95 | val exc = Pipe( 96 | valid, 97 | exception, 98 | stages 99 | ).bits 100 | (res, exc) 101 | } 102 | 103 | // collect result 104 | val res = results.map(_._1) 105 | // exception flags 106 | val exc = results.map(_._2) 107 | 108 | val resValid = ShiftRegister(valid, stages) 109 | 110 | io.resp.valid := resValid 111 | io.resp.bits.res := res 112 | io.resp.bits.exc := exc 113 | } 114 | 115 | object HFFCmp extends EmitChiselModule { 116 | 
emitChisel( 117 | (floatType, lanes, stages, _) => new HFFCmp(floatType, lanes, stages), 118 | "HFFCmp", 119 | "hardfloat" 120 | ) 121 | } 122 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/HFFDivSqrt.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.ChiselEnum 5 | import chisel3.util._ 6 | import fpuwrapper.EmitChiselModule 7 | import fpuwrapper.FloatType 8 | import hardfloat.DivSqrtRecFN_small 9 | 10 | object HFFDivSqrtOp extends ChiselEnum { 11 | val DIV = Value 12 | val SQRT = Value 13 | 14 | val NOP = DIV 15 | } 16 | 17 | class HFFDivSqrtRequest(val floatType: FloatType, val lanes: Int) 18 | extends Bundle { 19 | val op = HFFDivSqrtOp() 20 | val a = Vec(lanes, UInt(floatType.widthHardfloat().W)) 21 | val b = Vec(lanes, UInt(floatType.widthHardfloat().W)) 22 | } 23 | 24 | class HFFDivSqrtResponse(val floatType: FloatType, val lanes: Int) 25 | extends Bundle { 26 | // result 27 | val res = Vec(lanes, UInt(floatType.widthHardfloat().W)) 28 | // exception status 29 | val exc = Vec(lanes, Bits(5.W)) 30 | } 31 | 32 | class HFFDivSqrt(val floatType: FloatType, val lanes: Int) 33 | extends Module 34 | with RequireAsyncReset { 35 | val io = IO(new Bundle { 36 | val req = Flipped(Decoupled(new HFFDivSqrtRequest(floatType, lanes))) 37 | val resp = Valid(new HFFDivSqrtResponse(floatType, lanes)) 38 | }) 39 | 40 | // replicate small units for higher throughput 41 | val valid = io.req.valid 42 | val results = for (i <- 0 until lanes) yield { 43 | val div_sqrt = Module( 44 | new DivSqrtRecFN_small( 45 | floatType.exp(), 46 | floatType.sig(), 47 | 0 48 | ) 49 | ) 50 | div_sqrt.suggestName(s"div_sqrt${floatType.kind()}_${i}") 51 | div_sqrt.io.a := io.req.bits.a(i) 52 | div_sqrt.io.b := io.req.bits.b(i) 53 | div_sqrt.io.inValid := io.req.valid 54 | 55 | // TODO 56 | div_sqrt.io.roundingMode 
:= 0.U 57 | div_sqrt.io.detectTininess := 0.U 58 | 59 | val result = div_sqrt.io.out 60 | val exception = Wire(UInt(5.W)) 61 | exception := div_sqrt.io.exceptionFlags 62 | div_sqrt.io.sqrtOp := io.req.bits.op === HFFDivSqrtOp.SQRT 63 | 64 | // lanes might not complete in the same cycle 65 | val resultReg = Reg(UInt(floatType.widthHardfloat().W)) 66 | val exceptionReg = Reg(UInt(5.W)) 67 | val resultValidReg = RegInit(false.B) 68 | val done = Wire(Bool()) 69 | when(div_sqrt.io.outValid_div | div_sqrt.io.outValid_sqrt) { 70 | resultReg := result 71 | exceptionReg := exception 72 | resultValidReg := true.B 73 | } 74 | when(done) { 75 | resultValidReg := false.B 76 | } 77 | 78 | ( 79 | resultReg, 80 | exceptionReg, 81 | resultValidReg, 82 | done, 83 | div_sqrt.io.inReady 84 | ) 85 | } 86 | 87 | io.req.ready := results.map(_._5).reduce(_ & _) 88 | 89 | // collect result 90 | val res = results.map(_._1) 91 | // exception flags 92 | val exc = results.map(_._2) 93 | 94 | val resValid = results.map(_._3).reduce(_ & _) 95 | // all done 96 | for (lane <- results) { 97 | lane._4 := resValid 98 | } 99 | 100 | io.resp.valid := resValid 101 | io.resp.bits.res := res 102 | io.resp.bits.exc := exc 103 | } 104 | 105 | object HFFDivSqrt extends EmitChiselModule { 106 | emitChisel( 107 | (floatType, lanes, _, _) => new HFFDivSqrt(floatType, lanes), 108 | "HFFDivSqrt", 109 | "hardfloat" 110 | ) 111 | } 112 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/HFFMA.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import fpuwrapper._ 6 | import chisel3.experimental.annotate 7 | import sifive.enterprise.firrtl.NestedPrefixModulesAnnotation 8 | import chisel3.experimental.ChiselAnnotation 9 | 10 | class HFFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { 11 | val op = FMAOp() 
12 | val operands = Vec(3, Vec(lanes, UInt(floatType.widthHardfloat().W))) 13 | } 14 | 15 | class HFFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 16 | // result 17 | val res = Vec(lanes, UInt(floatType.widthHardfloat().W)) 18 | // exception status 19 | val exc = Vec(lanes, Bits(5.W)) 20 | } 21 | // Lane-parallel fused multiply-add on hardfloat-encoded operands; FMAOp selects operand substitution and the two negate bits fed to MulAddRecFNPipe 22 | class HFFMA( 23 | floatType: FloatType, 24 | lanes: Int, 25 | stages: Int, 26 | prefix: String = "" 27 | ) extends Module { 28 | AddPrefix(this, prefix) 29 | 30 | val io = IO(new Bundle { 31 | val req = Flipped(Valid(new HFFMARequest(floatType, lanes))) 32 | val resp = Valid(new HFFMAResponse(floatType, lanes)) 33 | }) 34 | // `one` turns the FMA into an add/sub; `zero` is the FMUL addend and carries the sign of the product so that a negative-zero product is not rounded to +0 by the addition (assumes the sign is the top bit of the hardfloat encoding) 35 | val one = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 36 | val zero = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 37 | for (i <- 0 until lanes) { 38 | one(i) := floatType.oneHardfloatChisel() 39 | zero(i) := Cat(io.req.bits.operands(0)(i).head(1) ^ io.req.bits.operands(1)(i).head(1), 0.U((floatType.widthHardfloat() - 1).W)) 40 | } 41 | 42 | // fma: neg * (op[0] * op[1]) + sign * op[2] 43 | // neg: {0 => 1, 1 => -1} 44 | // sign: {0 => 1, 1 => -1} 45 | val op1 = WireInit(io.req.bits.operands(0)) 46 | val op2 = WireInit(io.req.bits.operands(1)) 47 | val op3 = WireInit(io.req.bits.operands(2)) 48 | val neg = WireInit(false.B) 49 | val sign = WireInit(false.B) 50 | 51 | // see the definition of FMAOp for more detail 52 | switch(io.req.bits.op) { 53 | is(FMAOp.FADD) { 54 | op1 := one 55 | } 56 | is(FMAOp.FSUB) { 57 | op1 := one 58 | sign := true.B 59 | } 60 | is(FMAOp.FMUL) { 61 | op3 := zero 62 | } 63 | is(FMAOp.FMADD) { 64 | // do nothing 65 | } 66 | is(FMAOp.FMSUB) { 67 | sign := true.B 68 | } 69 | is(FMAOp.FNMSUB) { 70 | neg := true.B 71 | } 72 | is(FMAOp.FNMADD) { 73 | neg := true.B 74 | sign := true.B 75 | } 76 | } 77 | 78 | // when stages > 2, add extra register stages before and after the (at most 2-stage) MulAddRecFNPipe 79 | val extraStages = (stages - 2) max 0 80 | val inputStages = (extraStages + 1) / 2 81 | val outputStages = extraStages - inputStages 82 | 83 | // replicate small units for higher throughput 84 | val reqValid = io.req.valid 85 | val results = for (i <- 0 
until lanes) yield { 86 | // MulAddRecFNPipe only supports latency <= 2, hence `stages min 2`; validin must be the delayed valid flag (.valid), the .bits register is only enabled while reqValid is high and would stick at 1 87 | val fma = Module( 88 | new MulAddRecFNPipe( 89 | stages min 2, 90 | floatType.exp(), 91 | floatType.sig() 92 | ) 93 | ) 94 | fma.suggestName(s"fma_${floatType.kind()}_${i}") 95 | fma.io.validin := Pipe(reqValid, reqValid, inputStages).valid 96 | fma.io.a := Pipe( 97 | reqValid, 98 | op1(i), 99 | inputStages 100 | ).bits 101 | fma.io.b := Pipe( 102 | reqValid, 103 | op2(i), 104 | inputStages 105 | ).bits 106 | fma.io.c := Pipe( 107 | reqValid, 108 | op3(i), 109 | inputStages 110 | ).bits 111 | 112 | fma.io.op := Pipe( 113 | reqValid, 114 | Cat(neg, sign), 115 | inputStages 116 | ).bits 117 | // TODO: rounding mode and tininess detection are hard-wired to 0 118 | fma.io.roundingMode := 0.U 119 | fma.io.detectTininess := 0.U 120 | 121 | val res = Pipe(true.B, fma.io.out, outputStages).bits 122 | val exc = Pipe(true.B, fma.io.exceptionFlags, outputStages).bits 123 | (res, exc) 124 | } 125 | 126 | // collect result 127 | val res = results.map(_._1) 128 | // exception flags 129 | val exc = results.map(_._2) 130 | // valid travels through `stages` registers, the same depth as the data path 131 | val resValid = ShiftRegister(reqValid, stages) 132 | 133 | io.resp.valid := resValid 134 | io.resp.bits.res := res 135 | io.resp.bits.exc := exc 136 | } 137 | 138 | object HFFMA extends EmitChiselModule { 139 | emitChisel( 140 | (floatType, lanes, stages, prefix) => 141 | new HFFMA(floatType, lanes, stages, prefix), 142 | "HFFMA", 143 | "hardfloat" 144 | ) 145 | } 146 | 147 | object HFFMASynth extends EmitChiselModule { 148 | for (floatType <- Seq(FloatS)) { 149 | val floatName = floatType.kind().toString() 150 | for (stages <- Seq(3)) { 151 | emitChisel( 152 | (floatType, lanes, stages, _) => new HFFMA(floatType, lanes, stages), 153 | "HFFMA", 154 | "hardfloat", 155 | allStages = Seq(stages), 156 | floatTypes = Seq(floatType), 157 | lanes = Seq(1) 158 | ) 159 | val name = s"Hardfloat_HFFMA_${floatName}1l${stages}s" 160 | Synthesis.build(Seq(s"${name}.v"), s"${name}_HFFMA", s"hardfloat_${name}") 161 | } 162 | } 163 | } 164 | 165 | object 
HFFMABench extends EmitChiselModule with VivadoBench { 166 | val library = "hardfloat" 167 | val moduleName = "HFFMA" 168 | for (floatType <- Seq(FloatS)) { 169 | for (stages <- Seq(3)) { 170 | for (lanes <- Seq(2)) { 171 | val floatName = floatType.kind().toString() 172 | emitChisel( 173 | (floatType, lanes, stages, prefix) => 174 | new HFFMA(floatType, lanes, stages, prefix), 175 | moduleName, 176 | library, 177 | allStages = Seq(stages), 178 | floatTypes = Seq(floatType), 179 | lanes = Seq(lanes) 180 | ) 181 | val name = s"${moduleName}_${floatName}${lanes}l${stages}s" 182 | bench( 183 | s"${name}_${library}", 184 | Seq(s"${name}_${library}.sv"), 185 | s"${name}_${library}_${moduleName}" 186 | ) 187 | } 188 | } 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/HFFMul.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import fpuwrapper._ 6 | 7 | class HFFMulRequest(val floatType: FloatType, val lanes: Int) extends Bundle { 8 | val a = Vec(lanes, UInt(floatType.widthHardfloat().W)) 9 | val b = Vec(lanes, UInt(floatType.widthHardfloat().W)) 10 | } 11 | 12 | class HFFMulResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 13 | // result 14 | val res = Vec(lanes, UInt(floatType.widthHardfloat().W)) 15 | // exception status 16 | val exc = Vec(lanes, Bits(5.W)) 17 | } 18 | 19 | class HFFMul(floatType: FloatType, lanes: Int, stages: Int) extends Module { 20 | val io = IO(new Bundle { 21 | val req = Flipped(Valid(new HFFMulRequest(floatType, lanes))) 22 | val resp = Valid(new HFFMulResponse(floatType, lanes)) 23 | }) 24 | 25 | // when stages > 1, add extra stages 26 | val extraStages = (stages - 1) max 0 27 | val inputStages = extraStages / 2 28 | val outputStages = extraStages - inputStages 29 | 30 | // replicate small units for higher 
throughput 31 | val reqValid = io.req.valid 32 | val results = for (i <- 0 until lanes) yield { 33 | // MulRecFNPipe stages <= 1 34 | val fmul = Module( 35 | new MulRecFNPipe( 36 | floatType.exp(), 37 | floatType.sig(), 38 | stages min 1 39 | ) 40 | ) 41 | fmul.suggestName(s"fmul_${floatType.kind()}_${i}") 42 | fmul.io.validin := Pipe(reqValid, reqValid, inputStages).bits 43 | fmul.io.a := Pipe( 44 | reqValid, 45 | io.req.bits.a(i), 46 | inputStages 47 | ).bits 48 | fmul.io.b := Pipe( 49 | reqValid, 50 | io.req.bits.b(i), 51 | inputStages 52 | ).bits 53 | // TODO 54 | fmul.io.roundingMode := 0.U 55 | fmul.io.detectTininess := 0.U 56 | 57 | val res = Pipe(true.B, fmul.io.out, outputStages).bits 58 | val exc = Pipe(true.B, fmul.io.exceptionFlags, outputStages).bits 59 | (res, exc) 60 | } 61 | 62 | // collect result 63 | val res = results.map(_._1) 64 | // exception flags 65 | val exc = results.map(_._2) 66 | 67 | val resValid = ShiftRegister(reqValid, stages) 68 | 69 | io.resp.valid := resValid 70 | io.resp.bits.res := res 71 | io.resp.bits.exc := exc 72 | } 73 | 74 | object HFFMul extends EmitChiselModule { 75 | emitChisel( 76 | (floatType, lanes, stages, _) => new HFFMul(floatType, lanes, stages), 77 | "HFFMul", 78 | "hardfloat" 79 | ) 80 | } 81 | 82 | object HFFMulSynth extends EmitChiselModule { 83 | for (floatType <- Seq(FloatS)) { 84 | val floatName = floatType.kind().toString() 85 | for (stages <- Seq(1, 2, 3)) { 86 | emitChisel( 87 | (floatType, lanes, stages, _) => new HFFMul(floatType, lanes, stages), 88 | "HFFMul", 89 | "hardfloat", 90 | allStages = Seq(stages), 91 | floatTypes = Seq(floatType), 92 | lanes = Seq(1) 93 | ) 94 | val name = s"Hardfloat_HFFMul_${floatName}1l${stages}s" 95 | Synthesis.build( 96 | Seq(s"${name}.v"), 97 | s"${name}_HFFMul", 98 | s"hardfloat_${name}" 99 | ) 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/HFToIEEE.scala: 
-------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.util.ShiftRegister 5 | import chisel3.util.Valid 6 | import fpuwrapper._ 7 | 8 | class HFToIEEE(floatType: FloatType, lanes: Int, stages: Int) extends Module { 9 | val io = IO(new Bundle { 10 | val hardfloat = Input(Valid(Vec(lanes, Bits(floatType.widthHardfloat().W)))) 11 | val float = Output(Valid(Vec(lanes, Bits(floatType.width().W)))) 12 | }) 13 | 14 | io.float.valid := ShiftRegister(io.hardfloat.valid, stages) 15 | for (i <- 0 until lanes) { 16 | io.float.bits(i) := ShiftRegister( 17 | floatType.fromHardfloat(io.hardfloat.bits(i)), 18 | stages 19 | ) 20 | } 21 | } 22 | 23 | object HFToIEEE extends EmitChiselModule { 24 | emitChisel( 25 | (floatType, lanes, stages, _) => new HFToIEEE(floatType, lanes, stages), 26 | "HFToIEEE", 27 | "hardfloat" 28 | ) 29 | } 30 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/IEEEFMA.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | import fpuwrapper._ 6 | 7 | class IEEEFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { 8 | val op = FMAOp() 9 | val operands = Vec(3, Vec(lanes, UInt(floatType.width().W))) 10 | } 11 | 12 | class IEEEFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { 13 | // result 14 | val res = Vec(lanes, UInt(floatType.width().W)) 15 | // exception status 16 | val exc = Vec(lanes, Bits(5.W)) 17 | } 18 | 19 | class IEEEFMA( 20 | floatType: FloatType, 21 | lanes: Int, 22 | stages: Int, 23 | prefix: String = "" 24 | ) extends Module { 25 | AddPrefix(this, prefix) 26 | val io = IO(new Bundle { 27 | val req = Flipped(Valid(new IEEEFMARequest(floatType, lanes))) 28 | val resp = Valid(new IEEEFMAResponse(floatType, lanes)) 29 | }) 
30 | 31 | val one = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 32 | val zero = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 33 | for (i <- 0 until lanes) { 34 | one(i) := floatType.oneHardfloatChisel() 35 | zero(i) := Cat(io.req.bits.operands(0)(i)(floatType.width() - 1) ^ io.req.bits.operands(1)(i)(floatType.width() - 1), 0.U((floatType.widthHardfloat() - 1).W)) // FMUL addend: a zero carrying the sign of op[0] * op[1] (taken from the IEEE sign bits), so a -0 product is not turned into +0 by adding +0 36 | } 37 | 38 | // fma: neg * (op[0] * op[1]) + sign * op[2] 39 | // neg: {0 => 1, 1 => -1} 40 | // sign: {0 => 1, 1 => -1} 41 | val op1 = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 42 | val op2 = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 43 | val op3 = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) 44 | for (i <- 0 until lanes) { 45 | op1(i) := floatType.toHardfloat(io.req.bits.operands(0)(i)) 46 | op2(i) := floatType.toHardfloat(io.req.bits.operands(1)(i)) 47 | op3(i) := floatType.toHardfloat(io.req.bits.operands(2)(i)) 48 | } 49 | val neg = WireInit(false.B) 50 | val sign = WireInit(false.B) 51 | 52 | // see the definition of FMAOp for more detail 53 | switch(io.req.bits.op) { 54 | is(FMAOp.FADD) { 55 | op1 := one 56 | } 57 | is(FMAOp.FSUB) { 58 | op1 := one 59 | sign := true.B 60 | } 61 | is(FMAOp.FMUL) { 62 | op3 := zero 63 | } 64 | is(FMAOp.FMADD) { 65 | // do nothing 66 | } 67 | is(FMAOp.FMSUB) { 68 | sign := true.B 69 | } 70 | is(FMAOp.FNMSUB) { 71 | neg := true.B 72 | } 73 | is(FMAOp.FNMADD) { 74 | neg := true.B 75 | sign := true.B 76 | } 77 | } 78 | 79 | // when stages > 2, add extra stages 80 | val extraStages = (stages - 2) max 0 81 | val inputStages = extraStages / 2 82 | val outputStages = extraStages - inputStages 83 | 84 | // replicate small units for higher throughput 85 | val reqValid = io.req.valid 86 | val results = for (i <- 0 until lanes) yield { 87 | // MulAddRecFNPipe only support stages <= 2 88 | val fma = Module( 89 | new MulAddRecFNPipe( 90 | stages min 2, 91 | floatType.exp(), 92 | floatType.sig() 93 | ) 94 | ) 95 | fma.suggestName(s"fma_${floatType.kind()}_${i}") 96 | fma.io.validin := Pipe(reqValid, reqValid, inputStages).bits 97 | fma.io.a := Pipe( 98 | reqValid, 99 | 
op1(i), 100 | inputStages 101 | ).bits 102 | fma.io.b := Pipe( 103 | reqValid, 104 | op2(i), 105 | inputStages 106 | ).bits 107 | fma.io.c := Pipe( 108 | reqValid, 109 | op3(i), 110 | inputStages 111 | ).bits 112 | 113 | fma.io.op := Pipe( 114 | reqValid, 115 | Cat(neg, sign), 116 | inputStages 117 | ).bits 118 | // TODO 119 | fma.io.roundingMode := 0.U 120 | fma.io.detectTininess := 0.U 121 | 122 | val res = 123 | Pipe(true.B, floatType.fromHardfloat(fma.io.out), outputStages).bits 124 | val exc = Pipe(true.B, fma.io.exceptionFlags, outputStages).bits 125 | (res, exc) 126 | } 127 | 128 | // collect result 129 | val res = results.map(_._1) 130 | // exception flags 131 | val exc = results.map(_._2) 132 | 133 | val resValid = ShiftRegister(reqValid, stages) 134 | 135 | io.resp.valid := resValid 136 | io.resp.bits.res := res 137 | io.resp.bits.exc := exc 138 | } 139 | 140 | object IEEEFMA extends EmitChiselModule { 141 | emitChisel( 142 | (floatType, lanes, stages, prefix) => 143 | new IEEEFMA(floatType, lanes, stages, prefix), 144 | "IEEEFMA", 145 | "hardfloat" 146 | ) 147 | } 148 | 149 | object IEEEFMASynth extends EmitChiselModule { 150 | for (floatType <- Seq(FloatH, FloatS, FloatD)) { 151 | val floatName = floatType.kind().toString() 152 | for (stages <- Seq(4)) { 153 | emitChisel( 154 | (floatType, lanes, stages, prefix) => 155 | new IEEEFMA(floatType, lanes, stages, prefix), 156 | "IEEEFMA", 157 | "hardfloat", 158 | allStages = Seq(stages), 159 | floatTypes = Seq(floatType), 160 | lanes = Seq(1) 161 | ) 162 | val name = s"IEEEFMA_${floatName}1l${stages}s_hardfloat" 163 | Synthesis.build( 164 | Seq(s"${name}.sv"), 165 | s"${name}_IEEEFMA", 166 | name 167 | ) 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/IEEEToHF.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | 
import chisel3.util.ShiftRegister 5 | import chisel3.util.Valid 6 | import fpuwrapper._ 7 | 8 | class IEEEToHF(floatType: FloatType, lanes: Int, stages: Int) extends Module { 9 | val io = IO(new Bundle { 10 | val float = Input(Valid(Vec(lanes, Bits(floatType.width().W)))) 11 | val hardfloat = 12 | Output(Valid(Vec(lanes, Bits(floatType.widthHardfloat().W)))) 13 | }) 14 | 15 | io.hardfloat.valid := ShiftRegister(io.float.valid, stages) 16 | for (i <- 0 until lanes) { 17 | io.hardfloat.bits(i) := ShiftRegister( 18 | floatType.toHardfloat(io.float.bits(i)), 19 | stages 20 | ) 21 | } 22 | } 23 | 24 | object IEEEToHF extends EmitChiselModule { 25 | emitChisel( 26 | (floatType, lanes, stages, _) => new IEEEToHF(floatType, lanes, stages), 27 | "IEEEToHF", 28 | "hardfloat" 29 | ) 30 | } 31 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/hardfloat/MulCommon.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.hardfloat 2 | 3 | import chisel3._ 4 | import chisel3.util._ 5 | 6 | class MulRecFNPipe(expWidth: Int, sigWidth: Int, latency: Int) extends Module { 7 | val io = IO(new Bundle { 8 | val validin = Input(Bool()) 9 | val a = Input(UInt((expWidth + sigWidth + 1).W)) 10 | val b = Input(UInt((expWidth + sigWidth + 1).W)) 11 | val roundingMode = Input(UInt(3.W)) 12 | val detectTininess = Input(Bool()) 13 | 14 | val out = Output(UInt((expWidth + sigWidth + 1).W)) 15 | val exceptionFlags = Output(UInt(5.W)) 16 | val validout = Output(Bool()) 17 | }) 18 | 19 | // ------------------------------------------------------------------------ 20 | // ------------------------------------------------------------------------ 21 | val mulRawFN = Module(new _root_.hardfloat.MulRawFN(expWidth, sigWidth)) 22 | 23 | mulRawFN.io.a := _root_.hardfloat.rawFloatFromRecFN(expWidth, sigWidth, io.a) 24 | mulRawFN.io.b := _root_.hardfloat.rawFloatFromRecFN(expWidth, sigWidth, io.b) 
25 | 26 | // ------------------------------------------------------------------------ 27 | // ------------------------------------------------------------------------ 28 | val roundRawFNToRecFN = 29 | Module(new _root_.hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0)) 30 | roundRawFNToRecFN.io.invalidExc := Pipe( 31 | io.validin, 32 | mulRawFN.io.invalidExc, 33 | latency 34 | ).bits 35 | roundRawFNToRecFN.io.infiniteExc := false.B 36 | roundRawFNToRecFN.io.in := Pipe( 37 | io.validin, 38 | mulRawFN.io.rawOut, 39 | latency 40 | ).bits 41 | roundRawFNToRecFN.io.roundingMode := Pipe( 42 | io.validin, 43 | io.roundingMode, 44 | latency 45 | ).bits 46 | roundRawFNToRecFN.io.detectTininess := Pipe( 47 | io.validin, 48 | io.detectTininess, 49 | latency 50 | ).bits 51 | 52 | io.validout := Pipe(io.validin, false.B, latency).valid 53 | io.out := roundRawFNToRecFN.io.out 54 | io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags 55 | } 56 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/opencores/IEEEFPU.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.opencores 2 | 3 | import fpuwrapper._ 4 | import spinal.core._ 5 | import spinal.lib._ 6 | 7 | import java.nio.file.Paths 8 | 9 | object IEEEFPUOp extends SpinalEnum { 10 | val FADD = newElement() 11 | val FSUB = newElement() 12 | val FMUL = newElement() 13 | val FDIV = newElement() 14 | val INT2FP = newElement() 15 | val FP2INT = newElement() 16 | 17 | val NOP = FADD 18 | } 19 | 20 | class IEEEFPURequest(val floatType: FloatType) extends Bundle { 21 | val op = IEEEFPUOp() 22 | val operands = Vec(UInt(floatType.width() bits), 2) 23 | } 24 | 25 | class IEEEFPUResponse(val floatType: FloatType) extends Bundle { 26 | // result 27 | val res = UInt(floatType.width() bits) 28 | } 29 | 30 | class IEEEFPU extends Component { 31 | val floatType = FloatS 32 | val stages = 4 33 | val io = new Bundle { 34 | val 
req = slave(Flow(new IEEEFPURequest(floatType))) 35 | val resp = master(Flow(new IEEEFPUResponse(floatType))) 36 | } 37 | 38 | val fpu = new IEEEFPUBlackBox(floatType) 39 | fpu.rmode := 0 40 | fpu.fpu_op := io.req.op.asBits.resized 41 | fpu.opa := io.req.operands(0).asBits 42 | fpu.opb := io.req.operands(1).asBits 43 | io.resp.res := fpu.out.asUInt 44 | 45 | io.resp.valid := Delay(io.req.valid, stages) 46 | } 47 | 48 | class IEEEFPUBlackBox(val floatType: FloatType) extends BlackBox { 49 | val clk = in(Bool()) 50 | val rmode = in(Bits(2 bits)) 51 | val fpu_op = in(Bits(3 bits)) 52 | val opa = in(Bits(floatType.width() bits)) 53 | val opb = in(Bits(floatType.width() bits)) 54 | 55 | val out = spinal.core.out(Bits(floatType.width() bits)) 56 | val inf = spinal.core.out(Bool()) 57 | val snan = spinal.core.out(Bool()) 58 | val qnan = spinal.core.out(Bool()) 59 | val ine = spinal.core.out(Bool()) 60 | val overflow = spinal.core.out(Bool()) 61 | val underflow = spinal.core.out(Bool()) 62 | val zero = spinal.core.out(Bool()) 63 | val div_by_zero = spinal.core.out(Bool()) 64 | 65 | setDefinitionName("fpu") 66 | 67 | // Map the clk 68 | mapCurrentClockDomain( 69 | clock = clk 70 | ) 71 | 72 | val files = Seq( 73 | "except.v", 74 | "fpu.v", 75 | "post_norm.v", 76 | "pre_norm_fmul.v", 77 | "pre_norm.v", 78 | "primitives.v" 79 | ) 80 | for (file <- files) { 81 | val res = getClass().getResource(s"/opencores/${file}"); 82 | addRTLPath(Paths.get(res.toURI()).toFile().getAbsolutePath()) 83 | } 84 | } 85 | 86 | object IEEEFPU extends App { 87 | val verilog = spinal.core.SpinalConfig(netlistFileName = "OpencoresIEEEFPU.v") 88 | verilog.generateVerilog(new IEEEFPU()) 89 | } 90 | 91 | object IEEEFPUSynth extends App { 92 | val files = Seq( 93 | "except.v", 94 | "fpu.v", 95 | "post_norm.v", 96 | "pre_norm_fmul.v", 97 | "pre_norm.v", 98 | "primitives.v" 99 | ) 100 | val sources = for (file <- files) yield { 101 | s"./fpu-wrappers/resources/opencores/${file}" 102 | } 103 | 104 | 
Synthesis.build( 105 | Seq( 106 | s"OpencoresIEEEFPU.v" 107 | ) ++ sources, 108 | s"IEEEFPU_1", 109 | s"opencores_IEEEFPU" 110 | ) 111 | } 112 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/sifive.scala: -------------------------------------------------------------------------------- 1 | package sifive { 2 | package enterprise { 3 | package firrtl { 4 | import _root_.firrtl.annotations._ 5 | 6 | case class NestedPrefixModulesAnnotation( 7 | val target: Target, 8 | prefix: String, 9 | inclusive: Boolean 10 | ) extends SingleTargetAnnotation[Target] { 11 | // rebuild this annotation on the new target `n`, so that a renamed target is not silently replaced by the old one 12 | def duplicate(n: Target): Annotation = 13 | NestedPrefixModulesAnnotation(n, prefix, inclusive) 14 | } 15 | } 16 | 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /fpu-wrappers/src/fpuwrapper/synthesis.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import java.nio.charset.StandardCharsets 4 | import java.nio.file.Files 5 | import java.nio.file.Paths 6 | import java.nio.file.StandardCopyOption 7 | import scala.io.Source 8 | import scala.sys.process._ 9 | import scala.language.postfixOps 10 | 11 | /** Synthesize code with Synopsys Design Compiler 12 | */ 13 | object Synthesis { 14 | def build( 15 | sources: Seq[String], 16 | toplevelName: String, 17 | folderName: String = null 18 | ) = { 19 | val actualFolderName = if (folderName == null) { 20 | toplevelName 21 | } else { 22 | folderName 23 | } 24 | 25 | val dir = s"synWorkspace/${actualFolderName}/" 26 | Files.createDirectories(Paths.get(dir)) 27 | 28 | // copy files to synWorkspace 29 | val names = (for (file <- sources) yield { 30 | val name = Paths.get(file).getFileName() 31 | Files.copy( 32 | Paths.get(file), 33 | Paths.get(s"${dir}/${name}"), 34 | StandardCopyOption.REPLACE_EXISTING 35 | ) 36 | name.toString() 37 | }).toList 38 | 39 | // apply template 40 | var 
template = { val src = Source.fromResource("syn.tcl"); try src.mkString finally src.close() } // read the bundled template, then close the resource stream instead of leaking it 41 | template = template.replace( 42 | "INPUT_VERILOG", 43 | names.filter((s) => s.endsWith(".v")).mkString(" ") 44 | ) 45 | template = template.replace( 46 | "INPUT_SYSTEMVERILOG", 47 | names.filter((s) => s.endsWith(".sv")).mkString(" ") 48 | ) 49 | template = template.replace( 50 | "INPUT_VHDL", 51 | names.filter((s) => s.endsWith(".vhdl")).mkString(" ") 52 | ) 53 | template = template.replace("TOPLEVEL_NAME", toplevelName) 54 | 55 | Files.write( 56 | Paths.get(s"${dir}/syn.tcl"), 57 | template.getBytes(StandardCharsets.UTF_8) 58 | ) 59 | 60 | Process("dc_shell -f syn.tcl", new java.io.File(dir)) ! 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/common.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper 2 | 3 | import svsim._ 4 | import chisel3.RawModule 5 | import chisel3.simulator._ 6 | import java.nio.file.Files 7 | import java.io.File 8 | import scala.reflect.io.Directory 9 | 10 | // custom EphemeralSimulator to add options to verilator 11 | 12 | object Simulator extends PeekPokeAPI { 13 | 14 | def simulate[T <: RawModule]( 15 | module: => T 16 | )(body: (T) => Unit): Unit = { 17 | makeSimulator.simulate(module)({ module => body(module.wrapped) }).result 18 | } 19 | 20 | private class DefaultSimulator(val workspacePath: String) 21 | extends SingleBackendSimulator[verilator.Backend] { 22 | val backend = verilator.Backend.initializeFromProcessEnvironment() 23 | val tag = "default" 24 | val commonCompilationSettings = CommonCompilationSettings() 25 | val backendSpecificCompilationSettings = 26 | verilator.Backend.CompilationSettings( 27 | traceStyle = 28 | Some(verilator.Backend.CompilationSettings.TraceStyle.Vcd()), 29 | // for fpnew 30 | disabledWarnings = Seq( 31 | "UNOPTFLAT", 32 | "CASEOVERLAP", 33 | "UNSIGNED", 34 | "WIDTHTRUNC", 35 | "WIDTHEXPAND", 36 | 
"ASCRANGE", 37 | "PINMISSING" 38 | ) 39 | ) 40 | 41 | // Try to clean up temporary workspace if possible 42 | sys.addShutdownHook { 43 | (new Directory(new File(workspacePath))).deleteRecursively() 44 | } 45 | } 46 | private def makeSimulator: DefaultSimulator = { 47 | // TODO: Use ProcessHandle when we can drop Java 8 support 48 | // val id = ProcessHandle.current().pid().toString() 49 | val id = java.lang.management.ManagementFactory.getRuntimeMXBean().getName() 50 | val className = getClass().getName().stripSuffix("$") 51 | new DefaultSimulator( 52 | Files.createTempDirectory(s"${className}_${id}_").toString 53 | ) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/flopoco/FPCFExpTest.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import org.scalatest.funsuite.AnyFunSuite 4 | import spinal.core._ 5 | import spinal.core.sim._ 6 | import fpuwrapper.FloatS 7 | 8 | // FPCFExp's testbench 9 | class FPCFExpTest extends AnyFunSuite { 10 | test("FPCFExp") { 11 | val stages = 1 12 | SimConfig.withWave.withIVerilog 13 | .doSim( 14 | new FPCFExp( 15 | FloatS, 16 | 2, 17 | stages 18 | ) 19 | ) { dut => 20 | dut.clockDomain.forkStimulus(period = 10) 21 | dut.clockDomain.waitRisingEdge() 22 | 23 | var cycles = 0 24 | dut.clockDomain.onRisingEdges { 25 | cycles = cycles + 1 26 | } 27 | 28 | dut.io.req.valid #= false 29 | sleep(160) 30 | 31 | dut.clockDomain.waitRisingEdge() 32 | dut.io.req.valid #= true 33 | dut.io.req.a(0) #= BigInt("13f800000", 16) // 1.0 34 | dut.io.req.a(1) #= BigInt("140000000", 16) // 2.0 35 | 36 | val beginCycles = cycles 37 | dut.clockDomain.waitFallingEdgeWhere { 38 | dut.io.resp.valid.toBoolean 39 | } 40 | assert(cycles - beginCycles == stages) 41 | assert( 42 | dut.io.resp.res(0).toBigInt == BigInt("1402df854", 16) 43 | ) // 2.718281828459045 44 | assert( 45 | dut.io.resp.res(1).toBigInt 
== BigInt("140ec7326", 16) 46 | ) // 7.38905609893065 47 | 48 | sleep(100) 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/flopoco/IEEEFExpTest.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import org.scalatest.funsuite.AnyFunSuite 4 | import spinal.core._ 5 | import spinal.core.sim._ 6 | import fpuwrapper.FloatS 7 | 8 | // IEEEFExp's testbench 9 | class IEEEFExpTest extends AnyFunSuite { 10 | test("IEEEFExp") { 11 | val stages = 1 12 | SimConfig.withWave.withIVerilog 13 | .doSim( 14 | new IEEEFExp( 15 | FloatS, 16 | 2, 17 | stages 18 | ) 19 | ) { dut => 20 | dut.clockDomain.forkStimulus(period = 10) 21 | dut.clockDomain.waitRisingEdge() 22 | 23 | var cycles = 0 24 | dut.clockDomain.onRisingEdges { 25 | cycles = cycles + 1 26 | } 27 | 28 | dut.io.req.valid #= false 29 | sleep(160) 30 | 31 | dut.clockDomain.waitRisingEdge() 32 | dut.io.req.valid #= true 33 | dut.io.req.a(0) #= BigInt("3f800000", 16) // 1.0 34 | dut.io.req.a(1) #= BigInt("40000000", 16) // 2.0 35 | 36 | val beginCycles = cycles 37 | dut.clockDomain.waitFallingEdgeWhere { 38 | dut.io.resp.valid.toBoolean 39 | } 40 | assert(cycles - beginCycles == stages) 41 | assert( 42 | dut.io.resp.res(0).toBigInt == BigInt("402df854", 16) 43 | ) // 2.718281828459045 44 | assert( 45 | dut.io.resp.res(1).toBigInt == BigInt("40ec7326", 16) 46 | ) // 7.38905609893065 47 | 48 | sleep(100) 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/flopoco/IEEEFMATest.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.flopoco 2 | 3 | import org.scalatest.funsuite.AnyFunSuite 4 | import spinal.core._ 5 | import spinal.core.sim._ 6 | import fpuwrapper.FloatS 7 | 8 | // IEEEFMA's testbench 9 | class IEEEFMATest 
extends AnyFunSuite { 10 | test("IEEEFMA") { 11 | val stages = 3 12 | SimConfig.withWave.withIVerilog 13 | .doSim( 14 | new IEEEFMA( 15 | FloatS, 16 | 2, 17 | stages 18 | ) 19 | ) { dut => 20 | dut.clockDomain.forkStimulus(period = 10) 21 | dut.clockDomain.waitRisingEdge() 22 | 23 | var cycles = 0 24 | dut.clockDomain.onRisingEdges { 25 | cycles = cycles + 1 26 | } 27 | 28 | dut.io.req.valid #= false 29 | sleep(160) 30 | 31 | dut.clockDomain.waitRisingEdge() 32 | dut.io.req.valid #= true 33 | dut.io.req.operands(0)(0) #= BigInt("3f800000", 16) // 1.0 34 | dut.io.req.operands(1)(0) #= BigInt("40000000", 16) // 2.0 35 | dut.io.req.operands(2)(0) #= BigInt("40400000", 16) // 3.0 36 | dut.io.req.operands(0)(1) #= BigInt("40800000", 16) // 4.0 37 | dut.io.req.operands(1)(1) #= BigInt("40a00000", 16) // 5.0 38 | dut.io.req.operands(2)(1) #= BigInt("40c00000", 16) // 6.0 39 | dut.io.req.op #= IEEEFMAOp.FMADD 40 | 41 | val beginCycles = cycles 42 | dut.clockDomain.waitFallingEdgeWhere { 43 | dut.io.resp.valid.toBoolean 44 | } 45 | assert(cycles - beginCycles == stages) 46 | assert(dut.io.resp.res(0).toBigInt == BigInt("40a00000", 16)) // 5.0 47 | assert(dut.io.resp.res(1).toBigInt == BigInt("41d00000", 16)) // 26.0 48 | 49 | sleep(100) 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/fpnew/IEEEFPUTest.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fpnew 2 | 3 | import chisel3._ 4 | import chisel3.experimental.BundleLiterals._ 5 | import fpuwrapper.Simulator._ 6 | import org.scalatest.freespec.AnyFreeSpec 7 | import fpuwrapper.FloatS 8 | 9 | class IEEEFPUTest extends AnyFreeSpec { 10 | // fpnew does not support icarus verilog 11 | for (stages <- 1 to 5) { 12 | s"IEEEFPU of ${stages} stages should work" in { 13 | simulate(new IEEEFPU(FloatS, 2, stages)) { dut => 14 | dut.clock.step(16) 15 | 16 | def enqueueReq() = { 17 | 
dut.io.req.valid.poke(true.B) 18 | while (dut.io.req.ready.peek().litToBoolean == false) { 19 | dut.clock.step(1) 20 | } 21 | dut.clock.step(1) 22 | dut.io.req.valid.poke(false.B) 23 | } 24 | 25 | def expectResp()(x: IEEEFPU => Unit) = { 26 | val expectedCycles = stages - 1 27 | var cycles = 0 28 | dut.io.resp.ready.poke(true.B) 29 | while (dut.io.resp.valid.peek().litToBoolean == false) { 30 | dut.clock.step(1) 31 | cycles += 1 32 | } 33 | dut.io.resp.valid.expect(true.B) 34 | x(dut) 35 | dut.io.resp.ready.poke(true.B) 36 | dut.clock.step(1) 37 | assert( 38 | cycles == expectedCycles, 39 | s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}" 40 | ) 41 | } 42 | 43 | dut.io.req.bits.operands(0).poke("h3f8000003f800000".U) // 1 44 | dut.io.req.bits.operands(1).poke("h4000000040000000".U) // 2 45 | dut.io.req.bits.operands(2).poke("h4040000040400000".U) // 3 46 | dut.io.req.bits.op.poke(FPOperation.FMADD) 47 | dut.io.req.bits.srcFormat.poke(FPFloatFormat.Fp32) 48 | dut.io.req.bits.dstFormat.poke(FPFloatFormat.Fp32) 49 | enqueueReq() 50 | expectResp() { dut => 51 | dut.io.resp.bits.result.expect("h40a0000040a00000".U) 52 | } // 5 53 | } 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/fudian/IEEEFAddTest.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fudian 2 | 3 | import chisel3._ 4 | import chisel3.experimental.BundleLiterals._ 5 | import chisel3.simulator.EphemeralSimulator._ 6 | import org.scalatest.freespec.AnyFreeSpec 7 | import fpuwrapper.FloatS 8 | 9 | class IEEEFAddTest extends AnyFreeSpec { 10 | for (stages <- 1 to 5) { 11 | s"IEEEFAdd of ${stages} stages should work" in { 12 | simulate(new IEEEFAdd(FloatS, 2, stages)) { dut => 13 | dut.clock.step(16) 14 | 15 | def enqueueReq() = { 16 | dut.io.req.valid.poke(true.B) 17 | dut.clock.step(1) 18 | dut.io.req.valid.poke(false.B) 19 
| } 20 | 21 | def expectResp()(x: IEEEFAdd => Unit) = { 22 | val expectedCycles = stages - 1 23 | var cycles = 0 24 | while (dut.io.resp.valid.peek().litToBoolean == false) { 25 | dut.clock.step(1) 26 | cycles += 1 27 | } 28 | dut.io.resp.valid.expect(true.B) 29 | x(dut) 30 | dut.clock.step(1) 31 | assert( 32 | cycles == expectedCycles, 33 | s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}" 34 | ) 35 | } 36 | 37 | dut.io.req.bits.a(0).poke("h40000000".U) // 2 38 | dut.io.req.bits.b(0).poke("h40400000".U) // 3 39 | dut.io.req.bits.a(1).poke("h40800000".U) // 4 40 | dut.io.req.bits.b(1).poke("h40a00000".U) // 5 41 | enqueueReq() 42 | expectResp() { dut => 43 | dut.io.resp.bits.res(0).expect("h40a00000".U) // 5 44 | dut.io.resp.bits.res(1).expect("h41100000".U) // 9 45 | } 46 | } 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /fpu-wrappers/test/src/fpuwrapper/fudian/IEEEFDivSqrtTest.scala: -------------------------------------------------------------------------------- 1 | package fpuwrapper.fudian 2 | 3 | import chisel3._ 4 | import chisel3.experimental.BundleLiterals._ 5 | import fpuwrapper.Simulator._ 6 | import org.scalatest.freespec.AnyFreeSpec 7 | import fpuwrapper.FloatS 8 | 9 | class IEEEFDivSqrtTest extends AnyFreeSpec { 10 | s"IEEEFDivSqrt should work" in { 11 | simulate(new IEEEFDivSqrt(FloatS, 2)) { dut => 12 | dut.reset.poke(true.B) 13 | dut.clock.step() 14 | dut.reset.poke(false.B) 15 | dut.clock.step() 16 | 17 | dut.clock.step(16) 18 | 19 | def enqueueReq() = { 20 | dut.io.req.valid.poke(true.B) 21 | dut.clock.step(1) 22 | dut.io.req.valid.poke(false.B) 23 | } 24 | 25 | def expectResp()(x: IEEEFDivSqrt => Unit) = { 26 | while (dut.io.resp.valid.peek().litToBoolean == false) { 27 | dut.clock.step(1) 28 | } 29 | dut.io.resp.valid.expect(true.B) 30 | x(dut) 31 | dut.clock.step(1) 32 | } 33 | 34 | dut.io.req.bits.a(0).poke("h3f800000".U) // 1.0 35 | 
package fpuwrapper.fudian

import chisel3._
import chisel3.experimental.BundleLiterals._
import chisel3.simulator.EphemeralSimulator._
import org.scalatest.freespec.AnyFreeSpec
import fpuwrapper.FloatS

/** Smoke test for the fudian `IEEEFMA` wrapper.
  *
  * For every pipeline depth from 1 to 5 the DUT is built as
  * `IEEEFMA(FloatS, 2, stages)`, one request is issued and the response is
  * checked both for its value and for the cycle on which it shows up.
  */
class IEEEFMATest extends AnyFreeSpec {
  // Upper bound on how long we poll resp.valid. Without it a DUT that never
  // answers would spin the simulation forever instead of failing the test.
  private val maxWaitCycles = 100

  for (stages <- 1 to 5) {
    s"IEEEFMA of ${stages} stages should work" in {
      simulate(new IEEEFMA(FloatS, 2, stages)) { dut =>
        // Let the design run for a while before driving any request.
        dut.clock.step(16)

        // Hold req.valid high for exactly one clock cycle.
        def enqueueReq() = {
          dut.io.req.valid.poke(true.B)
          dut.clock.step(1)
          dut.io.req.valid.poke(false.B)
        }

        // Poll resp.valid, run the caller's checks on the response, then
        // verify that the response needed exactly `stages - 1` extra cycles.
        def expectResp()(x: IEEEFMA => Unit) = {
          val expectedCycles = stages - 1
          var cycles = 0
          while (!dut.io.resp.valid.peek().litToBoolean) {
            assert(
              cycles < maxWaitCycles,
              s"Response does not appear within ${maxWaitCycles} cycles"
            )
            dut.clock.step(1)
            cycles += 1
          }
          dut.io.resp.valid.expect(true.B)
          x(dut)
          dut.clock.step(1)
          assert(
            cycles == expectedCycles,
            s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
          )
        }

        // Operands are IEEE-754 single-precision bit patterns; the decimal
        // value is noted next to each literal. Index order is
        // operands(operandIndex)(elementIndex).
        dut.io.req.bits.operands(0)(0).poke("h3f800000".U) // 1
        dut.io.req.bits.operands(1)(0).poke("h40000000".U) // 2
        dut.io.req.bits.operands(2)(0).poke("h40400000".U) // 3
        dut.io.req.bits.operands(0)(1).poke("h40800000".U) // 4
        dut.io.req.bits.operands(1)(1).poke("h40a00000".U) // 5
        dut.io.req.bits.operands(2)(1).poke("h40c00000".U) // 6
        enqueueReq()
        expectResp() { dut =>
          dut.io.resp.bits.res(0).expect("h40a00000".U) // 5 = 1 * 2 + 3
          dut.io.resp.bits.res(1).expect("h41d00000".U) // 26 = 4 * 5 + 6
        }
      }
    }
  }
}
package fpuwrapper.hardfloat

import chisel3._
import chisel3.experimental.BundleLiterals._
import fpuwrapper.Simulator._
import org.scalatest.freespec.AnyFreeSpec
import fpuwrapper.FloatS

/** Smoke test for `HFFDivSqrt(FloatS, 2)`: two divisions (positive and
  * negative divisor) followed by one square root, each on both elements.
  *
  * Operand and result literals are 9-hex-digit values in the wrapper's
  * hardfloat format (presumably HardFloat's recoded encoding -- confirm); the
  * decimal value is noted next to each literal.
  */
class HFFDivSqrtTest extends AnyFreeSpec {
  // Upper bound on how long we poll resp.valid. This test does not check the
  // latency, so without a bound a DUT that never answers would hang the run.
  private val maxWaitCycles = 1000

  s"HFFDivSqrt should work" in {
    simulate(new HFFDivSqrt(FloatS, 2)) { dut =>
      // Explicit reset pulse before the design is used.
      dut.reset.poke(true.B)
      dut.clock.step()
      dut.reset.poke(false.B)
      dut.clock.step()

      dut.clock.step(16)

      // Hold req.valid high for exactly one clock cycle.
      def enqueueReq() = {
        dut.io.req.valid.poke(true.B)
        dut.clock.step(1)
        dut.io.req.valid.poke(false.B)
      }

      // Poll resp.valid (bounded), then run the caller's checks on the
      // response and advance one more cycle.
      def expectResp()(x: HFFDivSqrt => Unit) = {
        var waited = 0
        while (!dut.io.resp.valid.peek().litToBoolean) {
          assert(
            waited < maxWaitCycles,
            s"Response does not appear within ${maxWaitCycles} cycles"
          )
          dut.clock.step(1)
          waited += 1
        }
        dut.io.resp.valid.expect(true.B)
        x(dut)
        dut.clock.step(1)
      }

      // 1.0 / 2.0 and 3.0 / 4.0
      dut.io.req.bits.a(0).poke("h080000000".U) // 1.0
      dut.io.req.bits.b(0).poke("h080800000".U) // 2.0
      dut.io.req.bits.a(1).poke("h080c00000".U) // 3.0
      dut.io.req.bits.b(1).poke("h081000000".U) // 4.0
      dut.io.req.bits.op.poke(HFFDivSqrtOp.DIV)
      enqueueReq()
      expectResp() { dut =>
        dut.io.resp.bits.res(0).expect("h07f800000".U) // 0.5
        dut.io.resp.bits.res(1).expect("h07fc00000".U) // 0.75
      }

      // Same magnitudes with negative divisors: the sign must propagate.
      dut.io.req.bits.a(0).poke("h080000000".U) // 1.0
      dut.io.req.bits.b(0).poke("h180800000".U) // -2.0
      dut.io.req.bits.a(1).poke("h080c00000".U) // 3.0
      dut.io.req.bits.b(1).poke("h181000000".U) // -4.0
      dut.io.req.bits.op.poke(HFFDivSqrtOp.DIV)
      enqueueReq()
      expectResp() { dut =>
        dut.io.resp.bits.res(0).expect("h17f800000".U) // -0.5
        dut.io.resp.bits.res(1).expect("h17fc00000".U) // -0.75
      }

      // sqrt(4.0) and sqrt(9.0); operand b is driven with zero for SQRT.
      dut.io.req.bits.a(0).poke("h081000000".U) // 4.0
      dut.io.req.bits.b(0).poke("h000000000".U) // 0.0
      dut.io.req.bits.a(1).poke("h081900000".U) // 9.0
      dut.io.req.bits.b(1).poke("h000000000".U) // 0.0
      dut.io.req.bits.op.poke(HFFDivSqrtOp.SQRT)
      enqueueReq()
      expectResp() { dut =>
        dut.io.resp.bits.res(0).expect("h080800000".U) // 2.0
        dut.io.resp.bits.res(1).expect("h080c00000".U) // 3.0
      }
    }
  }
}
package fpuwrapper.hardfloat

import chisel3._
import chisel3.experimental.BundleLiterals._
import chisel3.simulator.EphemeralSimulator._
import org.scalatest.freespec.AnyFreeSpec
import fpuwrapper.FloatS


/** Smoke test for `HFFMul(FloatS, 2, stages)` at pipeline depths 1 to 5.
  *
  * One request with two element pairs is issued; the products and the
  * response latency (`stages - 1` cycles after the request) are checked.
  * Literals are 9-hex-digit values in the wrapper's hardfloat format
  * (presumably HardFloat's recoded encoding -- confirm).
  */
class HFFMulTest extends AnyFreeSpec {
  // Upper bound on how long we poll resp.valid. Without it a DUT that never
  // answers would spin the simulation forever instead of failing the test.
  private val maxWaitCycles = 100

  for (stages <- 1 to 5) {
    s"HFFMul of ${stages} stages should work" in {
      simulate(new HFFMul(FloatS, 2, stages)) { dut =>
        // Let the design run for a while before driving any request.
        dut.clock.step(16)

        // Hold req.valid high for exactly one clock cycle.
        def enqueueReq(): Unit = {
          dut.io.req.valid.poke(true.B)
          dut.clock.step(1)
          dut.io.req.valid.poke(false.B)
        }

        // Poll resp.valid, run the caller's checks on the response, then
        // verify that the response needed exactly `stages - 1` extra cycles.
        def expectResp()(x: HFFMul => Unit) = {
          val expectedCycles = stages - 1
          var cycles = 0
          while (!dut.io.resp.valid.peek().litToBoolean) {
            assert(
              cycles < maxWaitCycles,
              s"Response does not appear within ${maxWaitCycles} cycles"
            )
            dut.clock.step(1)
            cycles += 1
          }
          dut.io.resp.valid.expect(true.B)
          x(dut)
          dut.clock.step(1)
          assert(
            cycles == expectedCycles,
            s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
          )
        }

        dut.io.req.bits.a(0).poke("h080000000".U) // 1
        dut.io.req.bits.b(0).poke("h080800000".U) // 2
        dut.io.req.bits.a(1).poke("h080c00000".U) // 3
        dut.io.req.bits.b(1).poke("h081000000".U) // 4
        enqueueReq()
        expectResp() { dut =>
          dut.io.resp.bits.res(0).expect("h080800000".U) // 2 = 1 * 2
          dut.io.resp.bits.res(1).expect("h081c00000".U) // 12 = 3 * 4
        }
      }
    }
  }
}
package fpuwrapper.hardfloat

import chisel3._
import chisel3.experimental.BundleLiterals._
import chisel3.simulator.EphemeralSimulator._
import org.scalatest.freespec.AnyFreeSpec
import fpuwrapper.FloatS


/** Smoke test for `IEEEToHF(FloatS, 2, stages)` at pipeline depths 1 to 5.
  *
  * Two IEEE-754 single-precision values are driven on `io.float`; the test
  * checks the values that appear on `io.hardfloat` and that they arrive
  * `stages - 1` cycles after the request.
  */
class IEEEToHFTest extends AnyFreeSpec {
  // Upper bound on how long we poll hardfloat.valid. Without it a DUT that
  // never answers would spin the simulation forever instead of failing.
  private val maxWaitCycles = 100

  for (stages <- 1 to 5) {
    s"IEEEToHF of ${stages} stages should work" in {
      simulate(new IEEEToHF(FloatS, 2, stages)) { dut =>
        // Let the design run for a while before driving any request.
        dut.clock.step(16)

        // Hold float.valid high for exactly one clock cycle.
        def enqueueReq() = {
          dut.io.float.valid.poke(true.B)
          dut.clock.step(1)
          dut.io.float.valid.poke(false.B)
        }

        // Poll hardfloat.valid, run the caller's checks on the output, then
        // verify that the output needed exactly `stages - 1` extra cycles.
        def expectResp()(x: IEEEToHF => Unit) = {
          val expectedCycles = stages - 1
          var cycles = 0
          while (!dut.io.hardfloat.valid.peek().litToBoolean) {
            assert(
              cycles < maxWaitCycles,
              s"Response does not appear within ${maxWaitCycles} cycles"
            )
            dut.clock.step(1)
            cycles += 1
          }
          dut.io.hardfloat.valid.expect(true.B)
          x(dut)
          dut.clock.step(1)
          assert(
            cycles == expectedCycles,
            s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
          )
        }

        dut.io.float.bits(0).poke("h03f800000".U) // 1
        dut.io.float.bits(1).poke("h042c80000".U) // 100
        enqueueReq()
        expectResp() { dut =>
          // Same two values in the wrapper's hardfloat format.
          dut.io.hardfloat.bits(0).expect("h080000000".U)
          dut.io.hardfloat.bits(1).expect("h083480000".U)
        }
      }
    }
  }
}
"""Summarize synthesis reports found one directory level below the cwd.

For every ``<design>/*_report_area.txt``, ``<design>/*_report_power.txt`` and
``<design>/*_report_timing_setup.txt`` the script extracts a handful of
numbers and prints one summary entry per design directory, sorted by name.
Fields whose report is missing are printed as ``n/a`` instead of aborting the
whole summary.
"""
import os
import glob
import collections

# (line prefix, result key, converter) for every value read from an area report.
AREA_FIELDS = (
    ('Combinational area:', 'comb_area', float),
    ('Buf/Inv area:', 'buf_area', float),
    ('Noncombinational area:', 'non_comb_area', float),
    ('Macro/Black Box area:', 'macro_area', float),
    ('Total area:', 'total_area', float),
    ('Number of cells:', 'cells', int),
)

# (result key, printed label, divisor) in print order. Only leakage is scaled
# (by 1000) before being printed with the "mW" suffix.
POWER_FIELDS = (
    ('switch_power', 'Switch', 1),
    ('internal_power', 'Internal', 1),
    ('leakage_power', 'Leakage', 1000),
    ('total_power', 'Total', 1),
)


def design_key(file_name):
    """Return the first path component of ``file_name`` (the design directory).

    Uses ``os.path`` so the result does not depend on the separator that
    ``glob`` returns on the current platform.
    """
    return os.path.normpath(file_name).split(os.sep)[0]


def parse_area_report(lines):
    """Extract area and cell-count fields from the lines of an area report.

    Returns a dict holding only the keys of ``AREA_FIELDS`` whose prefix was
    found; the value is the last space-separated token of the matching line.
    """
    fields = {}
    for line in lines:
        line = line.strip()
        for prefix, name, convert in AREA_FIELDS:
            if line.startswith(prefix):
                fields[name] = convert(line.split(' ')[-1])
    return fields


def parse_power_report(lines):
    """Extract power numbers from the lines of a power report.

    A line is used when it contains the text ``100.0``; its columns are then
    read from the right: total, leakage, internal, switching (the last column
    is presumably the percentage -- confirm against the report format). A
    later matching line overwrites an earlier one. Matching lines with fewer
    than five columns are skipped rather than raising ``IndexError``.
    """
    fields = {}
    for line in lines:
        line = line.strip()
        if '100.0' not in line:
            continue
        columns = [c for c in line.split(' ') if c]
        if len(columns) < 5:
            continue
        fields['total_power'] = float(columns[-2])
        fields['leakage_power'] = float(columns[-3])
        fields['internal_power'] = float(columns[-4])
        fields['switch_power'] = float(columns[-5])
    return fields


def parse_timing_report(lines):
    """Return ``{'max_comb_delay': t}`` from the first 'data arrival time' line.

    Returns an empty dict when no such line exists.
    """
    for line in lines:
        line = line.strip()
        if 'data arrival time' in line:
            return {'max_comb_delay': float(line.split(' ')[-1])}
    return {}


def format_entry(key, value):
    """Render the summary text for one design (no trailing newline).

    ``value`` is the dict of parsed fields. Missing area/cell fields are shown
    as ``n/a``, missing power fields are omitted, and a missing or zero delay
    yields ``Max Freq: n/a``.
    """
    def field(name, spec):
        return format(value[name], spec) if name in value else 'n/a'

    power = ' Power:'
    for name, label, divisor in POWER_FIELDS:
        if name in value:
            power += ' {}({:.3f} mW)'.format(label, value[name] / divisor)

    delay = value.get('max_comb_delay')
    if delay:
        freq = ' Max Freq: {:.0f} MHz ({:.2f} ns)'.format(1000.0 / delay, delay)
    else:
        # No timing report, or a zero delay that cannot be inverted.
        freq = ' Max Freq: n/a'

    return '\n'.join([
        '{}:'.format(key),
        ' Cells: {}'.format(field('cells', '')),
        ' Area: Comb={} Buf={} NonComb={} Macro={} Total={}'.format(
            field('comb_area', '.0f'),
            field('buf_area', '.0f'),
            field('non_comb_area', '.0f'),
            field('macro_area', '.0f'),
            field('total_area', '.0f'),
        ),
        power,
        freq,
    ])


# Glob pattern -> parser, processed in the same order as the original script.
REPORT_PARSERS = (
    ('*/*_report_area.txt', parse_area_report),
    ('*/*_report_power.txt', parse_power_report),
    ('*/*_report_timing_setup.txt', parse_timing_report),
)

data = collections.defaultdict(dict)

for pattern, parser in REPORT_PARSERS:
    for file_name in glob.glob(pattern):
        with open(file_name) as f:
            parsed = parser(f)
        # Only create an entry when the report actually yielded something.
        if parsed:
            data[design_key(file_name)].update(parsed)

keys = data.keys()
for key in sorted(keys):
    print(format_entry(key, data[key]))
Power: Switch(3.905 mW) Internal(1.788 mW) Leakage(0.161 mW) Total(5.854 mW) 40 | Max Freq: 1020 MHz (0.98 ns) 41 | IEEEFMA_H1l3s_fudian: 42 | Cells: 3797 43 | Area: Comb=4331 Buf=476 NonComb=524 Macro=0 Total=4856 44 | Power: Switch(4.396 mW) Internal(1.837 mW) Leakage(0.162 mW) Total(6.395 mW) 45 | Max Freq: 1042 MHz (0.96 ns) 46 | IEEEFMA_H1l3s_hardfloat: 47 | Cells: 2614 48 | Area: Comb=2864 Buf=297 NonComb=476 Macro=0 Total=3340 49 | Power: Switch(2.124 mW) Internal(1.127 mW) Leakage(0.106 mW) Total(3.357 mW) 50 | Max Freq: 1020 MHz (0.98 ns) 51 | IEEEFMA_H1l4s_fudian: 52 | Cells: 3546 53 | Area: Comb=4155 Buf=471 NonComb=586 Macro=0 Total=4741 54 | Power: Switch(3.649 mW) Internal(1.651 mW) Leakage(0.158 mW) Total(5.458 mW) 55 | Max Freq: 1042 MHz (0.96 ns) 56 | IEEEFMA_H1l4s_hardfloat: 57 | Cells: 2521 58 | Area: Comb=2467 Buf=242 NonComb=598 Macro=0 Total=3065 59 | Power: Switch(1.937 mW) Internal(1.093 mW) Leakage(0.090 mW) Total(3.121 mW) 60 | Max Freq: 1053 MHz (0.95 ns) 61 | IEEEFMA_S1l2s_fudian: 62 | Cells: 9280 63 | Area: Comb=17082 Buf=1891 NonComb=446 Macro=0 Total=17528 64 | Power: Switch(21.939 mW) Internal(8.593 mW) Leakage(0.670 mW) Total(31.202 mW) 65 | Max Freq: 833 MHz (1.20 ns) 66 | IEEEFMA_S1l2s_hardfloat: 67 | Cells: 6829 68 | Area: Comb=11081 Buf=1122 NonComb=606 Macro=0 Total=11687 69 | Power: Switch(10.686 mW) Internal(4.941 mW) Leakage(0.431 mW) Total(16.057 mW) 70 | Max Freq: 806 MHz (1.24 ns) 71 | IEEEFMA_S1l3s_fudian: 72 | Cells: 9163 73 | Area: Comb=11887 Buf=1165 NonComb=1704 Macro=0 Total=13590 74 | Power: Switch(11.780 mW) Internal(5.466 mW) Leakage(0.459 mW) Total(17.705 mW) 75 | Max Freq: 1020 MHz (0.98 ns) 76 | IEEEFMA_S1l3s_hardfloat: 77 | Cells: 7191 78 | Area: Comb=10342 Buf=1083 NonComb=1199 Macro=0 Total=11541 79 | Power: Switch(8.059 mW) Internal(4.300 mW) Leakage(0.405 mW) Total(12.764 mW) 80 | Max Freq: 1020 MHz (0.98 ns) 81 | IEEEFMA_S1l4s_fudian: 82 | Cells: 9166 83 | Area: Comb=9920 Buf=1011 NonComb=1263 Macro=0 
Total=11182 84 | Power: Switch(10.482 mW) Internal(4.270 mW) Leakage(0.340 mW) Total(15.092 mW) 85 | Max Freq: 1053 MHz (0.95 ns) 86 | IEEEFMA_S1l4s_hardfloat: 87 | Cells: 6424 88 | Area: Comb=6892 Buf=746 NonComb=1201 Macro=0 Total=8094 89 | Power: Switch(5.456 mW) Internal(2.716 mW) Leakage(0.248 mW) Total(8.419 mW) 90 | Max Freq: 1020 MHz (0.98 ns) 91 | --------------------------------------------------------------------------------