├── .envrc
├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── .gitmodules
├── .mill-version
├── .scalafix.conf
├── .scalafmt.conf
├── .vscode
    └── settings.json
├── BUILDING.md
├── LICENSE
├── README.md
├── build.sc
├── fpu-wrappers
    ├── resources
    │   ├── flopoco
    │   │   ├── .gitignore
    │   │   ├── FPCFExp_D1s.v
    │   │   ├── FPCFExp_D3s.v
    │   │   ├── FPCFExp_D4s.v
    │   │   ├── FPCFExp_D5s.v
    │   │   ├── FPCFExp_D6s.v
    │   │   ├── FPCFExp_H1s.v
    │   │   ├── FPCFExp_H2s.v
    │   │   ├── FPCFExp_H3s.v
    │   │   ├── FPCFExp_S1s.v
    │   │   ├── FPCFExp_S2s.v
    │   │   ├── FPCFExp_S3s.v
    │   │   ├── FPCFExp_S4s.v
    │   │   ├── FPCFExp_S5s.v
    │   │   ├── IEEEFMA_D10s.v
    │   │   ├── IEEEFMA_D3s.v
    │   │   ├── IEEEFMA_D5s.v
    │   │   ├── IEEEFMA_D7s.v
    │   │   ├── IEEEFMA_D9s.v
    │   │   ├── IEEEFMA_H1s.v
    │   │   ├── IEEEFMA_H2s.v
    │   │   ├── IEEEFMA_H3s.v
    │   │   ├── IEEEFMA_S1s.v
    │   │   ├── IEEEFMA_S2s.v
    │   │   ├── IEEEFMA_S3s.v
    │   │   ├── IEEEFMA_S4s.v
    │   │   └── gen.py
    │   ├── fpnew
    │   │   ├── .gitignore
    │   │   ├── FPNewBlackbox.sv
    │   │   ├── FPNewBlackbox_1s.sv
    │   │   ├── FPNewBlackbox_2s.sv
    │   │   ├── FPNewBlackbox_3s.sv
    │   │   ├── FPNewBlackbox_4s.sv
    │   │   ├── FPNewBlackbox_5s.sv
    │   │   ├── FPNewBlackbox_D1l1s.synth.v
    │   │   ├── FPNewBlackbox_D1l2s.synth.v
    │   │   ├── FPNewBlackbox_D1l3s.synth.v
    │   │   ├── FPNewBlackbox_D1l4s.synth.v
    │   │   ├── FPNewBlackbox_D1l5s.synth.v
    │   │   ├── FPNewBlackbox_D2l1s.synth.v
    │   │   ├── FPNewBlackbox_D2l2s.synth.v
    │   │   ├── FPNewBlackbox_D2l3s.synth.v
    │   │   ├── FPNewBlackbox_D2l4s.synth.v
    │   │   ├── FPNewBlackbox_D2l5s.synth.v
    │   │   ├── FPNewBlackbox_S1l1s.synth.v
    │   │   ├── FPNewBlackbox_S1l2s.synth.v
    │   │   ├── FPNewBlackbox_S1l3s.synth.v
    │   │   ├── FPNewBlackbox_S1l4s.synth.v
    │   │   ├── FPNewBlackbox_S1l5s.synth.v
    │   │   ├── FPNewBlackbox_S2l1s.synth.v
    │   │   ├── FPNewBlackbox_S2l2s.synth.v
    │   │   ├── FPNewBlackbox_S2l3s.synth.v
    │   │   ├── FPNewBlackbox_S2l4s.synth.v
    │   │   ├── FPNewBlackbox_S2l5s.synth.v
    │   │   ├── Makefile
    │   │   └── gen.py
    │   ├── opencores
    │   │   ├── .gitignore
    │   │   ├── except.v
    │   │   ├── fpu.v
    │   │   ├── post_norm.v
    │   │   ├── pre_norm.v
    │   │   ├── pre_norm_fmul.v
    │   │   └── primitives.v
    │   └── syn.tcl
    ├── src
    │   └── fpuwrapper
    │   │   ├── Mul.scala
    │   │   ├── bench.scala
    │   │   ├── common.scala
    │   │   ├── emit.scala
    │   │   ├── float.scala
    │   │   ├── flopoco
    │   │       ├── FPCFExp.scala
    │   │       ├── FPCToIEEE.scala
    │   │       ├── IEEEFExp.scala
    │   │       ├── IEEEFMA.scala
    │   │       └── IEEEToFPC.scala
    │   │   ├── formal
    │   │       ├── HFRoundtrip.scala
    │   │       └── IEEEFMAFormal.scala
    │   │   ├── fpnew
    │   │       ├── FPNewBlackbox.scala
    │   │       └── IEEEFPU.scala
    │   │   ├── fudian
    │   │       ├── IEEEFAdd.scala
    │   │       ├── IEEEFDivSqrt.scala
    │   │       └── IEEEFMA.scala
    │   │   ├── hardfloat
    │   │       ├── FMACommon.scala
    │   │       ├── HFFCmp.scala
    │   │       ├── HFFDivSqrt.scala
    │   │       ├── HFFMA.scala
    │   │       ├── HFFMul.scala
    │   │       ├── HFToIEEE.scala
    │   │       ├── IEEEFMA.scala
    │   │       ├── IEEEToHF.scala
    │   │       └── MulCommon.scala
    │   │   ├── opencores
    │   │       └── IEEEFPU.scala
    │   │   ├── sifive.scala
    │   │   └── synthesis.scala
    └── test
    │   └── src
    │       └── fpuwrapper
    │           ├── common.scala
    │           ├── flopoco
    │               ├── FPCFExpTest.scala
    │               ├── IEEEFExpTest.scala
    │               └── IEEEFMATest.scala
    │           ├── fpnew
    │               └── IEEEFPUTest.scala
    │           ├── fudian
    │               ├── IEEEFAddTest.scala
    │               ├── IEEEFDivSqrtTest.scala
    │               └── IEEEFMATest.scala
    │           ├── hardfloat
    │               ├── HFFCmpTest.scala
    │               ├── HFFDivSqrtTest.scala
    │               ├── HFFMATest.scala
    │               ├── HFMulTest.scala
    │               ├── IEEEFMATest.scala
    │               └── IEEEToHFTest.scala
    │           └── opencores
    │               └── IEEEFPUTest.scala
├── shell.nix
└── synWorkspace
    ├── .gitignore
    ├── report.py
    └── report.yaml


/.envrc:
--------------------------------------------------------------------------------
1 | use nix
2 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ '**' ]
 6 | 
 7 | jobs:
 8 |   test:
 9 |     runs-on: ubuntu-22.04
10 |     strategy:
11 |       matrix:
12 |         scala: [2.13.10]
13 |         jvm: [adopt@1.11]
14 |     steps:
15 |       - name: Checkout
16 |         uses: actions/checkout@v2
17 |         with:
18 |           submodules: true
19 |       - name: Setup Mill
20 |         uses: jodersky/setup-mill@master
21 |         with:
22 |           mill-version: 0.11.5
23 |       - name: Setup nix env
24 |         uses: JRMurr/direnv-nix-action@v4.1.0
25 |       - name: Install simulators
26 |         run: sudo apt-get install -y verilator iverilog
27 |       - name: Install other dependencies
28 |         run: sudo apt-get install -y gcc libboost-dev z3
29 |       - name: Compile
30 |         run: mill fpu-wrappers.compile
31 |       - name: Test
32 |         run: mill fpu-wrappers.test
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .metals/
 2 | .bsp/
 3 | .bloop/
 4 | .direnv/
 5 | target/
 6 | project/project/
 7 | test_run_dir/
 8 | simWorkspace/
 9 | tmp/
10 | out/
11 | 
12 | *.v
13 | *.sv
14 | *.smt2
15 | *.anno.json
16 | *.fir
17 | ucli.key
18 | fpuwrapper.Simulator
19 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "berkeley-hardfloat"]
 2 | 	path = thirdparty/berkeley-hardfloat
 3 | 	url = git@github.com:ucb-bar/berkeley-hardfloat.git
 4 | [submodule "opencores-fpu"]
 5 | 	path = thirdparty/opencores-fpu
 6 | 	url = git@github.com:jiegec/opencores-fpu.git
 7 | [submodule "thirdparty/CNRV-FPU"]
 8 | 	path = thirdparty/CNRV-FPU
 9 | 	url = git@github.com:cnrv/CNRV-FPU.git
10 | [submodule "thirdparty/fpnew"]
11 | 	path = thirdparty/fpnew
12 | 	url = git@github.com:pulp-platform/fpnew.git
13 | [submodule "thirdparty/fudian"]
14 | 	path = thirdparty/fudian
15 | 	url = git@github.com:OpenXiangShan/fudian.git
16 | 


--------------------------------------------------------------------------------
/.mill-version:
--------------------------------------------------------------------------------
1 | 0.11.5
2 | 


--------------------------------------------------------------------------------
/.scalafix.conf:
--------------------------------------------------------------------------------
1 | rules = [
2 |   // builtin
3 |   ExplicitResultTypes,
4 |   RemoveUnused,
5 |   NoAutoTupling,
6 |   // community
7 |   OrganizeImports
8 | ]


--------------------------------------------------------------------------------
/.scalafmt.conf:
--------------------------------------------------------------------------------
1 | version = "3.0.8"
2 | runner.dialect = scala213


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"files.watcherExclude": {
3 | 		"**/target": true
4 | 	},
5 | 	"cSpell.words": [
6 | 		"hardfloat"
7 | 	]
8 | }


--------------------------------------------------------------------------------
/BUILDING.md:
--------------------------------------------------------------------------------
 1 | # Berkeley-Hardfloat
 2 | 
 3 | Language: Chisel -> Verilog
 4 | 
 5 | http://www.jhauser.us/arithmetic/HardFloat-1/doc/HardFloat-Verilog.html
 6 | 
 7 | float format: 32 bit -> 33 bit
 8 | 
 9 | # FPNew
10 | 
11 | Language: SystemVerilog
12 | 
13 | # FloPoCo
14 | 
15 | Language: C++ -> VHDL
16 | 
17 | float format: 32 bit -> 34 bit
18 | 
19 | ## Installation
20 | 
21 | ### wcpg
22 | 
23 | ```shell
24 | git clone https://scm.gforge.inria.fr/anonscm/git/metalibm/wcpg.git
25 | cd wcpg
26 | sh autogen.sh
27 | ./configure --prefix=$HOME/prefix/wcpg
28 | make install -j
29 | ```
30 | 
31 | ### scalp
32 | 
33 | ```shell
34 | git clone https://digidev.digi.e-technik.uni-kassel.de/git/scalp.git
35 | cd scalp
36 | mkdir build
37 | cd build
38 | cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/prefix/scalp -DUSE_LPSOLVE=ON -DLPSOLVE_LIBRARIES=/usr/lib/liblpsolve55_pic.a -DLP_INCLUDE_DIRS=/usr/include
39 | make install -j
40 | ```
41 | 
42 | ### pagsuite
43 | 
44 | ```shell
45 | svn co https://digidev.digi.e-technik.uni-kassel.de/home/svn/pagsuite
46 | cd pagsuite/trunk
47 | mkdir build
48 | cd build
49 | cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/prefix/pagsuite -DCMAKE_PREFIX_PATH=$HOME/prefix/scalp
50 | make install -j
51 | ```
52 | 
53 | ### flopoco
54 | 
55 | ```shell
56 | git clone https://gitlab.inria.fr/fdupont/flopoco.git
57 | cd flopoco
58 | mkdir build
59 | cd build
60 | cmake .. -DCMAKE_PREFIX_PATH="$HOME/prefix/wcpg;$HOME/prefix/pagsuite;$HOME/prefix/scalp" -DCMAKE_INSTALL_PREFIX=$HOME/prefix/flopoco
61 | make -j
62 | ```


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Jiajie Chen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # fpu-wrappers
 2 | 
 3 | This repo intends to create wrappers for open-source FPU hardware implementations, currently including:
 4 | 
 5 | 1. [berkeley-hardfloat](https://github.com/ucb-bar/berkeley-hardfloat)
 6 | 2. [fpnew](https://github.com/pulp-platform/fpnew)
 7 | 3. [flopoco](http://flopoco.gforge.inria.fr/)
 8 | 4. [CNRV-FPU](https://github.com/cnrv/CNRV-FPU)
 9 | 5. [opencores-fpu](https://github.com/jiegec/opencores-fpu)
10 | 6. [fudian](https://github.com/OpenXiangShan/fudian)
11 | 7. [vfloat](https://github.com/jiegec/vfloat)
12 | 
13 | | op     | berkeley-hardfloat | fpnew | flopoco | CNRV-FPU | opencores-fpu | fudian | vfloat |
14 | | ------ | ------------------ | ----- | ------- | -------- | ------------- | ------ | ------ |
15 | | add    | Y                  | Y     | Y       | Y        | Y             | Y      | Y      |
16 | | mul    | Y                  | Y     | Y       | Y        | Y             | Y      | Y      |
17 | | fma    | Y                  | Y     | Y       | Y        |               | Y      | Y      |
18 | | cmp    | Y                  | Y     | Y       |          |               | Y      |        |
19 | | div    | Y                  | Y     | Y       | Y        | Y             | Y      | Y      |
20 | | sqrt   | Y                  | Y     | Y       | Y        |               | Y      | Y      |
21 | | fp2int | Y                  | Y     | Y       | Y        | Y             | Y      | Y      |
22 | | int2fp | Y                  | Y     | Y       | Y        | Y             | Y      | Y      |
23 | | fp2fp  | Y                  | Y     | Y       |          |               | Y      |        |
24 | | pow    |                    |       | Y       |          |               |        |        |
25 | | log    |                    |       | Y       |          |               |        |        |
26 | | exp    |                    |       | Y       |          |               |        |        |
27 | | custom | Y                  |       | Y       |          |               |        |        |
28 | 
29 | `custom` means custom floating point format.
30 | 
31 | The repo also aims to compare their performance.
32 | 
33 | Module naming convention:
34 | 
35 | 1. Type 1: Floating point format + operator (FAdd/FMA/FExp)
36 | 2. Type 2: Floating point format `To` Floating point format
37 | 3. Type 3: Floating point format + FPU (many operations)
38 | 
39 | Possible floating point formats:
40 | 
41 | 1. HF: berkeley-hardfloat +1 bit
42 | 2. IEEE: IEEE 754
43 | 3. FPC: flopoco +2 bits
44 | 
45 | IEEE754 FMA:
46 | 
47 | 1. Area: fpnew = flopoco < hardfloat < fudian
48 | 2. Frequency: hardfloat = flopoco > fpnew > fudian
49 | 3. Power: fpnew < hardfloat < flopoco < fudian


--------------------------------------------------------------------------------
/build.sc:
--------------------------------------------------------------------------------
  1 | import mill._
  2 | import mill.scalalib.publish._
  3 | import scalalib._
  4 | import scalafmt._
  5 | import coursier.maven.MavenRepository
  6 | 
  7 | // learned from https://github.com/OpenXiangShan/fudian/blob/main/build.sc
  8 | val defaultVersions = Map(
  9 |   "chisel" -> ("org.chipsalliance", "6.2.0", false),
 10 |   "chisel-plugin" -> ("org.chipsalliance", "6.2.0", true),
 11 |   "scalatest" -> ("org.scalatest", "3.2.10", false),
 12 |   "spinalhdl-core" -> ("com.github.spinalhdl", "1.10.1", false),
 13 |   "spinalhdl-lib" -> ("com.github.spinalhdl", "1.10.1", false),
 14 |   "spinalhdl-idsl-plugin" -> ("com.github.spinalhdl", "1.10.1", false)
 15 | )
 16 | 
 17 | val commonScalaVersion = "2.13.10"
 18 | 
 19 | def getVersion(dep: String) = {
 20 |   // An environment variable named "<dep>Version" overrides the pinned default version.
 21 |   val (group, pinned, fullCross) = defaultVersions(dep)
 22 |   val resolved = sys.env.getOrElse(dep + "Version", pinned)
 23 |   // ":::" is used for entries flagged as cross (the compiler plugin), "::" for all others.
 24 |   val separator = if (fullCross) ":::" else "::"
 25 |   ivy"$group$separator$dep:$resolved"
 26 | }
 27 | 
 28 | trait CommonModule extends ScalaModule {
 29 |   def scalaVersion = commonScalaVersion
 30 | 
 31 |   // for snapshot dependencies
 32 |   override def repositoriesTask = T.task {
 33 |     super.repositoriesTask() ++ Seq(
 34 |       MavenRepository("https://oss.sonatype.org/content/repositories/snapshots")
 35 |     )
 36 |   }
 37 | 
 38 |   override def scalacOptions =
 39 |     Seq("-deprecation", "-feature", "-language:reflectiveCalls")
 40 | }
 41 | 
 42 | object hardfloat extends SbtModule with PublishModule {
 43 |   override def scalaVersion = commonScalaVersion
 44 |   override def millSourcePath =
 45 |     os.pwd / "thirdparty" / "berkeley-hardfloat" / "hardfloat"
 46 | 
 47 |   override def ivyDeps = super.ivyDeps() ++ Agg(
 48 |     getVersion("chisel")
 49 |   )
 50 | 
 51 |   override def scalacPluginIvyDeps = super.scalacPluginIvyDeps() ++ Agg(
 52 |     getVersion("chisel-plugin")
 53 |   )
 54 | 
 55 |   // publish
 56 |   def publishVersion = "1.5-SNAPSHOT"
 57 |   def pomSettings = PomSettings(
 58 |     description = artifactName(),
 59 |     organization = "edu.berkeley.cs",
 60 |     url = "http://chisel.eecs.berkeley.edu",
 61 |     licenses = Seq(License.`BSD-3-Clause`),
 62 |     versionControl = VersionControl.github("ucb-bar", "berkeley-hardfloat"),
 63 |     developers = Seq(
 64 |       Developer(
 65 |         "jhauser-ucberkeley",
 66 |         "John Hauser",
 67 |         "https://www.colorado.edu/faculty/hauser/about/"
 68 |       ),
 69 |       Developer(
 70 |         "aswaterman",
 71 |         "Andrew Waterman",
 72 |         "https://aspire.eecs.berkeley.edu/author/waterman/"
 73 |       ),
 74 |       Developer(
 75 |         "yunsup",
 76 |         "Yunsup Lee",
 77 |         "https://aspire.eecs.berkeley.edu/author/yunsup/"
 78 |       )
 79 |     )
 80 |   )
 81 | }
 82 | 
 83 | object fudian extends CommonModule with PublishModule {
 84 |   override def ivyDeps = super.ivyDeps() ++ Agg(
 85 |     getVersion("chisel"),
 86 |     getVersion("scalatest")
 87 |   )
 88 | 
 89 |   override def millSourcePath = os.pwd / "thirdparty" / "fudian"
 90 | 
 91 |   override def scalacPluginIvyDeps = super.scalacPluginIvyDeps() ++ Agg(
 92 |     getVersion("chisel-plugin")
 93 |   )
 94 | 
 95 |   // publish
 96 |   def publishVersion = "1.0-SNAPSHOT"
 97 |   def pomSettings = PomSettings(
 98 |     description = artifactName(),
 99 |     organization = "cn.cas.ict",
100 |     url = "https://github.com/openxiangshan/fudian",
101 |     licenses = Seq(License.MIT), // Mulan PSL v2 is not included in Mill
102 |     versionControl = VersionControl.github("openxiangshan", "fudian"),
103 |     developers = Seq()
104 |   )
105 | }
106 | 
107 | object `fpu-wrappers`
108 |     extends CommonModule
109 |     with PublishModule
110 |     with ScalafmtModule {
111 |   override def ivyDeps = super.ivyDeps() ++ Agg(
112 |     getVersion("chisel"),
113 |     getVersion("spinalhdl-core"),
114 |     getVersion("spinalhdl-lib")
115 |   )
116 |   // compiler plugins for both HDL frontends used by the wrappers (SpinalHDL and Chisel)
117 |   override def scalacPluginIvyDeps = super.scalacPluginIvyDeps() ++ Agg(
118 |     getVersion("spinalhdl-idsl-plugin"),
119 |     getVersion("chisel-plugin")
120 |   )
121 |   // compile against the hardfloat and fudian modules declared in this build file
122 |   override def moduleDeps = super.moduleDeps ++ Seq(hardfloat, fudian)
123 |   // test sources are run with ScalaTest
124 |   object test extends ScalaTests with TestModule.ScalaTest {
125 |     override def ivyDeps = super.ivyDeps() ++ Agg(
126 |       getVersion("scalatest")
127 |     )
128 |   }
129 | 
130 |   // publish
131 |   def publishVersion = "1.0-SNAPSHOT"
132 |   def pomSettings = PomSettings(
133 |     description = artifactName(),
134 |     organization = "je.jia",
135 |     url = "https://github.com/jiegec/fpu-wrappers", // must name the same repository as versionControl below
136 |     licenses = Seq(License.MIT),
137 |     versionControl = VersionControl.github("jiegec", "fpu-wrappers"),
138 |     developers = Seq()
139 |   )
140 | }
141 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/flopoco/.gitignore:
--------------------------------------------------------------------------------
1 | !*.v
2 | *.vhdl
3 | *.cache
4 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/flopoco/gen.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | 
 4 | tasks = [{
 5 |     'type': 'H',
 6 |     'exp': 5,
 7 |     'frac': 10
 8 | }, {
 9 |     'type': 'S',
10 |     'exp': 8,
11 |     'frac': 23
12 | }, {
13 |     'type': 'D',
14 |     'exp': 11,
15 |     'frac': 52
16 | }]
17 | 
18 | home = os.getenv('HOME')
19 | flopoco = home + "/flopoco/build/flopoco"
20 | 
21 | def gen_fma(frequency, task):
22 |     # generate vhdl
23 |     out = subprocess.check_output(
24 |         [flopoco, "IEEEFMA", f"wE={task['exp']}", f"wF={task['frac']}",
25 |             f"name=IEEEFMA_{task['type']}", f"frequency={frequency}"],
26 |         stderr=subprocess.STDOUT).decode('utf-8')
27 | 
28 |     # parse stages from output
29 |     stages = 0
30 |     for line in out.splitlines():
31 |         if 'Pipeline depth' in line:
32 |             stages = int(line.split(' ')[-1])
33 | 
34 |     # save vhdl
35 |     name = f"IEEEFMA_{task['type']}{stages}s"
36 |     file = f"{name}.vhdl"
37 |     file_vhdl08 = f"{name}_vhdl08.vhdl"
38 |     os.rename('flopoco.vhdl', file)
39 | 
40 |     # vhdl08
41 |     os.system(f"sed -e 's/std_logic_arith/numeric_std/g' -e 's/std_logic_unsigned/numeric_std_unsigned/g' {file} > {file_vhdl08}")
42 | 
43 |     # synthesize to verilog
44 |     os.system(f"sudo docker run -it --rm -t -v $PWD:/src -w /src hdlc/ghdl:yosys yosys -m ghdl -p 'ghdl --std=08 {name}_vhdl08.vhdl -e IEEEFMA_{task['type']}; write_verilog {name}.v'")
45 | 
46 | def gen_exp(frequency, task):
47 |     # generate vhdl
48 |     out = subprocess.check_output(
49 |         [flopoco, "FPExp", f"wE={task['exp']}", f"wF={task['frac']}",
50 |             f"name=FPCFExp_{task['type']}", f"plainVHDL=1", f"frequency={frequency}"],
51 |         stderr=subprocess.STDOUT).decode('utf-8')
52 | 
53 |     # parse stages from output
54 |     stages = 0
55 |     for line in out.splitlines():
56 |         if 'Pipeline depth' in line:
57 |             stages = int(line.split(' ')[-1])
58 | 
59 |     # save vhdl
60 |     name = f"FPCFExp_{task['type']}{stages}s"
61 |     file = f"{name}.vhdl"
62 |     os.rename('flopoco.vhdl', file)
63 | 
64 |     # synthesize to verilog
65 |     os.system(f"sudo docker run -it --rm -t -v $PWD:/src -w /src hdlc/ghdl:yosys yosys -m ghdl -p 'ghdl -fsynopsys -fexplicit {name}.vhdl -e FPCFExp_{task['type']}; write_verilog {name}.v'")
66 | 
67 | for task in tasks:
68 |     for frequency in [100, 150, 200, 250, 300]:
69 |         gen_fma(frequency, task)
70 |         gen_exp(frequency, task)
71 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/.gitignore:
--------------------------------------------------------------------------------
1 | !FPNewBlackbox.sv
2 | !*.synth.v
3 | *.preprocessed.sv
4 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/FPNewBlackbox.sv:
--------------------------------------------------------------------------------
  1 | module FPNewBlackbox #(
  2 |     // fpu features
  3 |     parameter FLEN = __FLEN__,
  4 |     parameter ENABLE_VECTORS = 1,
  5 |     parameter ENABLE_NAN_BOX = 1,
  6 |     parameter ENABLE_FP32 = __FP32__,
  7 |     parameter ENABLE_FP64 = __FP64__,
  8 |     parameter ENABLE_FP16 = 0,
  9 |     parameter ENABLE_FP8 = 0,
 10 |     parameter ENABLE_FP16ALT = 0,
 11 |     parameter ENABLE_INT8 = 0,
 12 |     parameter ENABLE_INT16 = 0,
 13 |     parameter ENABLE_INT32 = 0,
 14 |     parameter ENABLE_INT64 = 0,
 15 |     // fpu implementation
 16 |     parameter PIPELINE_STAGES = __STAGES__,
 17 |     // tag type: logic array
 18 |     parameter TAG_WIDTH = 2,
 19 |     // Do not change, follow fp-new definition
 20 |     localparam int unsigned WIDTH        = FLEN,
 21 |     localparam int unsigned NUM_OPERANDS = 3,
 22 |     localparam type TagType = logic [TAG_WIDTH-1:0]
 23 | 
 24 | ) (
 25 |     // Copied from fpnew_top
 26 |     input logic                               clk_i,
 27 |     input logic                               rst_ni,
 28 |     // Input signals
 29 |     input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
 30 |     input fpnew_pkg::roundmode_e              rnd_mode_i,
 31 |     input fpnew_pkg::operation_e              op_i,
 32 |     input logic                               op_mod_i,
 33 |     input fpnew_pkg::fp_format_e              src_fmt_i,
 34 |     input fpnew_pkg::fp_format_e              dst_fmt_i,
 35 |     input fpnew_pkg::int_format_e             int_fmt_i,
 36 |     input logic                               vectorial_op_i,
 37 |     input TagType                             tag_i,
 38 |     // Input Handshake
 39 |     input  logic                              in_valid_i,
 40 |     output logic                              in_ready_o,
 41 |     input  logic                              flush_i,
 42 |     // Output signals
 43 |     output logic [WIDTH-1:0]                  result_o,
 44 |     output fpnew_pkg::status_t                status_o,
 45 |     output TagType                            tag_o,
 46 |     // Output handshake
 47 |     output logic                              out_valid_o,
 48 |     input  logic                              out_ready_i,
 49 |     // Indication of valid data in flight
 50 |     output logic                              busy_o
 51 | );
 52 | 
 53 |     localparam fpnew_pkg::fpu_features_t Features = '{
 54 |         Width: int'(FLEN),
 55 |         EnableVectors: int'(ENABLE_VECTORS),
 56 |         EnableNanBox: int'(ENABLE_NAN_BOX),
 57 |         FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0),
 58 |         IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0)
 59 |     };
 60 | 
 61 |     // only pipeline regs is customized
 62 |     localparam fpnew_pkg::fpu_implementation_t Implementation = '{
 63 |         PipeRegs:   '{'{default: PIPELINE_STAGES},
 64 |                       '{default: PIPELINE_STAGES},
 65 |                       '{default: PIPELINE_STAGES},
 66 |                       '{default: PIPELINE_STAGES}},
 67 |         UnitTypes:  '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
 68 |                     '{default: fpnew_pkg::DISABLED},   // DIVSQRT
 69 |                     '{default: fpnew_pkg::DISABLED}, // NONCOMP
 70 |                     '{default: fpnew_pkg::DISABLED}},  // CONV
 71 |         PipeConfig: fpnew_pkg::DISTRIBUTED
 72 |     };
 73 | 
 74 |     fpnew_top #(
 75 |         .Features(Features),
 76 |         .Implementation(Implementation),
 77 |         .TagType(TagType)
 78 |     ) inst (
 79 |         .clk_i(clk_i),
 80 |         .rst_ni(rst_ni),
 81 | 
 82 |         .operands_i(operands_i),
 83 |         .rnd_mode_i(rnd_mode_i),
 84 |         .op_i(op_i),
 85 |         .op_mod_i(op_mod_i),
 86 |         .src_fmt_i(src_fmt_i),
 87 |         .dst_fmt_i(dst_fmt_i),
 88 |         .int_fmt_i(int_fmt_i),
 89 |         .vectorial_op_i(vectorial_op_i),
 90 |         .tag_i(tag_i),
 91 | 
 92 |         .in_valid_i(in_valid_i),
 93 |         .in_ready_o(in_ready_o),
 94 |         .flush_i(flush_i),
 95 | 
 96 |         .result_o(result_o),
 97 |         .status_o(status_o),
 98 |         .tag_o(tag_o),
 99 | 
100 |         .out_valid_o(out_valid_o),
101 |         .out_ready_i(out_ready_i),
102 |         
103 |         .busy_o(busy_o)
104 |     );
105 | 
106 | 
107 | endmodule
108 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/FPNewBlackbox_1s.sv:
--------------------------------------------------------------------------------
  1 | module FPNewBlackbox #(
  2 |     // fpu features
  3 |     parameter FLEN = 64,
  4 |     parameter ENABLE_VECTORS = 1,
  5 |     parameter ENABLE_NAN_BOX = 1,
  6 |     parameter ENABLE_FP32 = 1,
  7 |     parameter ENABLE_FP64 = 0,
  8 |     parameter ENABLE_FP16 = 0,
  9 |     parameter ENABLE_FP8 = 0,
 10 |     parameter ENABLE_FP16ALT = 0,
 11 |     parameter ENABLE_INT8 = 0,
 12 |     parameter ENABLE_INT16 = 0,
 13 |     parameter ENABLE_INT32 = 0,
 14 |     parameter ENABLE_INT64 = 0,
 15 |     // fpu implementation
 16 |     parameter PIPELINE_STAGES = 1,
 17 |     // tag type: logic array
 18 |     parameter TAG_WIDTH = 2,
 19 |     // Do not change, follow fp-new definition
 20 |     localparam int unsigned WIDTH        = FLEN,
 21 |     localparam int unsigned NUM_OPERANDS = 3,
 22 |     localparam type TagType = logic [TAG_WIDTH-1:0]
 23 | 
 24 | ) (
 25 |     // Copied from fpnew_top
 26 |     input logic                               clk_i,
 27 |     input logic                               rst_ni,
 28 |     // Input signals
 29 |     input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
 30 |     input fpnew_pkg::roundmode_e              rnd_mode_i,
 31 |     input fpnew_pkg::operation_e              op_i,
 32 |     input logic                               op_mod_i,
 33 |     input fpnew_pkg::fp_format_e              src_fmt_i,
 34 |     input fpnew_pkg::fp_format_e              dst_fmt_i,
 35 |     input fpnew_pkg::int_format_e             int_fmt_i,
 36 |     input logic                               vectorial_op_i,
 37 |     input TagType                             tag_i,
 38 |     // Input Handshake
 39 |     input  logic                              in_valid_i,
 40 |     output logic                              in_ready_o,
 41 |     input  logic                              flush_i,
 42 |     // Output signals
 43 |     output logic [WIDTH-1:0]                  result_o,
 44 |     output fpnew_pkg::status_t                status_o,
 45 |     output TagType                            tag_o,
 46 |     // Output handshake
 47 |     output logic                              out_valid_o,
 48 |     input  logic                              out_ready_i,
 49 |     // Indication of valid data in flight
 50 |     output logic                              busy_o
 51 | );
 52 | 
 53 |     localparam fpnew_pkg::fpu_features_t Features = '{
 54 |         Width: int'(FLEN),
 55 |         EnableVectors: int'(ENABLE_VECTORS),
 56 |         EnableNanBox: int'(ENABLE_NAN_BOX),
 57 |         FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0),
 58 |         IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0)
 59 |     };
 60 | 
 61 |     // only pipeline regs is customized
 62 |     localparam fpnew_pkg::fpu_implementation_t Implementation = '{
 63 |         PipeRegs:   '{'{default: PIPELINE_STAGES},
 64 |                       '{default: PIPELINE_STAGES},
 65 |                       '{default: PIPELINE_STAGES},
 66 |                       '{default: PIPELINE_STAGES}},
 67 |         UnitTypes:  '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
 68 |                     '{default: fpnew_pkg::MERGED},   // DIVSQRT
 69 |                     '{default: fpnew_pkg::PARALLEL}, // NONCOMP
 70 |                     '{default: fpnew_pkg::MERGED}},  // CONV
 71 |         PipeConfig: fpnew_pkg::BEFORE
 72 |     };
 73 | 
 74 |     fpnew_top #(
 75 |         .Features(Features),
 76 |         .Implementation(Implementation),
 77 |         .TagType(TagType)
 78 |     ) inst (
 79 |         .clk_i(clk_i),
 80 |         .rst_ni(rst_ni),
 81 | 
 82 |         .operands_i(operands_i),
 83 |         .rnd_mode_i(rnd_mode_i),
 84 |         .op_i(op_i),
 85 |         .op_mod_i(op_mod_i),
 86 |         .src_fmt_i(src_fmt_i),
 87 |         .dst_fmt_i(dst_fmt_i),
 88 |         .int_fmt_i(int_fmt_i),
 89 |         .vectorial_op_i(vectorial_op_i),
 90 |         .tag_i(tag_i),
 91 | 
 92 |         .in_valid_i(in_valid_i),
 93 |         .in_ready_o(in_ready_o),
 94 |         .flush_i(flush_i),
 95 | 
 96 |         .result_o(result_o),
 97 |         .status_o(status_o),
 98 |         .tag_o(tag_o),
 99 | 
100 |         .out_valid_o(out_valid_o),
101 |         .out_ready_i(out_ready_i),
102 |         
103 |         .busy_o(busy_o)
104 |     );
105 | 
106 | 
107 | endmodule
108 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/FPNewBlackbox_2s.sv:
--------------------------------------------------------------------------------
  1 | module FPNewBlackbox #( // Wrapper that configures fpnew_top from plain integer parameters
  2 |     // FPU features: datapath width, vector / NaN-box switches, and one enable flag per format
  3 |     parameter FLEN = 64,
  4 |     parameter ENABLE_VECTORS = 1,
  5 |     parameter ENABLE_NAN_BOX = 1,
  6 |     parameter ENABLE_FP32 = 1,
  7 |     parameter ENABLE_FP64 = 0,
  8 |     parameter ENABLE_FP16 = 0,
  9 |     parameter ENABLE_FP8 = 0,
 10 |     parameter ENABLE_FP16ALT = 0,
 11 |     parameter ENABLE_INT8 = 0,
 12 |     parameter ENABLE_INT16 = 0,
 13 |     parameter ENABLE_INT32 = 0,
 14 |     parameter ENABLE_INT64 = 0,
 15 |     // FPU implementation: value used for every PipeRegs entry of the Implementation struct below
 16 |     parameter PIPELINE_STAGES = 2,
 17 |     // Tag type: packed logic vector of TAG_WIDTH bits (see TagType)
 18 |     parameter TAG_WIDTH = 2,
 19 |     // Do not change; these follow the fpnew definition
 20 |     localparam int unsigned WIDTH        = FLEN,
 21 |     localparam int unsigned NUM_OPERANDS = 3,
 22 |     localparam type TagType = logic [TAG_WIDTH-1:0]
 23 | 
 24 | ) (
 25 |     // Ports copied from fpnew_top; each one is connected unchanged to the inner instance
 26 |     input logic                               clk_i,
 27 |     input logic                               rst_ni,
 28 |     // Input signals
 29 |     input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
 30 |     input fpnew_pkg::roundmode_e              rnd_mode_i,
 31 |     input fpnew_pkg::operation_e              op_i,
 32 |     input logic                               op_mod_i,
 33 |     input fpnew_pkg::fp_format_e              src_fmt_i,
 34 |     input fpnew_pkg::fp_format_e              dst_fmt_i,
 35 |     input fpnew_pkg::int_format_e             int_fmt_i,
 36 |     input logic                               vectorial_op_i,
 37 |     input TagType                             tag_i,
 38 |     // Input Handshake
 39 |     input  logic                              in_valid_i,
 40 |     output logic                              in_ready_o,
 41 |     input  logic                              flush_i,
 42 |     // Output signals
 43 |     output logic [WIDTH-1:0]                  result_o,
 44 |     output fpnew_pkg::status_t                status_o,
 45 |     output TagType                            tag_o,
 46 |     // Output handshake
 47 |     output logic                              out_valid_o,
 48 |     input  logic                              out_ready_i,
 49 |     // Indication of valid data in flight
 50 |     output logic                              busy_o
 51 | );
 52 |     // Feature struct for fpnew_top; each ENABLE_* flag is shifted to its own mask bit. NOTE(review): bit order presumably follows fpnew_pkg's format enums - confirm.
 53 |     localparam fpnew_pkg::fpu_features_t Features = '{
 54 |         Width: int'(FLEN),
 55 |         EnableVectors: int'(ENABLE_VECTORS),
 56 |         EnableNanBox: int'(ENABLE_NAN_BOX),
 57 |         FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0),
 58 |         IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0)
 59 |     };
 60 | 
 61 |     // Only the pipeline register count is customized; UnitTypes and PipeConfig are fixed constants
 62 |     localparam fpnew_pkg::fpu_implementation_t Implementation = '{
 63 |         PipeRegs:   '{'{default: PIPELINE_STAGES},
 64 |                       '{default: PIPELINE_STAGES},
 65 |                       '{default: PIPELINE_STAGES},
 66 |                       '{default: PIPELINE_STAGES}},
 67 |         UnitTypes:  '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
 68 |                     '{default: fpnew_pkg::MERGED},   // DIVSQRT
 69 |                     '{default: fpnew_pkg::PARALLEL}, // NONCOMP
 70 |                     '{default: fpnew_pkg::MERGED}},  // CONV
 71 |         PipeConfig: fpnew_pkg::BEFORE
 72 |     };
 73 |     // The wrapped FPU: every wrapper port connects to the fpnew_top port of the same name
 74 |     fpnew_top #(
 75 |         .Features(Features),
 76 |         .Implementation(Implementation),
 77 |         .TagType(TagType)
 78 |     ) inst (
 79 |         .clk_i(clk_i),
 80 |         .rst_ni(rst_ni),
 81 | 
 82 |         .operands_i(operands_i),
 83 |         .rnd_mode_i(rnd_mode_i),
 84 |         .op_i(op_i),
 85 |         .op_mod_i(op_mod_i),
 86 |         .src_fmt_i(src_fmt_i),
 87 |         .dst_fmt_i(dst_fmt_i),
 88 |         .int_fmt_i(int_fmt_i),
 89 |         .vectorial_op_i(vectorial_op_i),
 90 |         .tag_i(tag_i),
 91 | 
 92 |         .in_valid_i(in_valid_i),
 93 |         .in_ready_o(in_ready_o),
 94 |         .flush_i(flush_i),
 95 | 
 96 |         .result_o(result_o),
 97 |         .status_o(status_o),
 98 |         .tag_o(tag_o),
 99 | 
100 |         .out_valid_o(out_valid_o),
101 |         .out_ready_i(out_ready_i),
102 |         
103 |         .busy_o(busy_o)
104 |     );
105 | 
106 | 
107 | endmodule
108 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/FPNewBlackbox_3s.sv:
--------------------------------------------------------------------------------
  1 | module FPNewBlackbox #( // Wrapper that configures fpnew_top from plain integer parameters
  2 |     // FPU features: datapath width, vector / NaN-box switches, and one enable flag per format
  3 |     parameter FLEN = 64,
  4 |     parameter ENABLE_VECTORS = 1,
  5 |     parameter ENABLE_NAN_BOX = 1,
  6 |     parameter ENABLE_FP32 = 1,
  7 |     parameter ENABLE_FP64 = 0,
  8 |     parameter ENABLE_FP16 = 0,
  9 |     parameter ENABLE_FP8 = 0,
 10 |     parameter ENABLE_FP16ALT = 0,
 11 |     parameter ENABLE_INT8 = 0,
 12 |     parameter ENABLE_INT16 = 0,
 13 |     parameter ENABLE_INT32 = 0,
 14 |     parameter ENABLE_INT64 = 0,
 15 |     // FPU implementation: value used for every PipeRegs entry of the Implementation struct below
 16 |     parameter PIPELINE_STAGES = 3,
 17 |     // Tag type: packed logic vector of TAG_WIDTH bits (see TagType)
 18 |     parameter TAG_WIDTH = 2,
 19 |     // Do not change; these follow the fpnew definition
 20 |     localparam int unsigned WIDTH        = FLEN,
 21 |     localparam int unsigned NUM_OPERANDS = 3,
 22 |     localparam type TagType = logic [TAG_WIDTH-1:0]
 23 | 
 24 | ) (
 25 |     // Ports copied from fpnew_top; each one is connected unchanged to the inner instance
 26 |     input logic                               clk_i,
 27 |     input logic                               rst_ni,
 28 |     // Input signals
 29 |     input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
 30 |     input fpnew_pkg::roundmode_e              rnd_mode_i,
 31 |     input fpnew_pkg::operation_e              op_i,
 32 |     input logic                               op_mod_i,
 33 |     input fpnew_pkg::fp_format_e              src_fmt_i,
 34 |     input fpnew_pkg::fp_format_e              dst_fmt_i,
 35 |     input fpnew_pkg::int_format_e             int_fmt_i,
 36 |     input logic                               vectorial_op_i,
 37 |     input TagType                             tag_i,
 38 |     // Input Handshake
 39 |     input  logic                              in_valid_i,
 40 |     output logic                              in_ready_o,
 41 |     input  logic                              flush_i,
 42 |     // Output signals
 43 |     output logic [WIDTH-1:0]                  result_o,
 44 |     output fpnew_pkg::status_t                status_o,
 45 |     output TagType                            tag_o,
 46 |     // Output handshake
 47 |     output logic                              out_valid_o,
 48 |     input  logic                              out_ready_i,
 49 |     // Indication of valid data in flight
 50 |     output logic                              busy_o
 51 | );
 52 |     // Feature struct for fpnew_top; each ENABLE_* flag is shifted to its own mask bit. NOTE(review): bit order presumably follows fpnew_pkg's format enums - confirm.
 53 |     localparam fpnew_pkg::fpu_features_t Features = '{
 54 |         Width: int'(FLEN),
 55 |         EnableVectors: int'(ENABLE_VECTORS),
 56 |         EnableNanBox: int'(ENABLE_NAN_BOX),
 57 |         FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0),
 58 |         IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0)
 59 |     };
 60 | 
 61 |     // Only the pipeline register count is customized; UnitTypes and PipeConfig are fixed constants
 62 |     localparam fpnew_pkg::fpu_implementation_t Implementation = '{
 63 |         PipeRegs:   '{'{default: PIPELINE_STAGES},
 64 |                       '{default: PIPELINE_STAGES},
 65 |                       '{default: PIPELINE_STAGES},
 66 |                       '{default: PIPELINE_STAGES}},
 67 |         UnitTypes:  '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
 68 |                     '{default: fpnew_pkg::MERGED},   // DIVSQRT
 69 |                     '{default: fpnew_pkg::PARALLEL}, // NONCOMP
 70 |                     '{default: fpnew_pkg::MERGED}},  // CONV
 71 |         PipeConfig: fpnew_pkg::BEFORE
 72 |     };
 73 |     // The wrapped FPU: every wrapper port connects to the fpnew_top port of the same name
 74 |     fpnew_top #(
 75 |         .Features(Features),
 76 |         .Implementation(Implementation),
 77 |         .TagType(TagType)
 78 |     ) inst (
 79 |         .clk_i(clk_i),
 80 |         .rst_ni(rst_ni),
 81 | 
 82 |         .operands_i(operands_i),
 83 |         .rnd_mode_i(rnd_mode_i),
 84 |         .op_i(op_i),
 85 |         .op_mod_i(op_mod_i),
 86 |         .src_fmt_i(src_fmt_i),
 87 |         .dst_fmt_i(dst_fmt_i),
 88 |         .int_fmt_i(int_fmt_i),
 89 |         .vectorial_op_i(vectorial_op_i),
 90 |         .tag_i(tag_i),
 91 | 
 92 |         .in_valid_i(in_valid_i),
 93 |         .in_ready_o(in_ready_o),
 94 |         .flush_i(flush_i),
 95 | 
 96 |         .result_o(result_o),
 97 |         .status_o(status_o),
 98 |         .tag_o(tag_o),
 99 | 
100 |         .out_valid_o(out_valid_o),
101 |         .out_ready_i(out_ready_i),
102 |         
103 |         .busy_o(busy_o)
104 |     );
105 | 
106 | 
107 | endmodule
108 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/FPNewBlackbox_4s.sv:
--------------------------------------------------------------------------------
  1 | module FPNewBlackbox #( // Wrapper that configures fpnew_top from plain integer parameters
  2 |     // FPU features: datapath width, vector / NaN-box switches, and one enable flag per format
  3 |     parameter FLEN = 64,
  4 |     parameter ENABLE_VECTORS = 1,
  5 |     parameter ENABLE_NAN_BOX = 1,
  6 |     parameter ENABLE_FP32 = 1,
  7 |     parameter ENABLE_FP64 = 0,
  8 |     parameter ENABLE_FP16 = 0,
  9 |     parameter ENABLE_FP8 = 0,
 10 |     parameter ENABLE_FP16ALT = 0,
 11 |     parameter ENABLE_INT8 = 0,
 12 |     parameter ENABLE_INT16 = 0,
 13 |     parameter ENABLE_INT32 = 0,
 14 |     parameter ENABLE_INT64 = 0,
 15 |     // FPU implementation: value used for every PipeRegs entry of the Implementation struct below
 16 |     parameter PIPELINE_STAGES = 4,
 17 |     // Tag type: packed logic vector of TAG_WIDTH bits (see TagType)
 18 |     parameter TAG_WIDTH = 2,
 19 |     // Do not change; these follow the fpnew definition
 20 |     localparam int unsigned WIDTH        = FLEN,
 21 |     localparam int unsigned NUM_OPERANDS = 3,
 22 |     localparam type TagType = logic [TAG_WIDTH-1:0]
 23 | 
 24 | ) (
 25 |     // Ports copied from fpnew_top; each one is connected unchanged to the inner instance
 26 |     input logic                               clk_i,
 27 |     input logic                               rst_ni,
 28 |     // Input signals
 29 |     input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
 30 |     input fpnew_pkg::roundmode_e              rnd_mode_i,
 31 |     input fpnew_pkg::operation_e              op_i,
 32 |     input logic                               op_mod_i,
 33 |     input fpnew_pkg::fp_format_e              src_fmt_i,
 34 |     input fpnew_pkg::fp_format_e              dst_fmt_i,
 35 |     input fpnew_pkg::int_format_e             int_fmt_i,
 36 |     input logic                               vectorial_op_i,
 37 |     input TagType                             tag_i,
 38 |     // Input Handshake
 39 |     input  logic                              in_valid_i,
 40 |     output logic                              in_ready_o,
 41 |     input  logic                              flush_i,
 42 |     // Output signals
 43 |     output logic [WIDTH-1:0]                  result_o,
 44 |     output fpnew_pkg::status_t                status_o,
 45 |     output TagType                            tag_o,
 46 |     // Output handshake
 47 |     output logic                              out_valid_o,
 48 |     input  logic                              out_ready_i,
 49 |     // Indication of valid data in flight
 50 |     output logic                              busy_o
 51 | );
 52 |     // Feature struct for fpnew_top; each ENABLE_* flag is shifted to its own mask bit. NOTE(review): bit order presumably follows fpnew_pkg's format enums - confirm.
 53 |     localparam fpnew_pkg::fpu_features_t Features = '{
 54 |         Width: int'(FLEN),
 55 |         EnableVectors: int'(ENABLE_VECTORS),
 56 |         EnableNanBox: int'(ENABLE_NAN_BOX),
 57 |         FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0),
 58 |         IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0)
 59 |     };
 60 | 
 61 |     // Only the pipeline register count is customized; UnitTypes and PipeConfig are fixed constants
 62 |     localparam fpnew_pkg::fpu_implementation_t Implementation = '{
 63 |         PipeRegs:   '{'{default: PIPELINE_STAGES},
 64 |                       '{default: PIPELINE_STAGES},
 65 |                       '{default: PIPELINE_STAGES},
 66 |                       '{default: PIPELINE_STAGES}},
 67 |         UnitTypes:  '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
 68 |                     '{default: fpnew_pkg::MERGED},   // DIVSQRT
 69 |                     '{default: fpnew_pkg::PARALLEL}, // NONCOMP
 70 |                     '{default: fpnew_pkg::MERGED}},  // CONV
 71 |         PipeConfig: fpnew_pkg::BEFORE
 72 |     };
 73 |     // The wrapped FPU: every wrapper port connects to the fpnew_top port of the same name
 74 |     fpnew_top #(
 75 |         .Features(Features),
 76 |         .Implementation(Implementation),
 77 |         .TagType(TagType)
 78 |     ) inst (
 79 |         .clk_i(clk_i),
 80 |         .rst_ni(rst_ni),
 81 | 
 82 |         .operands_i(operands_i),
 83 |         .rnd_mode_i(rnd_mode_i),
 84 |         .op_i(op_i),
 85 |         .op_mod_i(op_mod_i),
 86 |         .src_fmt_i(src_fmt_i),
 87 |         .dst_fmt_i(dst_fmt_i),
 88 |         .int_fmt_i(int_fmt_i),
 89 |         .vectorial_op_i(vectorial_op_i),
 90 |         .tag_i(tag_i),
 91 | 
 92 |         .in_valid_i(in_valid_i),
 93 |         .in_ready_o(in_ready_o),
 94 |         .flush_i(flush_i),
 95 | 
 96 |         .result_o(result_o),
 97 |         .status_o(status_o),
 98 |         .tag_o(tag_o),
 99 | 
100 |         .out_valid_o(out_valid_o),
101 |         .out_ready_i(out_ready_i),
102 |         
103 |         .busy_o(busy_o)
104 |     );
105 | 
106 | 
107 | endmodule
108 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/FPNewBlackbox_5s.sv:
--------------------------------------------------------------------------------
  1 | module FPNewBlackbox #( // Wrapper that configures fpnew_top from plain integer parameters
  2 |     // FPU features: datapath width, vector / NaN-box switches, and one enable flag per format
  3 |     parameter FLEN = 64,
  4 |     parameter ENABLE_VECTORS = 1,
  5 |     parameter ENABLE_NAN_BOX = 1,
  6 |     parameter ENABLE_FP32 = 1,
  7 |     parameter ENABLE_FP64 = 0,
  8 |     parameter ENABLE_FP16 = 0,
  9 |     parameter ENABLE_FP8 = 0,
 10 |     parameter ENABLE_FP16ALT = 0,
 11 |     parameter ENABLE_INT8 = 0,
 12 |     parameter ENABLE_INT16 = 0,
 13 |     parameter ENABLE_INT32 = 0,
 14 |     parameter ENABLE_INT64 = 0,
 15 |     // FPU implementation: value used for every PipeRegs entry of the Implementation struct below
 16 |     parameter PIPELINE_STAGES = 5,
 17 |     // Tag type: packed logic vector of TAG_WIDTH bits (see TagType)
 18 |     parameter TAG_WIDTH = 2,
 19 |     // Do not change; these follow the fpnew definition
 20 |     localparam int unsigned WIDTH        = FLEN,
 21 |     localparam int unsigned NUM_OPERANDS = 3,
 22 |     localparam type TagType = logic [TAG_WIDTH-1:0]
 23 | 
 24 | ) (
 25 |     // Ports copied from fpnew_top; each one is connected unchanged to the inner instance
 26 |     input logic                               clk_i,
 27 |     input logic                               rst_ni,
 28 |     // Input signals
 29 |     input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i,
 30 |     input fpnew_pkg::roundmode_e              rnd_mode_i,
 31 |     input fpnew_pkg::operation_e              op_i,
 32 |     input logic                               op_mod_i,
 33 |     input fpnew_pkg::fp_format_e              src_fmt_i,
 34 |     input fpnew_pkg::fp_format_e              dst_fmt_i,
 35 |     input fpnew_pkg::int_format_e             int_fmt_i,
 36 |     input logic                               vectorial_op_i,
 37 |     input TagType                             tag_i,
 38 |     // Input Handshake
 39 |     input  logic                              in_valid_i,
 40 |     output logic                              in_ready_o,
 41 |     input  logic                              flush_i,
 42 |     // Output signals
 43 |     output logic [WIDTH-1:0]                  result_o,
 44 |     output fpnew_pkg::status_t                status_o,
 45 |     output TagType                            tag_o,
 46 |     // Output handshake
 47 |     output logic                              out_valid_o,
 48 |     input  logic                              out_ready_i,
 49 |     // Indication of valid data in flight
 50 |     output logic                              busy_o
 51 | );
 52 |     // Feature struct for fpnew_top; each ENABLE_* flag is shifted to its own mask bit. NOTE(review): bit order presumably follows fpnew_pkg's format enums - confirm.
 53 |     localparam fpnew_pkg::fpu_features_t Features = '{
 54 |         Width: int'(FLEN),
 55 |         EnableVectors: int'(ENABLE_VECTORS),
 56 |         EnableNanBox: int'(ENABLE_NAN_BOX),
 57 |         FpFmtMask: (int'(ENABLE_FP32) << 4) | (int'(ENABLE_FP64) << 3) | (int'(ENABLE_FP16) << 2) | (int'(ENABLE_FP8) << 1) | (int'(ENABLE_FP16ALT) << 0),
 58 |         IntFmtMask: (int'(ENABLE_INT8) << 3) | (int'(ENABLE_INT16) << 2) | (int'(ENABLE_INT32) << 1) | (int'(ENABLE_INT64) << 0)
 59 |     };
 60 | 
 61 |     // Only the pipeline register count is customized; UnitTypes and PipeConfig are fixed constants
 62 |     localparam fpnew_pkg::fpu_implementation_t Implementation = '{
 63 |         PipeRegs:   '{'{default: PIPELINE_STAGES},
 64 |                       '{default: PIPELINE_STAGES},
 65 |                       '{default: PIPELINE_STAGES},
 66 |                       '{default: PIPELINE_STAGES}},
 67 |         UnitTypes:  '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
 68 |                     '{default: fpnew_pkg::MERGED},   // DIVSQRT
 69 |                     '{default: fpnew_pkg::PARALLEL}, // NONCOMP
 70 |                     '{default: fpnew_pkg::MERGED}},  // CONV
 71 |         PipeConfig: fpnew_pkg::BEFORE
 72 |     };
 73 |     // The wrapped FPU: every wrapper port connects to the fpnew_top port of the same name
 74 |     fpnew_top #(
 75 |         .Features(Features),
 76 |         .Implementation(Implementation),
 77 |         .TagType(TagType)
 78 |     ) inst (
 79 |         .clk_i(clk_i),
 80 |         .rst_ni(rst_ni),
 81 | 
 82 |         .operands_i(operands_i),
 83 |         .rnd_mode_i(rnd_mode_i),
 84 |         .op_i(op_i),
 85 |         .op_mod_i(op_mod_i),
 86 |         .src_fmt_i(src_fmt_i),
 87 |         .dst_fmt_i(dst_fmt_i),
 88 |         .int_fmt_i(int_fmt_i),
 89 |         .vectorial_op_i(vectorial_op_i),
 90 |         .tag_i(tag_i),
 91 | 
 92 |         .in_valid_i(in_valid_i),
 93 |         .in_ready_o(in_ready_o),
 94 |         .flush_i(flush_i),
 95 | 
 96 |         .result_o(result_o),
 97 |         .status_o(status_o),
 98 |         .tag_o(tag_o),
 99 | 
100 |         .out_valid_o(out_valid_o),
101 |         .out_ready_i(out_ready_i),
102 |         
103 |         .busy_o(busy_o)
104 |     );
105 | 
106 | 
107 | endmodule
108 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds FPNewBlackbox$(SUFFIX).synth.v: the sources are concatenated, preprocessed with
 2 | # verilator -E, converted with sv2v, and finally run through yosys.
 3 | # SUFFIX selects the wrapper variant (FPNewBlackbox$(SUFFIX).sv); it is empty unless the caller sets it.
 4 | 
 5 | # Delete a target whose recipe failed. The recipes write their targets through `>` redirection,
 6 | # so without this a failed tool leaves an empty or partial file that looks up to date next run.
 7 | .DELETE_ON_ERROR:
 8 | 
 9 | ROOT = ../../../thirdparty
10 | # sv2v executable; override with `make SV2V=/path/to/sv2v` when it is installed elsewhere.
11 | SV2V ?= ~/sv2v/bin/sv2v
12 | 
13 | VSRCS = $(ROOT)/fpnew/src/common_cells/src/rr_arb_tree.sv \
14 | 		$(ROOT)/fpnew/src/common_cells/src/cf_math_pkg.sv \
15 | 		$(ROOT)/fpnew/src/common_cells/src/lzc.sv \
16 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \
17 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv \
18 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv \
19 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv \
20 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv \
21 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv \
22 | 		$(ROOT)/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
23 | VSRCS += $(ROOT)/fpnew/src/fpnew_pkg.sv \
24 | 		$(ROOT)/fpnew/src/fpnew_cast_multi.sv \
25 | 		$(ROOT)/fpnew/src/fpnew_classifier.sv \
26 | 		$(ROOT)/fpnew/src/fpnew_divsqrt_multi.sv \
27 | 		$(ROOT)/fpnew/src/fpnew_fma.sv \
28 | 		$(ROOT)/fpnew/src/fpnew_fma_multi.sv \
29 | 		$(ROOT)/fpnew/src/fpnew_noncomp.sv \
30 | 		$(ROOT)/fpnew/src/fpnew_opgroup_block.sv \
31 | 		$(ROOT)/fpnew/src/fpnew_opgroup_fmt_slice.sv \
32 | 		$(ROOT)/fpnew/src/fpnew_opgroup_multifmt_slice.sv \
33 | 		$(ROOT)/fpnew/src/fpnew_rounding.sv \
34 | 		$(ROOT)/fpnew/src/fpnew_top.sv
35 | VSRCS += FPNewBlackbox$(SUFFIX).sv
36 | 
37 | VERILATOR_OPTS = -E +incdir+$(ROOT)/fpnew/src/common_cells/include
38 | 
39 | # Must use defer here, otherwise it can fail with TAG_WIDTH=0
40 | FPNewBlackbox$(SUFFIX).synth.v: FPNewBlackbox$(SUFFIX).preprocessed.v
41 | 	yosys -p 'read_verilog -defer $^' -p 'hierarchy -top FPNewBlackbox' -p 'proc' -p 'opt' -p 'write_verilog -noattr $@'
42 | 
43 | FPNewBlackbox$(SUFFIX).preprocessed.v: FPNewBlackbox$(SUFFIX).preprocessed.sv
44 | 	$(SV2V) $^ > $@
45 | 	sed -i '/\$$fatal/d' $@
46 | 
47 | FPNewBlackbox$(SUFFIX).preprocessed.sv: Makefile $(VSRCS)
48 | 	cat $(VSRCS) > cat.sv
49 | 	verilator --cc --exe $(VERILATOR_OPTS) cat.sv --top-module FPNewBlackbox > $@
50 | 	sed -i '/^`line/d' $@
51 | 	rm cat.sv
52 | 
53 | .PHONY: clean
54 | clean:
55 | 	rm -f FPNewBlackbox*.synth.v FPNewBlackbox*.preprocessed.v FPNewBlackbox*.preprocessed.sv
56 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/fpnew/gen.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | width = {
 4 |     'S': 32,
 5 |     'D': 64
 6 | }
 7 | 
 8 | for stage in range(1, 6):
 9 |     for format in ['S', 'D']:
10 |         for lane in range(1, 3):
11 |             suffix = f'_{format}{lane}l{stage}s'
12 |             os.system(f"cp FPNewBlackbox.sv FPNewBlackbox{suffix}.sv")
13 |             os.system(f"sed -i 's/__FLEN__/{width[format]*lane}/' FPNewBlackbox{suffix}.sv")
14 |             fp32 = int(format == "S")
15 |             os.system(f"sed -i 's/__FP32__/{fp32}/' FPNewBlackbox{suffix}.sv")
16 |             fp64 = int(format == "D")
17 |             os.system(f"sed -i 's/__FP64__/{fp64}/' FPNewBlackbox{suffix}.sv")
18 |             os.system(f"sed -i 's/__STAGES__/{stage}/' FPNewBlackbox{suffix}.sv")
19 |             os.system(f"make SUFFIX={suffix}")
20 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/opencores/.gitignore:
--------------------------------------------------------------------------------
1 | !*.v
2 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/opencores/except.v:
--------------------------------------------------------------------------------
  1 | /////////////////////////////////////////////////////////////////////
  2 | ////                                                             ////
  3 | ////  EXCEPT                                                     ////
  4 | ////  Floating Point Exception/Special Numbers Unit              ////
  5 | ////                                                             ////
  6 | ////  Author: Rudolf Usselmann                                   ////
  7 | ////          rudi@asics.ws                                      ////
  8 | ////                                                             ////
  9 | /////////////////////////////////////////////////////////////////////
 10 | ////                                                             ////
 11 | //// Copyright (C) 2000 Rudolf Usselmann                         ////
 12 | ////                    rudi@asics.ws                            ////
 13 | ////                                                             ////
 14 | //// This source file may be used and distributed without        ////
 15 | //// restriction provided that this copyright statement is not   ////
 16 | //// removed from the file and that any derivative work contains ////
 17 | //// the original copyright notice and the associated disclaimer.////
 18 | ////                                                             ////
 19 | ////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
 22 | //// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
 28 | //// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
 31 | //// POSSIBILITY OF SUCH DAMAGE.                                 ////
 32 | ////                                                             ////
 33 | /////////////////////////////////////////////////////////////////////
 34 | 
 35 | 
 36 | `timescale 1ns / 100ps
 37 | 
 38 | // except: registers classification flags (zero, infinity, NaN, zero exponent) for the 32-bit operands opa and opb.
 39 | module except(	clk, opa, opb, inf, ind, qnan, snan, opa_nan, opb_nan,
 40 | 		opa_00, opb_00, opa_inf, opb_inf, opa_dn, opb_dn);
 41 | input		clk;			// every flag below is updated on the rising edge
 42 | input	[31:0]	opa, opb;		// operands; [30:23] is read as exponent, [22:0] as fraction
 43 | output		inf, ind, qnan, snan, opa_nan, opb_nan;	// combined flags over both operands, plus per-operand NaN flags
 44 | output		opa_00, opb_00;		// operand has zero exponent and zero fraction
 45 | output		opa_inf, opb_inf;	// operand has all-ones exponent and zero fraction
 46 | output		opa_dn;			// operand A has a zero exponent (fraction is not checked)
 47 | output		opb_dn;			// operand B has a zero exponent (fraction is not checked)
 48 | 
 49 | ////////////////////////////////////////////////////////////////////////
 50 | //
 51 | // Local Wires and registers
 52 | //
 53 | 
 54 | wire	[7:0]	expa, expb;		// alias to opX exponent
 55 | wire	[22:0]	fracta, fractb;		// alias to opX fraction
 56 | reg		expa_ff, infa_f_r, qnan_r_a, snan_r_a;	// first-stage flags for operand A
 57 | reg		expb_ff, infb_f_r, qnan_r_b, snan_r_b;	// first-stage flags for operand B
 58 | reg		inf, ind, qnan, snan;	// Output registers
 59 | reg		opa_nan, opb_nan;
 60 | reg		expa_00, expb_00, fracta_00, fractb_00;	// first-stage "all bits zero" flags
 61 | reg		opa_00, opb_00;
 62 | reg		opa_inf, opb_inf;
 63 | reg		opa_dn, opb_dn;
 64 | 
 65 | ////////////////////////////////////////////////////////////////////////
 66 | //
 67 | // Aliases
 68 | //
 69 | 
 70 | assign   expa = opa[30:23];
 71 | assign   expb = opb[30:23];
 72 | assign fracta = opa[22:0];
 73 | assign fractb = opb[22:0];
 74 | 
 75 | ////////////////////////////////////////////////////////////////////////
 76 | //
 77 | // Determine whether either input operand is an INF, a NaN, or another special number
 78 | // Latency: opa_nan/opb_nan are one register after opa/opb; every other output is two.
 79 | 
 80 | always @(posedge clk)
 81 | 	expa_ff <= &expa;	// exponent of A is all ones
 82 | 
 83 | always @(posedge clk)
 84 | 	expb_ff <= &expb;	// exponent of B is all ones
 85 | 	
 86 | always @(posedge clk)
 87 | 	infa_f_r <= !(|fracta);	// fraction of A is zero
 88 | 
 89 | always @(posedge clk)
 90 | 	infb_f_r <= !(|fractb);	// fraction of B is zero
 91 | 
 92 | always @(posedge clk)
 93 | 	qnan_r_a <=  fracta[22];	// top fraction bit of A is set
 94 | 
 95 | always @(posedge clk)
 96 | 	snan_r_a <= !fracta[22] & |fracta[21:0];	// top fraction bit of A clear, lower bits non-zero
 97 | 	
 98 | always @(posedge clk)
 99 | 	qnan_r_b <=  fractb[22];	// top fraction bit of B is set
100 | 
101 | always @(posedge clk)
102 | 	snan_r_b <= !fractb[22] & |fractb[21:0];	// top fraction bit of B clear, lower bits non-zero
103 | 
104 | always @(posedge clk)
105 | 	ind  <= (expa_ff & infa_f_r) & (expb_ff & infb_f_r);	// both operands are infinities
106 | 
107 | always @(posedge clk)
108 | 	inf  <= (expa_ff & infa_f_r) | (expb_ff & infb_f_r);	// at least one operand is an infinity
109 | 
110 | always @(posedge clk)
111 | 	qnan <= (expa_ff & qnan_r_a) | (expb_ff & qnan_r_b);	// at least one operand has all-ones exponent and top fraction bit set
112 | 
113 | always @(posedge clk)
114 | 	snan <= (expa_ff & snan_r_a) | (expb_ff & snan_r_b);	// at least one operand has all-ones exponent and the snan_r_* pattern
115 | 
116 | always @(posedge clk)
117 | 	opa_nan <= &expa & (|fracta[22:0]);	// computed directly from the input, not from the first-stage flags
118 | 
119 | always @(posedge clk)
120 | 	opb_nan <= &expb & (|fractb[22:0]);	// computed directly from the input, not from the first-stage flags
121 | 
122 | always @(posedge clk)
123 | 	opa_inf <= (expa_ff & infa_f_r);
124 | 
125 | always @(posedge clk)
126 | 	opb_inf <= (expb_ff & infb_f_r);
127 | 
128 | always @(posedge clk)
129 | 	expa_00 <= !(|expa);	// exponent of A is zero
130 | 
131 | always @(posedge clk)
132 | 	expb_00 <= !(|expb);	// exponent of B is zero
133 | 
134 | always @(posedge clk)
135 | 	fracta_00 <= !(|fracta);	// fraction of A is zero
136 | 
137 | always @(posedge clk)
138 | 	fractb_00 <= !(|fractb);	// fraction of B is zero
139 | 
140 | always @(posedge clk)
141 | 	opa_00 <= expa_00 & fracta_00;
142 | 
143 | always @(posedge clk)
144 | 	opb_00 <= expb_00 & fractb_00;
145 | 
146 | always @(posedge clk)
147 | 	opa_dn <= expa_00;	// set for any zero exponent, so it is also set when opa_00 is
148 | 
149 | always @(posedge clk)
150 | 	opb_dn <= expb_00;	// set for any zero exponent, so it is also set when opb_00 is
151 | 
152 | endmodule
153 | 
154 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/opencores/fpu.v:
--------------------------------------------------------------------------------
  1 | /////////////////////////////////////////////////////////////////////
  2 | ////                                                             ////
  3 | ////  FPU                                                        ////
  4 | ////  Floating Point Unit (Single precision)                     ////
  5 | ////                                                             ////
  6 | ////  Author: Rudolf Usselmann                                   ////
  7 | ////          rudi@asics.ws                                      ////
  8 | ////                                                             ////
  9 | /////////////////////////////////////////////////////////////////////
 10 | ////                                                             ////
 11 | //// Copyright (C) 2000 Rudolf Usselmann                         ////
 12 | ////                    rudi@asics.ws                            ////
 13 | ////                                                             ////
 14 | //// This source file may be used and distributed without        ////
 15 | //// restriction provided that this copyright statement is not   ////
 16 | //// removed from the file and that any derivative work contains ////
 17 | //// the original copyright notice and the associated disclaimer.////
 18 | ////                                                             ////
 19 | ////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
 22 | //// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
 28 | //// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
 31 | //// POSSIBILITY OF SUCH DAMAGE.                                 ////
 32 | ////                                                             ////
 33 | /////////////////////////////////////////////////////////////////////
 34 | 
 35 | `timescale 1ns / 100ps
 36 | 
 37 | /*
 38 | 
 39 | FPU Operations (fpu_op):
 40 | ========================
 41 | 
 42 | 0 = add
 43 | 1 = sub
 44 | 2 = mul
 45 | 3 = div
 46 | 4 =
 47 | 5 =
 48 | 6 =
 49 | 7 =
 50 | 
 51 | Rounding Modes (rmode):
 52 | =======================
 53 | 
 54 | 0 = round_nearest_even
 55 | 1 = round_to_zero
 56 | 2 = round_up
 57 | 3 = round_down
 58 | 
 59 | */
 60 | 
 61 | 
  62 | module fpu( clk, rmode, fpu_op, opa, opb, out, inf, snan, qnan, ine, overflow, underflow, zero, div_by_zero);
  63 | input		clk;		// Clock; every register in this module updates on its rising edge
  64 | input	[1:0]	rmode;		// Rounding mode select
  65 | input	[2:0]	fpu_op;		// Operation select
  66 | input	[31:0]	opa, opb;	// Operands: sign [31], exponent [30:23], fraction [22:0]
  67 | output	[31:0]	out;		// Result (registered)
  68 | output		inf, snan, qnan;	// Status flags (registered)
  69 | output		ine;		// Inexact flag (registered)
  70 | output		overflow, underflow;	// Overflow / underflow flags (registered)
  71 | output		zero;		// Zero-result flag (registered)
  72 | output		div_by_zero;	// Divide-by-zero flag (registered)
  73 | // FPU top level: registers the inputs, instantiates the except, pre-normalize, add/sub, mul, div and post-normalize blocks, and registers the result and flags.
  74 | parameter	INF  = 31'h7f800000,	// out[30:0] pattern forced for infinity (exponent all ones, fraction zero)
  75 | 		QNAN = 31'h7fc00001,	// out[30:0] pattern forced for a quiet NaN
  76 | 		SNAN = 31'h7f800001;	// not referenced anywhere else in this module
  77 | 
  78 | ////////////////////////////////////////////////////////////////////////
  79 | //
  80 | // Local Wires
  81 | //
  82 | reg		zero;
  83 | reg	[31:0]	opa_r, opb_r;		// Input operand registers
  84 | reg	[31:0]	out;			// Output register
  85 | reg		div_by_zero;		// Divide by zero output register
  86 | wire		signa, signb;		// alias to opX sign (declared but never driven or read in this module)
  87 | wire		sign_fasu;		// sign output
  88 | wire	[26:0]	fracta, fractb;		// Fraction Outputs from EQU block
  89 | wire	[7:0]	exp_fasu;		// Exponent output from EQU block
  90 | reg	[7:0]	exp_r;			// Exponent output (registered)
  91 | wire	[26:0]	fract_out_d;		// fraction output
  92 | wire		co_d;			// carry output
  93 | reg	[27:0]	fract_out_q;		// fraction output (registered)
  94 | wire	[30:0]	out_d;			// Intermediate final result output
  95 | wire		overflow_d, underflow_d;// Overflow/Underflow Indicators
  96 | reg		overflow, underflow;	// Output registers for Overflow & Underflow
  97 | reg		inf, snan, qnan;	// Output Registers for INF, SNAN and QNAN
  98 | reg		ine;			// Output Registers for INE
  99 | reg	[1:0]	rmode_r1, rmode_r2, 	// Pipeline registers for rounding mode
 100 | 		rmode_r3;
 101 | reg	[2:0]	fpu_op_r1, fpu_op_r2,	// Pipeline registers for fp operation
 102 | 		fpu_op_r3;
 103 | wire		mul_inf, div_inf;
 104 | wire		mul_00, div_00;
 105 | 
 106 | ////////////////////////////////////////////////////////////////////////
 107 | //
 108 | // Input Registers
 109 | //
 110 | // The operands are registered once; rmode and fpu_op are delayed through three register stages (_r1, _r2, _r3).
 111 | always @(posedge clk)
 112 | 	opa_r <= opa;
 113 | 
 114 | always @(posedge clk)
 115 | 	opb_r <= opb;
 116 | 
 117 | always @(posedge clk)
 118 | 	rmode_r1 <= rmode;
 119 | 
 120 | always @(posedge clk)
 121 | 	rmode_r2 <= rmode_r1;
 122 | 
 123 | always @(posedge clk)
 124 | 	rmode_r3 <= rmode_r2;
 125 | 
 126 | always @(posedge clk)
 127 | 	fpu_op_r1 <= fpu_op;
 128 | 
 129 | always @(posedge clk)
 130 | 	fpu_op_r2 <= fpu_op_r1;
 131 | 
 132 | always @(posedge clk)
 133 | 	fpu_op_r3 <= fpu_op_r2;
 134 | 
 135 | ////////////////////////////////////////////////////////////////////////
 136 | //
 137 | // Exceptions block
 138 | //
 139 | wire		inf_d, ind_d, qnan_d, snan_d, opa_nan, opb_nan;
 140 | wire		opa_00, opb_00;
 141 | wire		opa_inf, opb_inf;
 142 | wire		opa_dn, opb_dn;
 143 | 
 144 | except u0(	.clk(clk),
 145 | 		.opa(opa_r), .opb(opb_r),
 146 | 		.inf(inf_d), .ind(ind_d),
 147 | 		.qnan(qnan_d), .snan(snan_d),
 148 | 		.opa_nan(opa_nan), .opb_nan(opb_nan),
 149 | 		.opa_00(opa_00), .opb_00(opb_00),
 150 | 		.opa_inf(opa_inf), .opb_inf(opb_inf),
 151 | 		.opa_dn(opa_dn), .opb_dn(opb_dn)
 152 | 		);
 153 | 
 154 | ////////////////////////////////////////////////////////////////////////
 155 | //
 156 | // Pre-Normalize block
 157 | // - Adjusts the numbers to equal exponents and sorts them
 158 | // - determine result sign
 159 | // - determine actual operation to perform (add or sub)
 160 | //
 161 | 
 162 | wire		fasu_op;
 163 | wire		nan_sign_d, result_zero_sign_d;
 164 | reg		sign_fasu_r;
 165 | wire	[7:0]	exp_mul;
 166 | wire		sign_mul;
 167 | reg		sign_mul_r;
 168 | wire	[23:0]	fracta_mul, fractb_mul;
 169 | wire		inf_mul;
 170 | reg		inf_mul_r;
 171 | wire	[1:0]	exp_ovf;
 172 | reg	[1:0]	exp_ovf_r;
 173 | wire		sign_exe;
 174 | reg		sign_exe_r;
 175 | wire	[2:0]	underflow_fmul_d;
 176 | 
 177 | 
 178 | pre_norm u1(.clk(clk),				// System Clock
 179 | 	.rmode(rmode_r2),			// Rounding Mode
 180 | 	.add(!fpu_op_r1[0]),			// Add/Sub Input (1 = add, i.e. fpu_op bit 0 is clear)
 181 | 	.opa(opa_r),  .opb(opb_r),		// Registered OP Inputs
 182 | 	.opa_nan(opa_nan),			// OpA is a NAN indicator
 183 | 	.opb_nan(opb_nan),			// OpB is a NAN indicator
 184 | 	.fracta_out(fracta),			// Equalized and sorted fraction
 185 | 	.fractb_out(fractb),			// outputs (Registered)
 186 | 	.exp_dn_out(exp_fasu),			// Selected exponent output (registered);
 187 | 	.sign(sign_fasu),			// Encoded output Sign (registered)
 188 | 	.nan_sign(nan_sign_d),			// Output Sign for NANs (registered)
 189 | 	.result_zero_sign(result_zero_sign_d),	// Output Sign for zero result (registered)
 190 | 	.fasu_op(fasu_op)			// Actual fasu operation output (registered)
 191 | 	);
 192 | 
 193 | always @(posedge clk)
 194 | 	sign_fasu_r <= sign_fasu;
 195 | 
 196 | pre_norm_fmul u2(
 197 | 		.clk(clk),
 198 | 		.fpu_op(fpu_op_r1),
 199 | 		.opa(opa_r), .opb(opb_r),
 200 | 		.fracta(fracta_mul),
 201 | 		.fractb(fractb_mul),
 202 | 		.exp_out(exp_mul),	// FMUL exponent output (registered)
 203 | 		.sign(sign_mul),	// FMUL sign output (registered)
 204 | 		.sign_exe(sign_exe),	// FMUL exception sign output (registered)
 205 | 		.inf(inf_mul),		// FMUL inf output (registered)
 206 | 		.exp_ovf(exp_ovf),	// FMUL exponent overflow output (registered)
 207 | 		.underflow(underflow_fmul_d)
 208 | 		);
 209 | 
 210 | 
 211 | always @(posedge clk)
 212 | 	sign_mul_r <= sign_mul;
 213 | 
 214 | always @(posedge clk)
 215 | 	sign_exe_r <= sign_exe;
 216 | 
 217 | always @(posedge clk)
 218 | 	inf_mul_r <= inf_mul;
 219 | 
 220 | always @(posedge clk)
 221 | 	exp_ovf_r <= exp_ovf;
 222 | 
 223 | 
 224 | ////////////////////////////////////////////////////////////////////////
 225 | //
 226 | // Add/Sub
 227 | //
 228 | 
 229 | add_sub27 u3(
 230 | 	.add(fasu_op),			// Add/Sub
 231 | 	.opa(fracta),			// Fraction A input
 232 | 	.opb(fractb),			// Fraction B Input
 233 | 	.sum(fract_out_d),		// SUM output
 234 | 	.co(co_d) );			// Carry Output
 235 | 
 236 | always @(posedge clk)
 237 | 	fract_out_q <= {co_d, fract_out_d};	// carry kept as the 28th (top) bit
 238 | 
 239 | ////////////////////////////////////////////////////////////////////////
 240 | //
 241 | // Mul
 242 | //
 243 | wire	[47:0]	prod;
 244 | 
 245 | mul_r2 u5(.clk(clk), .opa(fracta_mul), .opb(fractb_mul), .prod(prod));
 246 | 
 247 | ////////////////////////////////////////////////////////////////////////
 248 | //
 249 | // Divide
 250 | //
 251 | wire	[49:0]	quo;
 252 | wire	[49:0]	fdiv_opa;
 253 | wire	[49:0]	remainder;
 254 | wire		remainder_00;
 255 | reg	[4:0]	div_opa_ldz_d, div_opa_ldz_r1, div_opa_ldz_r2;
 256 | // div_opa_ldz_d: 1-based position of the first set bit of fracta_mul[22:0], counted from bit 22 down; 23 when bits [22:1] are all zero.
 257 | always @(fracta_mul)
 258 | 	casez(fracta_mul[22:0])
 259 | 	   23'b1??????????????????????: div_opa_ldz_d = 1;
 260 | 	   23'b01?????????????????????: div_opa_ldz_d = 2;
 261 | 	   23'b001????????????????????: div_opa_ldz_d = 3;
 262 | 	   23'b0001???????????????????: div_opa_ldz_d = 4;
 263 | 	   23'b00001??????????????????: div_opa_ldz_d = 5;
 264 | 	   23'b000001?????????????????: div_opa_ldz_d = 6;
 265 | 	   23'b0000001????????????????: div_opa_ldz_d = 7;
 266 | 	   23'b00000001???????????????: div_opa_ldz_d = 8;
 267 | 	   23'b000000001??????????????: div_opa_ldz_d = 9;
 268 | 	   23'b0000000001?????????????: div_opa_ldz_d = 10;
 269 | 	   23'b00000000001????????????: div_opa_ldz_d = 11;
 270 | 	   23'b000000000001???????????: div_opa_ldz_d = 12;
 271 | 	   23'b0000000000001??????????: div_opa_ldz_d = 13;
 272 | 	   23'b00000000000001?????????: div_opa_ldz_d = 14;
 273 | 	   23'b000000000000001????????: div_opa_ldz_d = 15;
 274 | 	   23'b0000000000000001???????: div_opa_ldz_d = 16;
 275 | 	   23'b00000000000000001??????: div_opa_ldz_d = 17;
 276 | 	   23'b000000000000000001?????: div_opa_ldz_d = 18;
 277 | 	   23'b0000000000000000001????: div_opa_ldz_d = 19;
 278 | 	   23'b00000000000000000001???: div_opa_ldz_d = 20;
 279 | 	   23'b000000000000000000001??: div_opa_ldz_d = 21;
 280 | 	   23'b0000000000000000000001?: div_opa_ldz_d = 22;
 281 | 	   23'b0000000000000000000000?: div_opa_ldz_d = 23;
 282 | 	endcase
 283 | 
 284 | assign fdiv_opa = !(|opa_r[30:23]) ? {(fracta_mul<<div_opa_ldz_d), 26'h0} : {fracta_mul, 26'h0};
 285 | // When opa's exponent field is zero the fraction is first shifted left by div_opa_ldz_d; 26 zero LSBs extend the 24-bit fraction to the 50-bit dividend.
 286 | 
 287 | div_r2 u6(.clk(clk), .opa(fdiv_opa), .opb(fractb_mul), .quo(quo), .rem(remainder));
 288 | 
 289 | assign remainder_00 = !(|remainder);
 290 | 
 291 | always @(posedge clk)
 292 | 	div_opa_ldz_r1 <= div_opa_ldz_d;
 293 | 
 294 | always @(posedge clk)
 295 | 	div_opa_ldz_r2 <= div_opa_ldz_r1;
 296 | 
 297 | 
 298 | ////////////////////////////////////////////////////////////////////////
 299 | //
 300 | // Normalize Result
 301 | //
 302 | wire		ine_d;
 303 | reg	[47:0]	fract_denorm;
 304 | wire	[47:0]	fract_div;
 305 | wire		sign_d;
 306 | reg		sign;
 307 | reg	[30:0]	opa_r1;
 308 | reg	[47:0]	fract_i2f;
 309 | reg		opas_r1, opas_r2;
 310 | wire		f2i_out_sign;
 311 | 
 312 | always @(posedge clk)			// Exponent must be one cycle delayed
 313 | 	case(fpu_op_r2)			// no arm for 6 or 7: exp_r keeps its previous value
 314 | 	  0,1:	exp_r <= exp_fasu;
 315 | 	  2,3:	exp_r <= exp_mul;
 316 | 	  4:	exp_r <= 0;
 317 | 	  5:	exp_r <= opa_r1[30:23];
 318 | 	endcase
 319 | 
 320 | assign fract_div = (opb_dn ? quo[49:2] : {quo[26:0], 21'h0});	// which 48 quotient bits are normalized depends on opb being denormalized
 321 | 
 322 | always @(posedge clk)
 323 | 	opa_r1 <= opa_r[30:0];
 324 | // fract_i2f feeds the normalizer for fpu_op 4 and 5 (see the fract_denorm mux below); it is built from opa only.
 325 | always @(posedge clk)
 326 | 	fract_i2f <= (fpu_op_r2==5) ?
 327 | 			(sign_d ?  1-{24'h00, (|opa_r1[30:23]), opa_r1[22:0]}-1 : {24'h0, (|opa_r1[30:23]), opa_r1[22:0]}) :
 328 | 			(sign_d ? 1 - {opa_r1, 17'h01} : {opa_r1, 17'h0});
 329 | 
 330 | always @(fpu_op_r3 or fract_out_q or prod or fract_div or fract_i2f)
 331 | 	case(fpu_op_r3)	// NOTE(review): combinational case without a default; for fpu_op_r3 = 6 or 7 fract_denorm is not assigned, which infers a latch
 332 | 	   0,1:	fract_denorm = {fract_out_q, 20'h0};
 333 | 	   2:	fract_denorm = prod;
 334 | 	   3:	fract_denorm = fract_div;
 335 | 	   4,5:	fract_denorm = fract_i2f;
 336 | 	endcase
 337 | 
 338 | 
 339 | always @(posedge clk)
 340 | 	opas_r1 <= opa_r[31];
 341 | 
 342 | always @(posedge clk)
 343 | 	opas_r2 <= opas_r1;
 344 | 
 345 | assign sign_d = fpu_op_r2[1] ? sign_mul : sign_fasu;	// mul/div sign when fpu_op bit 1 is set, add/sub sign otherwise
 346 | 
 347 | always @(posedge clk)
 348 | 	sign <= (rmode_r2==2'h3) ? !sign_d : sign_d;	// inverted when the rounding mode is 3
 349 | 
 350 | post_norm u4(.clk(clk),			// System Clock
 351 | 	.fpu_op(fpu_op_r3),		// Floating Point Operation
 352 | 	.opas(opas_r2),			// OPA Sign
 353 | 	.sign(sign),			// Sign of the result
 354 | 	.rmode(rmode_r3),		// Rounding mode
 355 | 	.fract_in(fract_denorm),	// Fraction Input
 356 | 	.exp_ovf(exp_ovf_r),		// Exponent Overflow
 357 | 	.exp_in(exp_r),			// Exponent Input
 358 | 	.opa_dn(opa_dn),		// Operand A Denormalized
 359 | 	.opb_dn(opb_dn),		// Operand B Denormalized
 360 | 	.rem_00(remainder_00),		// Divide Remainder is zero
 361 | 	.div_opa_ldz(div_opa_ldz_r2),	// Divide opa leading zeros count
 362 | 	.output_zero(mul_00 | div_00),	// Force output to Zero
 363 | 	.out(out_d),			// Normalized output (un-registered)
 364 | 	.ine(ine_d),			// Result Inexact output (un-registered)
 365 | 	.overflow(overflow_d),		// Overflow output (un-registered)
 366 | 	.underflow(underflow_d),	// Underflow output (un-registered)
 367 | 	.f2i_out_sign(f2i_out_sign)	// F2I Output Sign
 368 | 	);
 369 | 
 370 | ////////////////////////////////////////////////////////////////////////
 371 | //
 372 | // FPU Outputs
 373 | //
 374 | reg		fasu_op_r1, fasu_op_r2;
 375 | wire	[30:0]	out_fixed;
 376 | wire		output_zero_fasu;
 377 | wire		output_zero_fdiv;
 378 | wire		output_zero_fmul;
 379 | reg		inf_mul2;
 380 | wire		overflow_fasu;
 381 | wire		overflow_fmul;
 382 | wire		overflow_fdiv;
 383 | wire		inf_fmul;
 384 | wire		sign_mul_final;
 385 | wire		out_d_00;
 386 | wire		sign_div_final;
 387 | wire		ine_mul, ine_mula, ine_div, ine_fasu;
 388 | wire		underflow_fasu, underflow_fmul, underflow_fdiv;
 389 | wire		underflow_fmul1;
 390 | reg	[2:0]	underflow_fmul_r;
 391 | reg		opa_nan_r;
 392 | 
 393 | 
 394 | always @(posedge clk)
 395 | 	fasu_op_r1 <= fasu_op;
 396 | 
 397 | always @(posedge clk)
 398 | 	fasu_op_r2 <= fasu_op_r1;
 399 | 
 400 | always @(posedge clk)
 401 | 	inf_mul2 <= exp_mul == 8'hff;	// mul/div exponent from pre_norm_fmul is all ones
 402 | 
 403 | 
 404 | // Force pre-set values for non numerical output
 405 | assign mul_inf = (fpu_op_r3==3'b010) & (inf_mul_r | inf_mul2) & (rmode_r3==2'h0);
 406 | assign div_inf = (fpu_op_r3==3'b011) & (opb_00 | opa_inf);
 407 | 
 408 | assign mul_00 = (fpu_op_r3==3'b010) & (opa_00 | opb_00);
 409 | assign div_00 = (fpu_op_r3==3'b011) & (opa_00 | opb_inf);
 410 | // out_fixed: the forced pattern is QNAN for the cases listed below, INF otherwise.
 411 | assign out_fixed = (	(qnan_d | snan_d) |
 412 | 			(ind_d & !fasu_op_r2) | 
 413 | 			((fpu_op_r3==3'b011) & opb_00 & opa_00) |
 414 | 			(((opa_inf & opb_00) | (opb_inf & opa_00 )) & fpu_op_r3==3'b010)
 415 | 		   )  ? QNAN : INF;
 416 | 
 417 | always @(posedge clk)
 418 | 	out[30:0] <= (mul_inf | div_inf | (inf_d & (fpu_op_r3!=3'b011) & (fpu_op_r3!=3'b101)) | snan_d | qnan_d) & fpu_op_r3!=3'b100 ? out_fixed :
 419 | 			out_d;
 420 | 
 421 | assign out_d_00 = !(|out_d);	// all 31 bits of the normalized result are zero
 422 | 
 423 | assign sign_mul_final = (sign_exe_r & ((opa_00 & opb_inf) | (opb_00 & opa_inf))) ? !sign_mul_r : sign_mul_r;
 424 | assign sign_div_final = (sign_exe_r & (opa_inf & opb_inf)) ? !sign_mul_r : sign_mul_r | (opa_00 & opb_00);
 425 | // out[31]: priority mux over the per-operation sign sources; the first matching condition wins.
 426 | always @(posedge clk)
 427 | 	out[31] <=	((fpu_op_r3==3'b101) & out_d_00) ? (f2i_out_sign & !(qnan_d | snan_d) ) :
 428 | 			((fpu_op_r3==3'b010) & !(snan_d | qnan_d)) ?	sign_mul_final :
 429 | 			((fpu_op_r3==3'b011) & !(snan_d | qnan_d)) ?	sign_div_final :
 430 | 			(snan_d | qnan_d | ind_d) ?			nan_sign_d :
 431 | 			output_zero_fasu ?				result_zero_sign_d :
 432 | 									sign_fasu_r;
 433 | 
 434 | // Exception Outputs
 435 | assign ine_mula = ((inf_mul_r |  inf_mul2 | opa_inf | opb_inf) & (rmode_r3==2'h1) & 
 436 | 		!((opa_inf & opb_00) | (opb_inf & opa_00 )) & fpu_op_r3[1]);
 437 | 
 438 | assign ine_mul  = (ine_mula | ine_d | inf_fmul | out_d_00 | overflow_d | underflow_d) &
 439 | 		  !opa_00 & !opb_00 & !(snan_d | qnan_d | inf_d);
 440 | assign ine_div  = (ine_d | overflow_d | underflow_d) & !(opb_00 | snan_d | qnan_d | inf_d);
 441 | assign ine_fasu = (ine_d | overflow_d | underflow_d) & !(snan_d | qnan_d | inf_d);
 442 | // Flag mux shape used for ine/overflow/underflow: fpu_op[2] set (ops 4-7) first, then fpu_op[1]==0 selects add/sub, then fpu_op[0] selects div (1) or mul (0).
 443 | always @(posedge  clk)
 444 | 	ine <=	 fpu_op_r3[2] ? ine_d :
 445 | 			!fpu_op_r3[1] ? ine_fasu :
 446 | 			 fpu_op_r3[0] ? ine_div  : ine_mul;
 447 | 
 448 | 
 449 | assign overflow_fasu = overflow_d & !(snan_d | qnan_d | inf_d);
 450 | assign overflow_fmul = !inf_d & (inf_mul_r | inf_mul2 | overflow_d) & !(snan_d | qnan_d);
 451 | assign overflow_fdiv = (overflow_d & !(opb_00 | inf_d | snan_d | qnan_d));
 452 | 
 453 | always @(posedge clk)
 454 | 	overflow <=	 fpu_op_r3[2] ? 0 :
 455 | 			!fpu_op_r3[1] ? overflow_fasu :
 456 | 			 fpu_op_r3[0] ? overflow_fdiv : overflow_fmul;
 457 | 
 458 | always @(posedge clk)
 459 | 	underflow_fmul_r <= underflow_fmul_d;
 460 | 
 461 | 
 462 | assign underflow_fmul1 = underflow_fmul_r[0] |
 463 | 			(underflow_fmul_r[1] & underflow_d ) |
 464 | 			((opa_dn | opb_dn) & out_d_00 & (prod!=0) & sign) |
 465 | 			(underflow_fmul_r[2] & ((out_d[30:23]==0) | (out_d[22:0]==0)));
 466 | 
 467 | assign underflow_fasu = underflow_d & !(inf_d | snan_d | qnan_d);
 468 | assign underflow_fmul = underflow_fmul1 & !(snan_d | qnan_d | inf_mul_r);
 469 | assign underflow_fdiv = underflow_fasu & !opb_00;
 470 | 
 471 | always @(posedge clk)
 472 | 	underflow <=  fpu_op_r3[2] ? 0 :
 473 | 			!fpu_op_r3[1] ? underflow_fasu :
 474 | 			 fpu_op_r3[0] ? underflow_fdiv : underflow_fmul;
 475 | 
 476 | always @(posedge clk)
 477 | 	snan <= snan_d;
 478 | 
 479 | 
 480 | 
 481 | 
 482 | // Status Outputs
 483 | always @(posedge clk)
 484 | 	qnan <=	fpu_op_r3[2] ? 0 : (
 485 | 						snan_d | qnan_d | (ind_d & !fasu_op_r2) |
 486 | 						(opa_00 & opb_00 & fpu_op_r3==3'b011) |
 487 | 						(((opa_inf & opb_00) | (opb_inf & opa_00 )) & fpu_op_r3==3'b010)
 488 | 					   );
 489 | 
 490 | assign inf_fmul = 	(((inf_mul_r | inf_mul2) & (rmode_r3==2'h0)) | opa_inf | opb_inf) & 
 491 | 			!((opa_inf & opb_00) | (opb_inf & opa_00 )) &
 492 | 			fpu_op_r3==3'b010;
 493 | 
 494 | always @(posedge clk)
 495 | 	inf <=	fpu_op_r3[2] ? 0 :
 496 | 			(!(qnan_d | snan_d) & (
 497 | 						((&out_d[30:23]) & !(|out_d[22:0]) & !(opb_00 & fpu_op_r3==3'b011)) |
 498 | 						(inf_d & !(ind_d & !fasu_op_r2) & !fpu_op_r3[1]) |
 499 | 						inf_fmul |
 500 | 						(!opa_00 & opb_00 & fpu_op_r3==3'b011) |
 501 | 						(fpu_op_r3==3'b011 & opa_inf & !opb_inf)
 502 | 					      )
 503 | 			);
 504 | 
 505 | assign output_zero_fasu = out_d_00 & !(inf_d | snan_d | qnan_d);
 506 | assign output_zero_fdiv = (div_00 | (out_d_00 & !opb_00)) & !(opa_inf & opb_inf) &
 507 | 			  !(opa_00 & opb_00) & !(qnan_d | snan_d);
 508 | assign output_zero_fmul = (out_d_00 | opa_00 | opb_00) &
 509 | 			  !(inf_mul_r | inf_mul2 | opa_inf | opb_inf | snan_d | qnan_d) &
 510 | 			  !(opa_inf & opb_00) & !(opb_inf & opa_00);
 511 | 
 512 | always @(posedge clk)
 513 | 	zero <=	fpu_op_r3==3'b101 ?	out_d_00 & !(snan_d | qnan_d):
 514 | 			fpu_op_r3==3'b011 ?	output_zero_fdiv :
 515 | 			fpu_op_r3==3'b010 ?	output_zero_fmul :
 516 | 						output_zero_fasu ;
 517 | // NOTE: despite its name, opa_nan_r is set when opa is NOT a NaN and the operation is divide (3'b011).
 518 | always @(posedge clk)
 519 | 	opa_nan_r <= !opa_nan & fpu_op_r2==3'b011;
 520 | // div_by_zero: a divide whose opa is flagged neither NaN, zero nor infinite, and whose opb is flagged zero (flags come from the except block).
 521 | always @(posedge clk)
 522 | 	div_by_zero <= opa_nan_r & !opa_00 & !opa_inf & opb_00;
 523 | 
 524 | endmodule
525 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/opencores/pre_norm.v:
--------------------------------------------------------------------------------
  1 | /////////////////////////////////////////////////////////////////////
  2 | ////                                                             ////
  3 | ////  Pre Normalize                                              ////
  4 | ////  Pre Normalization Unit for Add/Sub Operations              ////
  5 | ////                                                             ////
  6 | ////  Author: Rudolf Usselmann                                   ////
  7 | ////          rudi@asics.ws                                      ////
  8 | ////                                                             ////
  9 | /////////////////////////////////////////////////////////////////////
 10 | ////                                                             ////
 11 | //// Copyright (C) 2000 Rudolf Usselmann                         ////
 12 | ////                    rudi@asics.ws                            ////
 13 | ////                                                             ////
 14 | //// This source file may be used and distributed without        ////
 15 | //// restriction provided that this copyright statement is not   ////
 16 | //// removed from the file and that any derivative work contains ////
 17 | //// the original copyright notice and the associated disclaimer.////
 18 | ////                                                             ////
 19 | ////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
 22 | //// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
 28 | //// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
 31 | //// POSSIBILITY OF SUCH DAMAGE.                                 ////
 32 | ////                                                             ////
 33 | /////////////////////////////////////////////////////////////////////
 34 | 
 35 | `timescale 1ns / 100ps
 36 | 
 37 | 
  38 | module pre_norm(clk, rmode, add, opa, opb, opa_nan, opb_nan, fracta_out,
  39 | 		fractb_out, exp_dn_out, sign, nan_sign, result_zero_sign,
  40 | 		fasu_op);
  41 | input		clk;				// Clock; all registers update on its rising edge
  42 | input	[1:0]	rmode;				// Rounding mode; only compared against 3 (zero-result sign)
  43 | input		add;				// Requested operation: 1=Add; 0=Subtract
  44 | input	[31:0]	opa, opb;			// Operands: sign [31], exponent [30:23], fraction [22:0]
  45 | input		opa_nan, opb_nan;		// Operand-is-NaN indicators (used for nan_sign only)
  46 | output	[26:0]	fracta_out, fractb_out;		// Aligned fractions, larger one on fracta_out (registered)
  47 | output	[7:0]	exp_dn_out;			// Selected exponent (registered)
  48 | output		sign;				// Result sign (registered)
  49 | output		nan_sign, result_zero_sign;	// Sign to use for a NaN result / for a zero result (registered)
  50 | output		fasu_op;			// Operation Output: effective operation after sign decode, 1=Add; 0=Subtract (registered)
  51 | // Add/sub pre-normalizer: shifts the smaller-exponent fraction right to align it, orders the two fractions, and decodes the result sign and the effective add/sub operation.
  52 | ////////////////////////////////////////////////////////////////////////
  53 | //
  54 | // Local Wires and registers
  55 | //
  56 | 
  57 | wire		signa, signb;		// alias to opX sign
  58 | wire	[7:0]	expa, expb;		// alias to opX exponent
  59 | wire	[22:0]	fracta, fractb;		// alias to opX fraction
  60 | wire		expa_lt_expb;		// NOTE: despite the name, set when expa > expb
  61 | wire		fractb_lt_fracta;	// NOTE: despite the name, set when fractb_n > fracta_n
  62 | reg	[7:0]	exp_dn_out;		// de normalized exponent output
  63 | wire	[7:0]	exp_small, exp_large;
  64 | wire	[7:0]	exp_diff;		// Numeric difference of the two exponents
  65 | wire	[22:0]	adj_op;			// Fraction adjustment: input
  66 | wire	[26:0]	adj_op_tmp;
  67 | wire	[26:0]	adj_op_out;		// Fraction adjustment: output
  68 | wire	[26:0]	fracta_n, fractb_n;	// Fraction selection after normalizing
  69 | wire	[26:0]	fracta_s, fractb_s;	// Fraction Sorting out
  70 | reg	[26:0]	fracta_out, fractb_out;	// Fraction Output
  71 | reg		sign, sign_d;		// Sign Output
  72 | reg		add_d;			// operation (add/sub)
  73 | reg		fasu_op;		// operation (add/sub) register
  74 | wire		expa_dn, expb_dn;
  75 | reg		sticky;
  76 | reg		result_zero_sign;
  77 | reg		add_r, signa_r, signb_r;
  78 | wire	[4:0]	exp_diff_sft;
  79 | wire		exp_lt_27;		// NOTE: despite the name, set when exp_diff > 27
  80 | wire		op_dn;
  81 | wire	[26:0]	adj_op_out_sft;
  82 | reg		fracta_lt_fractb, fracta_eq_fractb;	// registered compares of the raw 23-bit fractions (NaN sign only)
  83 | wire		nan_sign1;
  84 | reg		nan_sign;
  85 | 
  86 | ////////////////////////////////////////////////////////////////////////
  87 | //
  88 | // Aliases
  89 | //
  90 | 
  91 | assign  signa = opa[31];
  92 | assign  signb = opb[31];
  93 | assign   expa = opa[30:23];
  94 | assign   expb = opb[30:23];
  95 | assign fracta = opa[22:0];
  96 | assign fractb = opb[22:0];
  97 | 
  98 | ////////////////////////////////////////////////////////////////////////
  99 | //
 100 | // Pre-Normalize exponents (and fractions)
 101 | //
 102 | 
 103 | assign expa_lt_expb = expa > expb;		// expa is larger than expb
 104 | 
 105 | // ---------------------------------------------------------------------
 106 | // Normalize
 107 | 
 108 | assign expa_dn = !(|expa);			// opa denormalized
 109 | assign expb_dn = !(|expb);			// opb denormalized
 110 | 
 111 | // ---------------------------------------------------------------------
 112 | // Calculate the difference between the smaller and larger exponent
 113 | 
 114 | wire	[7:0]	exp_diff1, exp_diff1a, exp_diff2;
 115 | 
 116 | assign exp_small  = expa_lt_expb ? expb : expa;
 117 | assign exp_large  = expa_lt_expb ? expa : expb;
 118 | assign exp_diff1  = exp_large - exp_small;
 119 | assign exp_diff1a = exp_diff1-1;
 120 | assign exp_diff2  = (expa_dn | expb_dn) ? exp_diff1a : exp_diff1;	// one less when either exponent field is zero (presumably: a denormalized operand's effective exponent is 1)
 121 | assign  exp_diff  = (expa_dn & expb_dn) ? 8'h0 : exp_diff2;		// both exponent fields zero: no alignment shift
 122 | 
 123 | always @(posedge clk)	// If numbers are equal we should return zero
 124 | 	exp_dn_out <= (!add_d & expa==expb & fracta==fractb) ? 8'h0 : exp_large;	// effective subtract of equal magnitudes
 125 | 
 126 | // ---------------------------------------------------------------------
 127 | // Adjust the smaller fraction
 128 | 
 129 | // op_dn / adj_op belong to the operand that is not the larger-exponent one (opa when the exponents are equal).
 130 | assign op_dn	  = expa_lt_expb ? expb_dn : expa_dn;
 131 | assign adj_op     = expa_lt_expb ? fractb : fracta;
 132 | assign adj_op_tmp = { ~op_dn, adj_op, 3'b0 };	// recover hidden bit (op_dn); three zero LSBs are appended below the fraction
 133 | 
 134 | // adj_op_out is 27 bits wide, so can only be shifted 27 bits to the right
 135 | assign exp_lt_27	= exp_diff  > 8'd27;
 136 | assign exp_diff_sft	= exp_lt_27 ? 5'd27 : exp_diff[4:0];	// shift amount, clamped to 27
 137 | assign adj_op_out_sft	= adj_op_tmp >> exp_diff_sft;
 138 | assign adj_op_out	= {adj_op_out_sft[26:1], adj_op_out_sft[0] | sticky };	// OR the shifted-out bits into the LSB
 139 | 
 140 | // ---------------------------------------------------------------------
 141 | // Get truncated portion (sticky bit)
 142 | // sticky = OR of the exp_diff_sft low bits of adj_op_tmp that the shift above discards. Arms 28..31 are absent; exp_diff_sft is clamped to 27 above.
 143 | always @(exp_diff_sft or adj_op_tmp)
 144 |    case(exp_diff_sft)		// synopsys full_case parallel_case
 145 | 	00: sticky = 1'h0;
 146 | 	01: sticky =  adj_op_tmp[0]; 
 147 | 	02: sticky = |adj_op_tmp[01:0];
 148 | 	03: sticky = |adj_op_tmp[02:0];
 149 | 	04: sticky = |adj_op_tmp[03:0];
 150 | 	05: sticky = |adj_op_tmp[04:0];
 151 | 	06: sticky = |adj_op_tmp[05:0];
 152 | 	07: sticky = |adj_op_tmp[06:0];
 153 | 	08: sticky = |adj_op_tmp[07:0];
 154 | 	09: sticky = |adj_op_tmp[08:0];
 155 | 	10: sticky = |adj_op_tmp[09:0];
 156 | 	11: sticky = |adj_op_tmp[10:0];
 157 | 	12: sticky = |adj_op_tmp[11:0];
 158 | 	13: sticky = |adj_op_tmp[12:0];
 159 | 	14: sticky = |adj_op_tmp[13:0];
 160 | 	15: sticky = |adj_op_tmp[14:0];
 161 | 	16: sticky = |adj_op_tmp[15:0];
 162 | 	17: sticky = |adj_op_tmp[16:0];
 163 | 	18: sticky = |adj_op_tmp[17:0];
 164 | 	19: sticky = |adj_op_tmp[18:0];
 165 | 	20: sticky = |adj_op_tmp[19:0];
 166 | 	21: sticky = |adj_op_tmp[20:0];
 167 | 	22: sticky = |adj_op_tmp[21:0];
 168 | 	23: sticky = |adj_op_tmp[22:0];
 169 | 	24: sticky = |adj_op_tmp[23:0];
 170 | 	25: sticky = |adj_op_tmp[24:0];
 171 | 	26: sticky = |adj_op_tmp[25:0];
 172 | 	27: sticky = |adj_op_tmp[26:0];
 173 |    endcase
 174 | 
 175 | // ---------------------------------------------------------------------
 176 | // Select operands for add/sub (recover hidden bit)
 177 | // The larger-exponent operand passes through unshifted; the other one takes the aligned adj_op_out.
 178 | assign fracta_n = expa_lt_expb ? {~expa_dn, fracta, 3'b0} : adj_op_out;
 179 | assign fractb_n = expa_lt_expb ? adj_op_out : {~expb_dn, fractb, 3'b0};
 180 | 
 181 | // ---------------------------------------------------------------------
 182 | // Sort operands (for sub only)
 183 | 
 184 | assign fractb_lt_fracta = fractb_n > fracta_n;	// fractb is larger than fracta
 185 | assign fracta_s = fractb_lt_fracta ? fractb_n : fracta_n;	// larger fraction
 186 | assign fractb_s = fractb_lt_fracta ? fracta_n : fractb_n;	// smaller (or equal) fraction
 187 | 
 188 | always @(posedge clk)
 189 | 	fracta_out <= fracta_s;
 190 | 
 191 | always @(posedge clk)
 192 | 	fractb_out <= fractb_s;
 193 | 
 194 | // ---------------------------------------------------------------------
 195 | // Determine sign for the output
 196 | 
 197 | // sign: 0=Positive Number; 1=Negative Number
 198 | always @(signa or signb or add or fractb_lt_fracta)
 199 |    case({signa, signb, add})		// synopsys full_case parallel_case
 200 | 
 201 |    	// Add
 202 | 	3'b0_0_1: sign_d = 0;
 203 | 	3'b0_1_1: sign_d = fractb_lt_fracta;
 204 | 	3'b1_0_1: sign_d = !fractb_lt_fracta;
 205 | 	3'b1_1_1: sign_d = 1;
 206 | 
 207 | 	// Sub
 208 | 	3'b0_0_0: sign_d = fractb_lt_fracta;
 209 | 	3'b0_1_0: sign_d = 0;
 210 | 	3'b1_0_0: sign_d = 1;
 211 | 	3'b1_1_0: sign_d = !fractb_lt_fracta;
 212 |    endcase
 213 | 
 214 | always @(posedge clk)
 215 | 	sign <= sign_d;
 216 | 
 217 | // Fix sign for ZERO result
 218 | always @(posedge clk)
 219 | 	signa_r <= signa;
 220 | 
 221 | always @(posedge clk)
 222 | 	signb_r <= signb;
 223 | 
 224 | always @(posedge clk)
 225 | 	add_r <= add;
 226 | // Built from the registered signs/op (one cycle after the inputs) and the rmode port as it is at that clock edge; the rmode==3 terms widen the set of cases that give a negative zero.
 227 | always @(posedge clk)
 228 | 	result_zero_sign <=	( add_r &  signa_r &  signb_r) |
 229 | 				(!add_r &  signa_r & !signb_r) |
 230 | 				( add_r & (signa_r |  signb_r) & (rmode==3)) |
 231 | 				(!add_r & (signa_r == signb_r) & (rmode==3));
 232 | 
 233 | // Fix sign for NAN result
 234 | always @(posedge clk)
 235 | 	fracta_lt_fractb <= fracta < fractb;
 236 | 
 237 | always @(posedge clk)
 238 | 	fracta_eq_fractb <= fracta == fractb;
 239 | 
 240 | assign nan_sign1 = fracta_eq_fractb ? (signa_r & signb_r) : fracta_lt_fractb ? signb_r : signa_r;	// both NaN: sign of the operand with the larger fraction; AND of the signs on a tie
 241 | 
 242 | always @(posedge clk)
 243 | 	nan_sign <= (opa_nan & opb_nan) ? nan_sign1 : opb_nan ? signb_r : signa_r;	// opb's sign if only opb is NaN, otherwise opa's
 244 | 
 245 | ////////////////////////////////////////////////////////////////////////
 246 | //
 247 | // Decode Add/Sub operation
 248 | //
 249 | 
 250 | // add: 1=Add; 0=Subtract
 251 | always @(signa or signb or add)
 252 |    case({signa, signb, add})		// synopsys full_case parallel_case
 253 |    
 254 |    	// Add
 255 | 	3'b0_0_1: add_d = 1;
 256 | 	3'b0_1_1: add_d = 0;
 257 | 	3'b1_0_1: add_d = 0;
 258 | 	3'b1_1_1: add_d = 1;
 259 | 	
 260 | 	// Sub
 261 | 	3'b0_0_0: add_d = 0;
 262 | 	3'b0_1_0: add_d = 1;
 263 | 	3'b1_0_0: add_d = 1;
 264 | 	3'b1_1_0: add_d = 0;
 265 |    endcase
 266 | 
 267 | always @(posedge clk)
 268 | 	fasu_op <= add_d;
 269 | 
 270 | endmodule
271 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/opencores/pre_norm_fmul.v:
--------------------------------------------------------------------------------
  1 | /////////////////////////////////////////////////////////////////////
  2 | ////                                                             ////
  3 | ////  Pre Normalize                                              ////
  4 | ////  Floating Point Pre Normalization Unit for FMUL             ////
  5 | ////                                                             ////
  6 | ////  Author: Rudolf Usselmann                                   ////
  7 | ////          rudi@asics.ws                                      ////
  8 | ////                                                             ////
  9 | /////////////////////////////////////////////////////////////////////
 10 | ////                                                             ////
 11 | //// Copyright (C) 2000 Rudolf Usselmann                         ////
 12 | ////                    rudi@asics.ws                            ////
 13 | ////                                                             ////
 14 | //// This source file may be used and distributed without        ////
 15 | //// restriction provided that this copyright statement is not   ////
 16 | //// removed from the file and that any derivative work contains ////
 17 | //// the original copyright notice and the associated disclaimer.////
 18 | ////                                                             ////
 19 | ////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
 22 | //// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
 28 | //// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
 31 | //// POSSIBILITY OF SUCH DAMAGE.                                 ////
 32 | ////                                                             ////
 33 | /////////////////////////////////////////////////////////////////////
 34 | 
 35 | `timescale 1ns / 100ps
 36 | 
  37 | module pre_norm_fmul(clk, fpu_op, opa, opb, fracta, fractb, exp_out, sign,
  38 | 		sign_exe, inf, exp_ovf, underflow);
  39 | input		clk;		// Clock; all registers update on its rising edge
  40 | input	[2:0]	fpu_op;		// Operation select; only compared against 3'b011 (divide) here
  41 | input	[31:0]	opa, opb;	// Operands: sign [31], exponent [30:23], fraction [22:0]
  42 | output	[23:0]	fracta, fractb;	// Fractions with the hidden bit prepended (combinational)
  43 | output	[7:0]	exp_out;	// Result exponent (registered)
  44 | output		sign, sign_exe;	// sign: signa XOR signb; sign_exe: signa AND signb (both registered)
  45 | output		inf;		// Registered exponent-range indication (see the inf register below)
  46 | output	[1:0]	exp_ovf;	// Exponent overflow bits (registered)
  47 | output	[2:0]	underflow;	// Underflow condition bits (registered)
  48 | // Pre-normalizer for multiply and divide: recovers the hidden bits, computes the result exponent and sign, and flags exponent overflow/underflow conditions.
  49 | ////////////////////////////////////////////////////////////////////////
  50 | //
  51 | // Local Wires and registers
  52 | //
  53 | 
  54 | reg	[7:0]	exp_out;
  55 | wire		signa, signb;
  56 | reg		sign, sign_d;
  57 | reg		sign_exe;
  58 | reg		inf;
  59 | wire	[1:0]	exp_ovf_d;
  60 | reg	[1:0]	exp_ovf;
  61 | wire	[7:0]	expa, expb;
  62 | wire	[7:0]	exp_tmp1, exp_tmp2;
  63 | wire		co1, co2;
  64 | wire		expa_dn, expb_dn;
  65 | wire	[7:0]	exp_out_a;
  66 | wire		opa_00, opb_00, fracta_00, fractb_00;
  67 | wire	[7:0]	exp_tmp3, exp_tmp4, exp_tmp5;
  68 | wire	[2:0]	underflow_d;
  69 | reg	[2:0]	underflow;
  70 | wire		op_div = (fpu_op == 3'b011);	// 1 = divide; every other fpu_op value takes the multiply path
  71 | wire	[7:0]	exp_out_mul, exp_out_div;
  72 | 
  73 | ////////////////////////////////////////////////////////////////////////
  74 | //
  75 | // Aliases
  76 | //
  77 | 
  78 | assign  signa = opa[31];
  79 | assign  signb = opb[31];
  80 | assign   expa = opa[30:23];
  81 | assign   expb = opb[30:23];
  82 | 
  83 | ////////////////////////////////////////////////////////////////////////
  84 | //
  85 | // Calculate Exponent
  86 | //
  87 | 
  88 | assign expa_dn   = !(|expa);		// opa exponent field is zero
  89 | assign expb_dn   = !(|expb);		// opb exponent field is zero
  90 | assign opa_00    = !(|opa[30:0]);	// opa is zero (sign ignored)
  91 | assign opb_00    = !(|opb[30:0]);	// opb is zero (sign ignored)
  92 | assign fracta_00 = !(|opa[22:0]);	// opa fraction field is zero
  93 | assign fractb_00 = !(|opb[22:0]);	// opb fraction field is zero
  94 | 
  95 | assign fracta = {!expa_dn,opa[22:0]};	// Recover hidden bit
  96 | assign fractb = {!expb_dn,opb[22:0]};	// Recover hidden bit
  97 | // Multiply: expa + expb - 8'h7f; divide: expa - expb + 8'h7f (8'h7f is presumably the single-precision exponent bias). co1/co2 capture the ninth bit of each step.
  98 | assign {co1,exp_tmp1} = op_div ? (expa - expb)            : (expa + expb);
  99 | assign {co2,exp_tmp2} = op_div ? ({co1,exp_tmp1} + 8'h7f) : ({co1,exp_tmp1} - 8'h7f);
 100 | 
 101 | assign exp_tmp3 = exp_tmp2 + 1;
 102 | assign exp_tmp4 = 8'h7f - exp_tmp1;
 103 | assign exp_tmp5 = op_div ? (exp_tmp4+1) : (exp_tmp4-1);
 104 | 
 105 | 
 106 | always@(posedge clk)
 107 | 	exp_out <= op_div ? exp_out_div : exp_out_mul;
 108 | 
 109 | assign exp_out_div = (expa_dn | expb_dn) ? (co2 ? exp_tmp5 : exp_tmp3 ) : co2 ? exp_tmp4 : exp_tmp2;
 110 | assign exp_out_mul = exp_ovf_d[1] ? exp_out_a : (expa_dn | expb_dn) ? exp_tmp3 : exp_tmp2;
 111 | assign exp_out_a   = (expa_dn | expb_dn) ? exp_tmp5 : exp_tmp4;
 112 | assign exp_ovf_d[0] = op_div ? (expa[7] & !expb[7]) : (co2 & expa[7] & expb[7]);
 113 | assign exp_ovf_d[1] = op_div ? co2                  : ((!expa[7] & !expb[7] & exp_tmp2[7]) | co2);
 114 | 
 115 | always @(posedge clk)
 116 | 	exp_ovf <= exp_ovf_d;
 117 | 
 118 | assign underflow_d[0] =	(exp_tmp1 < 8'h7f) & !co1 & !(opa_00 | opb_00 | expa_dn | expb_dn);
 119 | assign underflow_d[1] =	((expa[7] | expb[7]) & !opa_00 & !opb_00) |
 120 | 			 (expa_dn & !fracta_00) | (expb_dn & !fractb_00);
 121 | assign underflow_d[2] =	 !opa_00 & !opb_00 & (exp_tmp1 == 8'h7f);
 122 | 
 123 | always @(posedge clk)
 124 | 	underflow <= underflow_d;
 125 | // inf: for divide, opb's exponent field is zero while expa[7] is clear; for multiply, the 9-bit exponent sum exceeds 9'h17e.
 126 | always @(posedge clk)
 127 | 	inf <= op_div ? (expb_dn & !expa[7]) : ({co1,exp_tmp1} > 9'h17e) ;
 128 | 
 129 | 
 130 | ////////////////////////////////////////////////////////////////////////
 131 | //
 132 | // Determine sign for the output
 133 | //
 134 | 
 135 | // sign: 0=Positive Number; 1=Negative Number
 136 | always @(signa or signb)
 137 |    case({signa, signb})		// synopsys full_case parallel_case
 138 | 	2'b0_0: sign_d = 0;
 139 | 	2'b0_1: sign_d = 1;
 140 | 	2'b1_0: sign_d = 1;
 141 | 	2'b1_1: sign_d = 0;
 142 |    endcase
 143 | 
 144 | always @(posedge clk)
 145 | 	sign <= sign_d;
 146 | 
 147 | always @(posedge clk)
 148 | 	sign_exe <= signa & signb;	// set only when both operands are negative
 149 | 
 150 | endmodule


--------------------------------------------------------------------------------
/fpu-wrappers/resources/opencores/primitives.v:
--------------------------------------------------------------------------------
  1 | /////////////////////////////////////////////////////////////////////
  2 | ////                                                             ////
  3 | ////  Primitives                                                 ////
  4 | ////  FPU Primitives                                             ////
  5 | ////                                                             ////
  6 | ////  Author: Rudolf Usselmann                                   ////
  7 | ////          rudi@asics.ws                                      ////
  8 | ////                                                             ////
  9 | /////////////////////////////////////////////////////////////////////
 10 | ////                                                             ////
 11 | //// Copyright (C) 2000 Rudolf Usselmann                         ////
 12 | ////                    rudi@asics.ws                            ////
 13 | ////                                                             ////
 14 | //// This source file may be used and distributed without        ////
 15 | //// restriction provided that this copyright statement is not   ////
 16 | //// removed from the file and that any derivative work contains ////
 17 | //// the original copyright notice and the associated disclaimer.////
 18 | ////                                                             ////
 19 | ////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
 20 | //// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
 21 | //// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
 22 | //// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
 23 | //// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
 24 | //// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
 25 | //// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
 26 | //// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
 27 | //// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
 28 | //// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
 29 | //// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
 30 | //// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
 31 | //// POSSIBILITY OF SUCH DAMAGE.                                 ////
 32 | ////                                                             ////
 33 | /////////////////////////////////////////////////////////////////////
 34 | 
 35 | 
 36 | `timescale 1ns / 100ps
 37 | 
 38 | 
 39 | ////////////////////////////////////////////////////////////////////////
 40 | //
 41 | // Add/Sub
 42 | //
 43 | 
 44 | module add_sub27(add, opa, opb, sum, co);
 45 | input		add;		// 1: opa + opb, 0: opa - opb
 46 | input	[26:0]	opa, opb;
 47 | output	[26:0]	sum;
 48 | output		co;		// bit 27 of the 28-bit result
 49 | 
 50 | // evaluate at 28 bits so the carry/borrow lands in the top bit
 51 | wire	[27:0]	wide = add ? (opa + opb) : (opa - opb);
 52 | assign sum = wide[26:0];
 53 | assign co  = wide[27];
 54 | endmodule
 55 | 
 56 | ////////////////////////////////////////////////////////////////////////
 57 | //
 58 | // Multiply
 59 | //
 60 | 
 61 | module mul_r2(clk, opa, opb, prod);
 62 | input		clk;
 63 | input	[23:0]	opa, opb;
 64 | output	[47:0]	prod;
 65 | reg	[47:0]	prod;
 66 | 
 67 | // stage 1: registered 24x24 product, stage 2: output register
 68 | reg	[47:0]	prod_s1;
 69 | 
 70 | always @(posedge clk) begin
 71 | 	prod_s1 <= opa * opb;
 72 | 	prod    <= prod_s1;	// non-blocking: picks up last cycle's prod_s1
 73 | end
 74 | endmodule
 75 | 
 76 | ////////////////////////////////////////////////////////////////////////
 77 | //
 78 | // Divide
 79 | //
 80 | 
 81 | module div_r2(clk, opa, opb, quo, rem);
 82 | input		clk;
 83 | input	[49:0]	opa;		// dividend
 84 | input	[23:0]	opb;		// divisor
 85 | output	[49:0]	quo, rem;
 86 | reg	[49:0]	quo, rem;
 87 | 
 88 | // first-stage registers; quo/rem form the second stage, so both
 89 | // results appear two clock edges after the operands
 90 | reg	[49:0]	quo_s1, rem_s1;
 91 | 
 92 | always @(posedge clk) begin
 93 | 	quo_s1 <= opa / opb;
 94 | 	rem_s1 <= opa % opb;
 95 | end
 96 | 
 97 | always @(posedge clk) begin
 98 | 	quo <= quo_s1;
 99 | 	rem <= rem_s1;
100 | end
101 | endmodule
102 | 
103 | 
104 | 


--------------------------------------------------------------------------------
/fpu-wrappers/resources/syn.tcl:
--------------------------------------------------------------------------------
 1 | # usage: dc_shell -f syn.tcl
 2 | 
 3 | # optional per-user library setup
 4 | if {[file exists ~/library.tcl]} {
 5 | 	source ~/library.tcl
 6 | }
 7 | 
 8 | # args: the upper-case tokens are presumably substituted by the
 9 | # caller before this script is run (TODO confirm)
10 | set input_verilog [list INPUT_VERILOG]
11 | set input_systemverilog [list INPUT_SYSTEMVERILOG]
12 | set input_vhdl [list INPUT_VHDL]
13 | set toplevel_name TOPLEVEL_NAME
14 | 
15 | # load design
16 | read_file -format verilog $input_verilog
17 | read_file -format sverilog $input_systemverilog
18 | read_file -format vhdl $input_vhdl
19 | 
20 | # stop here when the requested top level was not read in
21 | if {[llength [get_designs -exact $toplevel_name]] == 0} {
22 | 	quit
23 | }
24 | current_design $toplevel_name
25 | 
26 | # setup
27 | set_host_options -max_cores 16
28 | 
29 | # timing
30 | # 1 ns period (1GHz), requested for both clock port names, "clock" and "clk"
31 | create_clock clock -period 1.0000
32 | create_clock clk -period 1.0000
33 | # inputs are assumed to be driven by a dff output:
34 | # clock-to-output delay 0.14ns
35 | set_input_delay 0.14 -clock clock [all_inputs]
36 | set_input_delay 0.14 -clock clk [all_inputs]
37 | # outputs are assumed to feed a dff:
38 | # setup time 0.02ns
39 | set_output_delay 0.02 -clock clock [all_outputs]
40 | set_output_delay 0.02 -clock clk [all_outputs]
41 | 
42 | # synthesis flow
43 | link
44 | uniquify
45 | ungroup -flatten -all
46 | set_optimize_registers
47 | compile_ultra
48 | 
49 | # export
50 | write -format ddc -hierarchy -output ${toplevel_name}.ddc
51 | write_sdf -version 1.0 ${toplevel_name}.sdf
52 | write -format verilog -hierarchy -output ${toplevel_name}.syn.v
53 | write_sdc ${toplevel_name}.sdc
54 | 
55 | # reports
56 | check_timing > ${toplevel_name}_check_timing.txt
57 | check_design > ${toplevel_name}_check_design.txt
58 | report_design > ${toplevel_name}_report_design.txt
59 | report_area -hierarchy > ${toplevel_name}_report_area.txt
60 | report_power -hierarchy > ${toplevel_name}_report_power.txt
61 | report_cell > ${toplevel_name}_report_cell.txt
62 | report_timing -delay_type max -max_paths 5 > ${toplevel_name}_report_timing_setup.txt
63 | report_timing -delay_type min -max_paths 5 > ${toplevel_name}_report_timing_hold.txt
64 | report_constraint -all_violators > ${toplevel_name}_report_constraint.txt
65 | report_qor > ${toplevel_name}_report_qor.txt
66 | 
67 | # quit
68 | quit
69 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/Mul.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper
 2 | 
 3 | import spinal.core._
 4 | import spinal.lib._
 5 | 
 6 | /** Unsigned integer multiplier with optional output pipelining
 7 |   *
 8 |   * @param bitWidth
 9 |   *   width in bits of each operand; the product is twice as wide
10 |   * @param stages
11 |   *   number of `Delay` stages placed after the multiply
12 |   */
13 | class Mul(bitWidth: Int, stages: Int) extends Component {
14 |   val a = in UInt (bitWidth bits)
15 |   val b = in UInt (bitWidth bits)
16 |   val c = out UInt (2 * bitWidth bits)
17 | 
18 |   c := Delay(a * b, stages)
19 | }
20 | 
21 | /** Emits Verilog for every width / stage-count combination of Mul
22 |   */
23 | object Mul extends SpinalEmitVerilog {
24 |   // widths vary slowest, stage counts fastest
25 |   for {
26 |     width <- Seq(8, 16, 32)
27 |     stages <- Seq(0, 1, 2)
28 |   } work(new Mul(width, stages), s"Mul_${width}w${stages}s")
29 | }
30 | 
31 | /** Emits Mul at 11, 24 and 53 bits and hands each netlist to Synthesis.build
32 |   */
33 | object MulSynth extends SpinalEmitVerilog {
34 |   for {
35 |     width <- Seq(11, 24, 53)
36 |     stages <- Seq(0, 1, 2)
37 |   } {
38 |     // one name drives both the netlist file and the synthesis job
39 |     val name = s"Mul_${width}w${stages}s"
40 |     work(new Mul(width, stages), name)
41 |     Synthesis.build(
42 |       Seq(s"${name}.v"),
43 |       s"Mul",
44 |       name
45 |     )
46 |   }
47 | }
48 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/bench.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper
 2 | 
 3 | import spinal.core._
 4 | import spinal.lib.eda.bench._
 5 | import spinal.lib.eda.xilinx.VivadoFlow
 6 | 
 7 | import scala.collection.mutable.ArrayBuffer
 8 | 
 9 | /** Mixin that pushes RTL through SpinalHDL's Vivado bench flow
10 |   */
11 | trait VivadoBench extends App {
12 |   /** Synthesises the design for every device at every frequency goal.
13 |     *
14 |     * @param name          name reported for the RTL
15 |     * @param paths         RTL source files
16 |     * @param topModuleName top-level module name
17 |     */
18 |   def bench(name: String, paths: Seq[String], topModuleName: String) = {
19 |     val vivadoPath = "/opt/Xilinx/Vivado/2020.2/bin"
20 |     val devices = Seq(
21 |       ("Kintex 7", "xc7k325tffg900-3"),
22 |       ("Virtex UltraScale+", "xcvu37p-fsvh2892-3-e")
23 |     )
24 |     // the tag becomes the suffix of the per-run workspace path
25 |     val goals = Seq(
26 |       (50 MHz, "area"),
27 |       (400 MHz, "fmax")
28 |     )
29 | 
30 |     val targets = ArrayBuffer[Target]()
31 |     for ((family, device) <- devices; (freq, tag) <- goals) {
32 |       targets += new Target {
33 |         override def getFamilyName(): String = family
34 |         override def synthesise(rtl: Rtl, workspace: String): Report = {
35 |           VivadoFlow(
36 |             frequencyTarget = freq,
37 |             vivadoPath = vivadoPath,
38 |             workspacePath = s"${workspace}_${tag}",
39 |             rtl = rtl,
40 |             family = getFamilyName(),
41 |             device = device
42 |           )
43 |         }
44 |       }
45 |     }
46 | 
47 |     val design = new Rtl {
48 |       override def getName(): String = name
49 |       override def getRtlPaths(): Seq[String] = paths
50 |       override def getTopModuleName(): String = topModuleName
51 |     }
52 |     Bench(Seq(design), targets, "/tmp/")
53 |   }
54 | }
55 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/common.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper
 2 | 
 3 | import java.nio.file.Files
 4 | import java.nio.file.Paths
 5 | import java.nio.file.StandardCopyOption
 6 | 
 7 | /** Helper to expose a classpath resource as a regular file on disk
 8 |   */
 9 | object Resource {
10 |   /** Copies resource `name` to `<java.io.tmpdir>/<user.name>/resource/<file name>`,
11 |     * replacing any previous copy, and returns the absolute path of that file.
12 |     * Throws IllegalArgumentException when the resource is not on the classpath.
13 |     */
14 |   def path(name: String) = {
15 |     val tmp = Paths.get(
16 |       System.getProperty("java.io.tmpdir"),
17 |       System.getProperty("user.name"),
18 |       "resource"
19 |     )
20 |     tmp.toFile().mkdirs()
21 |     val path = tmp.resolve(Paths.get(name).getFileName())
22 | 
23 |     val is = getClass().getResourceAsStream(name)
24 |     // a missing resource comes back as null: fail with its name, not an NPE
25 |     require(is != null, s"resource ${name} not found")
26 |     // Files.copy leaves the input stream open, so close it here
27 |     try Files.copy(is, path, StandardCopyOption.REPLACE_EXISTING)
28 |     finally is.close()
29 |     path.toFile().getAbsolutePath()
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/emit.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper
  2 | 
  3 | import chisel3._
  4 | import chisel3.stage.ChiselGeneratorAnnotation
  5 | import chisel3.experimental.ChiselAnnotation
  6 | import circt.stage.FirtoolOption
  7 | import circt.stage.ChiselStage
  8 | import _root_.sifive.enterprise.firrtl.NestedPrefixModulesAnnotation
  9 | import chisel3.experimental.annotate
 10 | 
 11 | /** Attaches a NestedPrefixModulesAnnotation with `prefix` to `module` (nothing is
 12 |   * added for a null or empty prefix); `inclusive` is forwarded to the annotation. */
 13 | object AddPrefix {
 14 |   def apply(module: Module, prefix: String, inclusive: Boolean = true) = {
 15 |     if (prefix != null && prefix != "") {
 16 |       annotate(new ChiselAnnotation {
 17 |         def toFirrtl = // pass the caller's flag rather than a hard-coded `true`
 18 |           new NestedPrefixModulesAnnotation(module.toTarget, prefix, inclusive)
 19 |       })
 20 |     }
 21 |   }
 22 | }
 23 | 
 24 | /** Mixin for apps that emit SystemVerilog from a Chisel module
 25 |   */
 26 | trait ChiselEmitVerilog extends App {
 27 |   // builds the module and writes it out; "-o <name>.sv" is handed over as the
 28 |   // third (options) argument of emitSystemVerilogFile
 29 |   def emit(genModule: () => RawModule, name: String) = {
 30 |     val outputOpts = Array("-o", s"${name}.sv")
 31 |     val noArgs = Array.empty[String]
 32 |     ChiselStage.emitSystemVerilogFile(genModule(), noArgs, outputOpts)
 33 |   }
 34 | }
 35 | 
 36 | /** Mixin that emits one file per float type / lane count / stage count
 37 |   */
 38 | trait EmitChiselModule extends ChiselEmitVerilog {
 39 |   def emitChisel(
 40 |       genModule: (FloatType, Int, Int, String) => RawModule,
 41 |       name: String,
 42 |       library: String,
 43 |       allStages: Seq[Int] = Seq(1, 2, 3),
 44 |       floatTypes: Seq[FloatType] = Seq(FloatH, FloatS, FloatD),
 45 |       lanes: Seq[Int] = Seq(1, 2, 4)
 46 |   ) = {
 47 |     for {
 48 |       floatType <- floatTypes
 49 |       laneCount <- lanes
 50 |       stages <- allStages
 51 |     } {
 52 |       val floatName = floatType.kind().toString()
 53 |       // <name>_<kind><lanes>l<stages>s_<library>
 54 |       val moduleName =
 55 |         s"${name}_${floatName}${laneCount}l${stages}s_${library}"
 56 |       // the generator receives the module name plus "_" as its prefix
 57 |       val prefix = s"${moduleName}_"
 58 |       emit(() => genModule(floatType, laneCount, stages, prefix), moduleName)
 59 |     }
 60 |   }
 61 | }
 62 | 
 63 | /** Mixin for apps that generate Verilog from a SpinalHDL component
 64 |   */
 65 | trait SpinalEmitVerilog extends App {
 66 |   /** Elaborates `gen` and emits Verilog.
 67 |     *
 68 |     * @param gen         by-name constructor of the top-level component
 69 |     * @param netlistName output file stem; `<netlistName>.v` is passed as the netlist
 70 |     *                    file name, and null is passed through when left unset
 71 |     */
 72 |   def work[T <: spinal.core.Component](
 73 |       gen: => T,
 74 |       netlistName: String = null
 75 |   ): Unit = {
 76 |     val fileName = Option(netlistName).map(stem => s"$stem.v").orNull
 77 |     spinal.core.SpinalConfig(netlistFileName = fileName).generateVerilog(gen)
 78 |   }
 79 | }
 80 | 
 81 | /** Mixin that emits a SpinalHDL module for a grid of float types and lane counts
 82 |   */
 83 | trait EmitSpinalModule extends SpinalEmitVerilog {
 84 |   // emits <name>_<kind><lanes>l<stages>s for FloatH/FloatS/FloatD x 1, 2, 4, 8 lanes;
 85 |   // the stage count is the same for every variant
 86 |   def emitFlopoco[T <: spinal.core.Component](
 87 |       stages: Int,
 88 |       genModule: (FloatType, Int, Int) => T,
 89 |       name: String
 90 |   ) = {
 91 |     for {
 92 |       kind <- Seq(FloatH, FloatS, FloatD)
 93 |       lanes <- Seq(1, 2, 4, 8)
 94 |     } {
 95 |       val floatName = kind.kind().toString()
 96 |       work(genModule(kind, lanes, stages), s"${name}_${floatName}${lanes}l${stages}s")
 97 |     }
 98 |   }
 99 | }
100 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/float.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper
 2 | 
 3 | import _root_.hardfloat.fNFromRecFN
 4 | import _root_.hardfloat.recFNFromFN
 5 | import chisel3._
 6 | 
 7 | /** Description of a floating-point format
 8 |   */
 9 | trait FloatType {
10 |   // to be supplied by each format:
11 |   // number of exponent bits
12 |   def exp(): Int
13 |   // remaining bits, i.e. total - exp
14 |   def sig(): Int
15 |   def kind(): FpKind.FpKind
16 | 
17 |   // derived from the above:
18 |   // total bits
19 |   def width(): Int = exp() + sig()
20 |   // hardfloat width in bits (one more than width)
21 |   def widthHardfloat(): Int = 1 + width()
22 |   // FloPoCo width in bits (two more than width)
23 |   def widthFlopoco(): Int = 2 + width()
24 | 
25 |   // conversion to and from hardfloat's internal representation
26 |   def toHardfloat(n: UInt) = recFNFromFN(exp(), sig(), n)
27 |   def fromHardfloat(n: UInt) = fNFromRecFN(exp(), sig(), n)
28 |   // slice element `offset` out of a packed value; element 0 sits in the low bits
29 |   def extract(data: UInt, offset: Int) =
30 |     data(offset * width() + width() - 1, offset * width())
31 |   def extractHardfloat(data: UInt, offset: Int) =
32 |     data(offset * widthHardfloat() + widthHardfloat() - 1, offset * widthHardfloat())
33 |   // 1.0 as a raw bit pattern: exponent field 2^(exp-1) - 1 above sig-1 zero bits
34 |   def oneBigInt() = ((BigInt(1) << (exp() - 1)) - 1) << (sig() - 1)
35 |   // chisel literals: oneChisel is oneBigInt at width() bits,
36 |   // oneHardfloatChisel has only bit width()-1 set, at widthHardfloat() bits
37 |   def oneChisel() = oneBigInt().U(width().W)
38 |   def oneHardfloatChisel() =
39 |     (BigInt(1) << (width() - 1)).U(widthHardfloat().W)
40 | }
41 | 
42 | /** Enum of floating point types: D(ouble), S(ingle), H(alf) precision */
43 | object FpKind extends Enumeration {
44 |   type FpKind = Value
45 |   val D = Value
46 |   val S = Value
47 |   val H = Value
48 | }
49 | 
50 | /** 64-bit Double: 11 exponent bits, 53 remaining bits
51 |   */
52 | object FloatD extends FloatType {
53 |   override def exp(): Int = 11
54 |   override def sig(): Int = 53
55 |   override def kind(): FpKind.FpKind = FpKind.D
56 | }
57 | 
58 | /** 32-bit Float: 8 exponent bits, 24 remaining bits
59 |   */
60 | object FloatS extends FloatType {
61 |   override def exp(): Int = 8
62 |   override def sig(): Int = 24
63 |   override def kind(): FpKind.FpKind = FpKind.S
64 | }
65 | 
66 | /** 16-bit Half Float: 5 exponent bits, 11 remaining bits
67 |   */
68 | object FloatH extends FloatType {
69 |   override def exp(): Int = 5
70 |   override def sig(): Int = 11
71 |   override def kind(): FpKind.FpKind = FpKind.H
72 | }
73 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/flopoco/FPCFExp.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import fpuwrapper.EmitSpinalModule
 4 | import fpuwrapper.FloatType
 5 | import fpuwrapper.Resource
 6 | import spinal.core._
 7 | import spinal.lib._
 8 | 
 9 | class FPCFExpRequest(val floatType: FloatType, val lanes: Int) extends Bundle {
10 |   val a = Vec(UInt(floatType.widthFlopoco() bits), lanes) // one FloPoCo-width operand per lane
11 | }
12 | 
13 | class FPCFExpResponse(val floatType: FloatType, val lanes: Int) extends Bundle {
14 |   // result: one FloPoCo-width value per lane
15 |   val res = Vec(UInt(floatType.widthFlopoco() bits), lanes)
16 | }
17 | 
18 | class FPCFExp(floatType: FloatType, lanes: Int, stages: Int) extends Component {
19 |   val io = new Bundle {
20 |     val req = slave(Flow(new FPCFExpRequest(floatType, lanes)))
21 |     val resp = master(Flow(new FPCFExpResponse(floatType, lanes)))
22 |   }
23 |   // one FPCFExp black box per lane: lane i reads req.a(i) and drives resp.res(i)
24 |   for (i <- 0 until lanes) {
25 |     val fma = new FPCFExpBlackBox(floatType, stages)
26 |     fma.X := io.req.a(i).asBits
27 |     io.resp.res(i) := fma.R.asUInt
28 |   }
29 |   // valid is delayed by `stages` cycles (presumably the black box latency - confirm)
30 |   io.resp.valid := Delay(io.req.valid, stages)
31 | }
32 | 
33 | class FPCFExpBlackBox(floatType: FloatType, stages: Int) extends BlackBox {
34 |   val clk = in(Bool())
35 |   val X = in(Bits(floatType.widthFlopoco() bits))
36 |   val R = out(Bits(floatType.widthFlopoco() bits))
37 |   // the definition name carries only the float kind; the stage count appears in the file name below
38 |   setDefinitionName(s"FPCFExp_${floatType.kind().toString()}")
39 | 
40 |   // Map the clk: connect `clk` to the current clock domain's clock
41 |   mapCurrentClockDomain(
42 |     clock = clk
43 |   )
44 |   // the Verilog for this kind/stage combination must exist as resource /flopoco/<fileName>
45 |   val fileName = s"FPCFExp_${floatType.kind().toString()}${stages}s.v"
46 |   assert(
47 |     getClass().getResource(s"/flopoco/${fileName}") != null,
48 |     s"file ${fileName} not found"
49 |   )
50 |   addRTLPath(Resource.path(s"/flopoco/${fileName}"))
51 | }
52 | 
53 | object FPCFExp extends EmitSpinalModule {
54 |   emitFlopoco(
55 |     stages = 3, // stage count shared by every emitted variant
56 |     genModule = new FPCFExp(_, _, _),
57 |     name = "FlopocoFPCFExp"
58 |   )
59 | }
60 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/flopoco/FPCToIEEE.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import fpuwrapper._
 4 | import spinal.core._
 5 | import spinal.lib._
 6 | 
 7 | class FPCToIEEEInner(floatType: FloatType) extends Component {
 8 |   val io = new Bundle {
 9 |     val req = in(UInt(floatType.widthFlopoco() bits))
10 |     val resp = out(UInt(floatType.width() bits))
11 |   }
12 |   // input fields, MSB first: exn (2 bits) | sign | exponent | fraction
13 |   val fpc = io.req
14 |   val fracX = fpc(floatType.sig() - 2 downto 0)
15 |   val expX = fpc(floatType.width() - 2 downto floatType.sig() - 1)
16 |   val sX = Bool()
17 |   val exnX = fpc(floatType.width() + 1 downto floatType.width())
18 |   when(exnX === 1 || exnX === 2 || exnX === 0) { // sign is kept unless exn == 3
19 |     sX := fpc(floatType.width() - 1)
20 |   } otherwise {
21 |     sX := False
22 |   }
23 |   // exponent field all zero; only consulted in the exn == 1 case below
24 |   val expZero = expX === 0
25 | 
26 |   val ieee = UInt(floatType.width() bits)
27 |   io.resp := ieee
28 |   // output fields, concatenated as sign | exponent | fraction
29 |   val fracR = UInt(floatType.sig() - 1 bits)
30 |   val expR = UInt(floatType.exp() bits)
31 |   val sR = Bool()
32 |   ieee := Cat(sR, expR, fracR).asUInt
33 |   // exnX selects the class: 0 zero, 1 normal, 2 inf, 3 (default) nan
34 |   switch(exnX) {
35 |     is(0) {
36 |       // zero
37 |       fracR := 0
38 |       expR := 0
39 |       sR := sX
40 |     }
41 |     is(1) {
42 |       // normal
43 |       when(expZero) {
44 |         fracR := Cat(True, fracX(floatType.sig() - 2 downto 1)).asUInt // exponent 0: emit a leading 1 followed by the fraction shifted right by one
45 |       }.otherwise {
46 |         fracR := fracX
47 |       }
48 |       expR := expX
49 |       sR := sX
50 |     }
51 |     is(2) {
52 |       // inf
53 |       fracR := exnX(0).asUInt.resized // exnX(0) is 0 here, so the fraction is all zeros
54 |       expR.setAllTo(True)
55 |       sR := sX
56 |     }
57 |     default {
58 |       // nan
59 |       fracR := exnX(0).asUInt.resized // exnX(0) is 1 here, so only the lowest fraction bit is set
60 |       expR.setAllTo(True)
61 |       sR := False
62 |     }
63 |   }
64 | }
65 | 
66 | /** Implementation of OutputIEEE operator of Flopoco
67 |   *
68 |   * @param floatType format of every lane
69 |   * @param lanes number of values converted in parallel
70 |   * @param stages cycles of delay applied to both payload and valid */
71 | class FPCToIEEE(floatType: FloatType, lanes: Int, stages: Int)
72 |     extends Component {
73 |   val io = new Bundle {
74 |     val req = slave(Flow(Vec(UInt(floatType.widthFlopoco() bits), lanes)))
75 |     val resp = master(Flow(Vec(UInt(floatType.width() bits), lanes)))
76 |   }
77 |   // valid travels through the same number of stages as the payload
78 |   io.resp.valid := Delay(io.req.valid, stages)
79 |   // one FPCToIEEEInner per lane, its output delayed by `stages`
80 |   for (i <- 0 until lanes) {
81 |     val inner = new FPCToIEEEInner(floatType)
82 |     inner.io.req := io.req.payload(i)
83 |     io.resp.payload(i) := Delay(inner.io.resp, stages)
84 |   }
85 | }
86 | 
87 | object FPCToIEEE extends EmitSpinalModule {
88 |   emitFlopoco(
89 |     stages = 0, // no delay stages in the emitted variants
90 |     genModule = new FPCToIEEE(_, _, _),
91 |     name = "FlopocoFPCToIEEE"
92 |   )
93 | }
94 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/flopoco/IEEEFExp.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import fpuwrapper.EmitSpinalModule
 4 | import fpuwrapper.FloatS
 5 | import fpuwrapper.FloatType
 6 | import fpuwrapper.SpinalEmitVerilog
 7 | import fpuwrapper.Synthesis
 8 | import spinal.core._
 9 | import spinal.lib._
10 | 
11 | class IEEEFExpRequest(val floatType: FloatType, val lanes: Int) extends Bundle {
12 |   val a = Vec(UInt(floatType.width() bits), lanes) // one width()-bit operand per lane
13 | }
14 | 
15 | class IEEEFExpResponse(val floatType: FloatType, val lanes: Int)
16 |     extends Bundle {
17 |   // result: one width()-bit value per lane
18 |   val res = Vec(UInt(floatType.width() bits), lanes)
19 | }
20 | 
21 | class IEEEFExp(floatType: FloatType, lanes: Int, stages: Int)
22 |     extends Component {
23 |   val io = new Bundle {
24 |     val req = slave(Flow(new IEEEFExpRequest(floatType, lanes)))
25 |     val resp = master(Flow(new IEEEFExpResponse(floatType, lanes)))
26 |   }
27 |   // per lane: IEEEToFPCInner -> FPCFExp black box -> FPCToIEEEInner
28 |   for (i <- 0 until lanes) {
29 |     val ieee2fpc = new IEEEToFPCInner(floatType)
30 |     ieee2fpc.io.req := io.req.a(i)
31 | 
32 |     val fma = new FPCFExpBlackBox(floatType, stages) // named fma, but this is the FPCFExp box
33 |     fma.X := ieee2fpc.io.resp.asBits
34 | 
35 |     val fpc2ieee = new FPCToIEEEInner(floatType)
36 |     fpc2ieee.io.req := fma.R.asUInt
37 |     io.resp.res(i) := fpc2ieee.io.resp
38 |   }
39 |   // valid is delayed by `stages` cycles (presumably the black box latency - confirm)
40 |   io.resp.valid := Delay(io.req.valid, stages)
41 | }
42 | 
43 | object IEEEFExp extends EmitSpinalModule {
44 |   emitFlopoco(
45 |     stages = 3, // stage count shared by every emitted variant
46 |     genModule = new IEEEFExp(_, _, _),
47 |     name = "FlopocoIEEEFExp"
48 |   )
49 | }
50 | 
51 | /** Emits the single-lane, 3-stage FloatS IEEEFExp wrapper and passes it,
52 |   * together with the matching FPCFExp netlist, to Synthesis.build.
53 |   */
54 | object IEEEFExpSynth extends SpinalEmitVerilog {
55 |   for {
56 |     floatType <- Seq(FloatS)
57 |     stages <- Seq(3)
58 |   } {
59 |     val lanes = 1
60 |     val floatName = floatType.kind().toString()
61 |     val name = s"IEEEFExp_${floatName}${lanes}l${stages}s"
62 |     work(new IEEEFExp(floatType, lanes, stages), name)
63 | 
64 |     // generated wrapper first, then the netlist of the black box it instantiates
65 |     val fileName = s"FPCFExp_${floatName}${stages}s.v"
66 |     val sources = Seq(
67 |       s"${name}.v",
68 |       s"./fpu-wrappers/resources/flopoco/${fileName}"
69 |     )
70 |     // second argument names the top-level module
71 |     Synthesis.build(sources, s"IEEEFExp", s"${name}_flopoco")
72 |   }
73 | }
74 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/flopoco/IEEEFMA.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.flopoco
  2 | 
  3 | import fpuwrapper._
  4 | import spinal.core._
  5 | import spinal.lib._
  6 | 
  7 | object IEEEFMAOp extends SpinalEnum {
  8 |   // 1 * op[1] + op[2]
  9 |   val FADD = newElement()
 10 |   // 1 * op[1] - op[2]
 11 |   val FSUB = newElement()
 12 |   // op[0] * op[1] + 0
 13 |   val FMUL = newElement()
 14 |   // op[0] * op[1] + op[2]
 15 |   val FMADD = newElement()
 16 |   // op[0] * op[1] - op[2]
 17 |   val FMSUB = newElement()
 18 |   // -(op[0] * op[1] - op[2])
 19 |   val FNMSUB = newElement()
 20 |   // -(op[0] * op[1] + op[2])
 21 |   val FNMADD = newElement()
 22 |   // NOP is an alias of FADD, not a separate element
 23 |   val NOP = FADD
 24 | }
 25 | 
 26 | class IEEEFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle {
 27 |   val op = IEEEFMAOp() // one operation, shared by all lanes
 28 |   val operands = Vec(Vec(UInt(floatType.width() bits), lanes), 3) // indexed operands(operand)(lane)
 29 | }
 30 | 
 31 | class IEEEFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle {
 32 |   // result: one width()-bit value per lane
 33 |   val res = Vec(UInt(floatType.width() bits), lanes)
 34 | }
 35 | /** Multi-lane wrapper that maps IEEEFMAOp requests onto one IEEEFMA black box per lane */
 36 | class IEEEFMA(floatType: FloatType, lanes: Int, stages: Int) extends Component {
 37 |   val io = new Bundle {
 38 |     val req = slave(Flow(new IEEEFMARequest(floatType, lanes)))
 39 |     val resp = master(Flow(new IEEEFMAResponse(floatType, lanes)))
 40 |   }
 41 |   // defaults: plain op[0] * op[1] + op[2]; the switch below overrides them per operation
 42 |   val negateAB = False
 43 |   val negateC = False
 44 |   val op1 = Vec(UInt(floatType.width() bits), lanes)
 45 |   val op2 = Vec(UInt(floatType.width() bits), lanes)
 46 |   val op3 = Vec(UInt(floatType.width() bits), lanes)
 47 |   op1 := io.req.operands(0)
 48 |   op2 := io.req.operands(1)
 49 |   op3 := io.req.operands(2)
 50 |   // constants: 1.0, and a zero that carries the sign of op[0] * op[1]
 51 |   val one = Vec(UInt(floatType.width() bits), lanes)
 52 |   val zero = Vec(UInt(floatType.width() bits), lanes)
 53 |   for (i <- 0 until lanes) {
 54 |     one(i) := floatType.oneBigInt()
 55 |     val prodSign = io.req.operands(0)(i).msb ^ io.req.operands(1)(i).msb
 56 |     zero(i) := Cat(prodSign, U(0, floatType.width() - 1 bits)).asUInt
 57 |   }
 58 |   switch(io.req.op) {
 59 |     is(IEEEFMAOp.FADD) {
 60 |       op1 := one
 61 |     }
 62 |     is(IEEEFMAOp.FSUB) {
 63 |       op1 := one
 64 |       negateC := True
 65 |     }
 66 |     is(IEEEFMAOp.FMUL) {
 67 |       op3 := zero // sign-matched zero: a -0 product stays -0 (adding +0 would turn it into +0)
 68 |     }
 69 |     is(IEEEFMAOp.FMADD) {
 70 |       // do nothing
 71 |     }
 72 |     is(IEEEFMAOp.FMSUB) {
 73 |       negateC := True
 74 |     }
 75 |     is(IEEEFMAOp.FNMSUB) {
 76 |       negateAB := True
 77 |     }
 78 |     is(IEEEFMAOp.FNMADD) {
 79 |       negateAB := True
 80 |       negateC := True
 81 |     }
 82 |   }
 83 |   // one black box per lane; the negate flags are shared by all lanes
 84 |   for (i <- 0 until lanes) {
 85 |     val fma = new IEEEFMABlackBox(floatType, stages)
 86 |     fma.A := op1(i).asBits
 87 |     fma.B := op2(i).asBits
 88 |     fma.C := op3(i).asBits
 89 |     fma.negateAB := negateAB
 90 |     fma.negateC := negateC
 91 |     fma.RndMode := 0 // fixed rounding mode 0 (presumably round-to-nearest-even - confirm against the core)
 92 |     io.resp.res(i) := fma.R.asUInt
 93 |   }
 94 |   // valid is delayed by `stages` cycles (presumably the black box latency - confirm)
 95 |   io.resp.valid := Delay(io.req.valid, stages)
 96 | }
 97 | 
 98 | class IEEEFMABlackBox(floatType: FloatType, stages: Int) extends BlackBox {
 99 |   val clk = in(Bool())
100 |   val A = in(Bits(floatType.width() bits))
101 |   val B = in(Bits(floatType.width() bits))
102 |   val C = in(Bits(floatType.width() bits))
103 |   val negateAB = in(Bool())
104 |   val negateC = in(Bool())
105 |   val RndMode = in(Bits(2 bits))
106 |   val R = out(Bits(floatType.width() bits))
107 |   // the definition name carries only the float kind; the stage count appears in the file name below
108 |   setDefinitionName(s"IEEEFMA_${floatType.kind().toString()}")
109 | 
110 |   // Map the clk: connect `clk` to the current clock domain's clock
111 |   mapCurrentClockDomain(
112 |     clock = clk
113 |   )
114 |   // the Verilog for this kind/stage combination must exist as resource /flopoco/<fileName>
115 |   val fileName = s"IEEEFMA_${floatType.kind().toString()}${stages}s.v"
116 |   assert(
117 |     getClass().getResource(s"/flopoco/${fileName}") != null,
118 |     s"file ${fileName} not found"
119 |   )
120 |   addRTLPath(Resource.path(s"/flopoco/${fileName}"))
121 | }
122 | 
123 | object IEEEFMA extends EmitSpinalModule {
124 |   emitFlopoco(
125 |     stages = 3, // stage count shared by every emitted variant
126 |     genModule = new IEEEFMA(_, _, _),
127 |     name = "FlopocoIEEEFMA"
128 |   )
129 | }
130 | 
131 | /** Emits the single-lane, 4-stage FloatS IEEEFMA wrapper and passes it,
132 |   * together with the matching FloPoCo netlist, to Synthesis.build.
133 |   */
134 | object IEEEFMASynth extends SpinalEmitVerilog {
135 |   for {
136 |     floatType <- Seq(FloatS)
137 |     stages <- Seq(4)
138 |   } {
139 |     val lanes = 1
140 |     val floatName = floatType.kind().toString()
141 |     val name = s"IEEEFMA_${floatName}${lanes}l${stages}s"
142 |     work(new IEEEFMA(floatType, lanes, stages), name)
143 | 
144 |     // generated wrapper first, then the netlist of the black box it instantiates
145 |     val fileName = s"IEEEFMA_${floatName}${stages}s.v"
146 |     val sources = Seq(
147 |       s"${name}.v",
148 |       s"./fpu-wrappers/resources/flopoco/${fileName}"
149 |     )
150 |     // second argument names the top-level module
151 |     Synthesis.build(sources, s"IEEEFMA", s"${name}_flopoco")
152 |   }
153 | }
154 | 
155 | /** Emits the single-lane, 3-stage FloatS IEEEFMA wrapper and runs it
156 |   * through `bench`.
157 |   */
158 | object IEEEFMABench extends SpinalEmitVerilog with VivadoBench {
159 |   for {
160 |     floatType <- Seq(FloatS)
161 |     stages <- Seq(3)
162 |   } {
163 |     val lanes = 1
164 |     val floatName = floatType.kind().toString()
165 |     val name = s"IEEEFMA_${floatName}${lanes}l${stages}s"
166 |     work(new IEEEFMA(floatType, lanes, stages), name)
167 | 
168 |     val fileName = s"IEEEFMA_${floatName}${stages}s.v"
169 |     val sources =
170 |       Seq(s"${name}.v", s"./fpu-wrappers/resources/flopoco/${fileName}")
171 |     // arguments: bench name, RTL paths, top-level module name
172 |     bench(s"${name}_flopoco", sources, s"IEEEFMA")
173 |   }
174 | }
175 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/flopoco/IEEEToFPC.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import fpuwrapper._
 4 | import spinal.core._
 5 | import spinal.lib._
 6 | 
 7 | class IEEEToFPCInner(floatType: FloatType) extends Component {
 8 |   val io = new Bundle {
 9 |     val req = in(UInt(floatType.width() bits))
10 |     val resp = out(UInt(floatType.widthFlopoco() bits))
11 |   }
12 |   // input fields, MSB first: sign | exponent | fraction
13 |   val ieee = io.req
14 |   val fracX = ieee(floatType.sig() - 2 downto 0)
15 |   val expX = ieee(floatType.width() - 2 downto floatType.sig() - 1)
16 |   val sX = ieee(floatType.width() - 1)
17 |   // classification flags
18 |   val expZero = expX === 0
19 |   val expInfty = expX.andR
20 |   val fracZero = fracX === 0
21 |   val reprSubNormal = fracX(floatType.sig() - 2) // top fraction bit
22 | 
23 |   // representable subnormal numbers (exponent 0, top fraction bit 1): drop that bit and shift the rest left by one
24 |   val sfracX = UInt(floatType.sig() - 1 bits)
25 |   when(expZero && reprSubNormal) {
26 |     sfracX := fracX(floatType.sig() - 3 downto 0) << 1
27 |   } otherwise {
28 |     sfracX := fracX
29 |   }
30 | 
31 |   val fpc = UInt(floatType.widthFlopoco() bits)
32 |   io.resp := fpc
33 |   // output fields, concatenated as exn (2 bits) | sign | exponent | fraction
34 |   val fracR = UInt(floatType.sig() - 1 bits)
35 |   val expR = UInt(floatType.exp() bits)
36 |   val sR = Bool()
37 |   val exnR = UInt(2 bits)
38 |   fpc := Cat(exnR, sR, expR, fracR).asUInt
39 |   // sign and exponent pass through unchanged; only exnR depends on the class
40 |   sR := sX
41 |   expR := expX
42 |   fracR := sfracX
43 | 
44 |   when(expInfty) {
45 |     when(fracZero) {
46 |       // inf
47 |       exnR := 2
48 |     } otherwise {
49 |       // nan
50 |       exnR := 3
51 |     }
52 |   } elsewhen (expZero && !reprSubNormal) {
53 |     // zero, and any exponent-0 input whose top fraction bit is 0 (reported as zero)
54 |     exnR := 0
55 |   } otherwise {
56 |     // normal and representable subnormal
57 |     exnR := 1
58 |   }
59 | }
60 | 
61 | /** Conversion from IEEE format to flopoco format, one converter per lane.
62 |   * @param floatType floating point format shared by all lanes
63 |   * @param lanes number of values converted in parallel
64 |   * @param stages depth passed to Delay for both valid and payload
65 |   */
66 | class IEEEToFPC(floatType: FloatType, lanes: Int, stages: Int)
67 |     extends Component {
68 |   val io = new Bundle {
69 |     val req = slave(Flow(Vec(UInt(floatType.width() bits), lanes))) // IEEE inputs, one per lane
70 |     val resp = master(Flow(Vec(UInt(floatType.widthFlopoco() bits), lanes))) // flopoco outputs, one per lane
71 |   }
72 | 
73 |   io.resp.valid := Delay(io.req.valid, stages)
74 | 
75 |   for (i <- 0 until lanes) {
76 |     val inner = new IEEEToFPCInner(floatType)
77 |     inner.io.req := io.req.payload(i)
78 |     io.resp.payload(i) := Delay(inner.io.resp, stages) // same depth as valid so both stay aligned
79 |   }
80 | }
81 | 
82 | object IEEEToFPC extends EmitSpinalModule { // entry point that passes an IEEEToFPC factory to emitFlopoco
83 |   emitFlopoco(
84 |     0, // NOTE(review): meaning of this argument is defined by emitFlopoco, which is not visible here
85 |     (floatType, lanes, stages) => new IEEEToFPC(floatType, lanes, stages),
86 |     "FlopocoIEEEToFPC"
87 |   )
88 | }
89 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/formal/HFRoundtrip.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.formal
 2 | 
 3 | import circt.stage.ChiselStage
 4 | import chisel3._
 5 | import fpuwrapper.FloatS
 6 | import fpuwrapper.FloatType
 7 | import fpuwrapper.hardfloat.HFToIEEE
 8 | import fpuwrapper.hardfloat.IEEEToHF
 9 | 
10 | import scala.sys.process._
11 | 
12 | class HFRoundtrip(floatType: FloatType) extends Module { // asserts that IEEE -> hardfloat -> IEEE returns the input
13 |   val io = IO(new Bundle {
14 |     val req = Input(UInt(floatType.width().W)) // IEEE value under test
15 |   })
16 | 
17 |   val ieee2hf = Module(
18 |     new IEEEToHF(floatType, 1, 0) // NOTE(review): presumably 1 lane, 0 stages - confirm against IEEEToHF
19 |   )
20 |   ieee2hf.io.float.valid := true.B // always valid: the check is applied every cycle
21 |   ieee2hf.io.float.bits(0) := io.req
22 | 
23 |   val hf2ieee = Module(
24 |     new HFToIEEE(floatType, 1, 0) // NOTE(review): presumably 1 lane, 0 stages - confirm against HFToIEEE
25 |   )
26 |   hf2ieee.io.hardfloat.valid := true.B
27 |   hf2ieee.io.hardfloat.bits(0) := ieee2hf.io.hardfloat.bits(0) // feed the converted value straight back
28 | 
29 |   chisel3.assert(hf2ieee.io.float.bits(0) === io.req) // round trip must reproduce the input bit for bit
30 | }
31 | 
32 | object HFRoundtrip extends App { // elaborates HFRoundtrip for FloatS, then runs yosys and yosys-smtbmc on it
33 |   ChiselStage.emitSystemVerilog( // NOTE(review): return value unused; confirm this call writes HFRoundtrip.sv for yosys
34 |     new HFRoundtrip(FloatS),
35 |     Array("-o", "HFRoundtrip")
36 |   )
37 |   Seq(
38 |     "yosys",
39 |     "-v2",
40 |     "-p",
41 |     "read_verilog -formal HFRoundtrip.sv",
42 |     "-p",
43 |     "prep",
44 |     "-p",
45 |     "write_smt2 -wires HFRoundtrip.smt2"
46 |   ).! // runs the command; its exit code is discarded
47 |   Seq(
48 |     "yosys-smtbmc",
49 |     "-s",
50 |     "z3",
51 |     "-t",
52 |     "30",
53 |     "--dump-vcd",
54 |     "test.vcd",
55 |     "-m",
56 |     "HFRoundtrip",
57 |     "HFRoundtrip.smt2"
58 |   ).! // runs even if the yosys step failed; exit code is discarded here too
59 | }
60 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/formal/IEEEFMAFormal.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.formal
 2 | 
 3 | import circt.stage.ChiselStage
 4 | import chisel3._
 5 | import chisel3.util._
 6 | import fpuwrapper.ChiselEmitVerilog
 7 | import fpuwrapper.FloatH
 8 | import fpuwrapper.FloatType
 9 | 
10 | class FMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { // three operand vectors, one entry per lane
11 |   val a = Vec(lanes, UInt(floatType.width().W))
12 |   val b = Vec(lanes, UInt(floatType.width().W))
13 |   val c = Vec(lanes, UInt(floatType.width().W))
14 | }
15 | 
16 | class FMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { // per-lane result and 5-bit exception flags
17 |   // result
18 |   val res = Vec(lanes, UInt(floatType.width().W))
19 |   // exception status
20 |   val exc = Vec(lanes, Bits(5.W))
21 | }
22 | 
23 | class IEEEFMAFormal(floatType: FloatType, lanes: Int, stages: Int) // drives the hardfloat and fudian FMA wrappers with the same inputs
24 |     extends Module {
25 |   val io = IO(new Bundle {
26 |     val req = Flipped(Valid(new FMARequest(floatType, lanes)))
27 |   })
28 | 
29 |   val zeros = WireInit(VecInit.fill(lanes)(0.U(floatType.width().W))) // all-zero operand vector
30 | 
31 |   val hardfloat = Module(
32 |     new fpuwrapper.hardfloat.IEEEFMA(floatType, lanes, stages)
33 |   )
34 |   hardfloat.io.req.valid := io.req.valid
35 |   hardfloat.io.req.bits.op := fpuwrapper.hardfloat.FMAOp.FMADD
36 |   hardfloat.io.req.bits.operands(0) := zeros // operand 0 is tied to zero; io.req.bits.a is not used in this module
37 |   hardfloat.io.req.bits.operands(1) := io.req.bits.b
38 |   hardfloat.io.req.bits.operands(2) := io.req.bits.c
39 | 
40 |   val fudian = Module(
41 |     new fpuwrapper.fudian.IEEEFMA(floatType, lanes, stages)
42 |   )
43 |   fudian.io.req.valid := io.req.valid
44 |   fudian.io.req.bits.operands(0) := zeros // same operands as the hardfloat instance
45 |   fudian.io.req.bits.operands(1) := io.req.bits.b
46 |   fudian.io.req.bits.operands(2) := io.req.bits.c
47 | 
48 |   chisel3.assert( // both implementations must agree on when a response is valid
49 |     hardfloat.io.resp.valid === fudian.io.resp.valid
50 |   )
51 |   when(hardfloat.io.resp.valid) {
52 |     for (i <- 0 until lanes) {
53 |       chisel3.assert( // only the result bits are compared; the exc flags are not checked
54 |         hardfloat.io.resp.bits.res(i) === fudian.io.resp.bits.res(i)
55 |       )
56 |     }
57 |   }
58 | }
59 | 
60 | object IEEEFMAFormal extends App { // emits SystemVerilog for the FloatH, 1 lane, 1 stage configuration
61 |   ChiselStage.emitSystemVerilogFile(
62 |     new IEEEFMAFormal(FloatH, 1, 1)
63 |   )
64 | }
65 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/fpnew/FPNewBlackbox.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.fpnew
 2 | 
 3 | import chisel3._
 4 | import chisel3.util.HasBlackBoxResource
 5 | import fpuwrapper.FloatType
 6 | 
 7 | class FPNewBlackbox( // Chisel BlackBox whose Verilog body comes from a pre-synthesized resource file
 8 |     floatType: FloatType,
 9 |     lanes: Int,
10 |     stages: Int,
11 |     tagWidth: Int
12 | ) extends BlackBox(
13 |       Map() // no Verilog parameters are passed; the configuration is selected by the resource file name
14 |     )
15 |     with HasBlackBoxResource {
16 |   val fLen = floatType.width() * lanes // total data width: one float per lane
17 |   val io = IO(new Bundle {
18 |     val clk_i = Input(Clock())
19 |     val rst_ni = Input(Bool())
20 |     val operands_i = Input(UInt((fLen * 3).W)) // three operands packed into one vector
21 |     val rnd_mode_i = Input(UInt(3.W))
22 |     val op_i = Input(UInt(4.W))
23 |     val op_mod_i = Input(Bool())
24 |     val src_fmt_i = Input(UInt(3.W))
25 |     val dst_fmt_i = Input(UInt(3.W))
26 |     val int_fmt_i = Input(UInt(2.W))
27 |     val vectorial_op_i = Input(Bool())
28 |     val tag_i = Input(UInt(tagWidth.W))
29 |     val in_valid_i = Input(Bool())
30 |     val in_ready_o = Output(Bool())
31 |     val flush_i = Input(Bool())
32 |     val result_o = Output(UInt(fLen.W))
33 |     val status_o = Output(UInt(5.W))
34 |     val tag_o = Output(UInt(tagWidth.W))
35 |     val out_valid_o = Output(Bool())
36 |     val out_ready_i = Input(Bool())
37 |     val busy_o = Output(Bool())
38 |   }).suggestName("io")
39 | 
40 |   addResource( // resource path is keyed by float kind, lane count and stage count
41 |     s"/fpnew/FPNewBlackbox_${floatType.kind().toString()}${lanes}l${stages}s.synth.v"
42 |   )
43 | }
44 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/fpnew/IEEEFPU.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.fpnew
  2 | 
  3 | import chisel3._
  4 | import chisel3.util.Decoupled
  5 | import fpuwrapper.EmitChiselModule
  6 | import fpuwrapper.FloatType
  7 | import fpuwrapper.FloatS
  8 | import fpuwrapper.FloatD
  9 | import fpuwrapper.Synthesis
 10 | 
 11 | class FPConfig() // empty class; NOTE(review): not referenced anywhere in this file - confirm it is still needed
 12 | 
 13 | object FPFloatFormat extends ChiselEnum { // values are numbered in declaration order, starting at 0
 14 |   val Fp32, Fp64, Fp16, Fp8, Fp16Alt = Value
 15 | }
 16 | 
 17 | object FPIntFormat extends ChiselEnum { // values are numbered in declaration order, starting at 0
 18 |   val Int8, Int16, Int32, Int64 = Value
 19 | }
 20 | 
 21 | object FPOperation extends ChiselEnum { // values are numbered in declaration order, starting at 0
 22 |   val FMADD, FNMSUB, ADD, MUL, DIV, SQRT, SGNJ, MINMAX, CMP, CLASSIFY, F2F, F2I,
 23 |       I2F, CPKAB, CPKCD = Value
 24 | }
 25 | 
 26 | object FPRoundingMode extends ChiselEnum { // encodings must match fpnew's roundmode_e, since asUInt drives rnd_mode_i
 27 |   val RNE, RTZ, RDN, RUP, RMM = Value; val DYN = Value(7.U) // RNE..RMM are 0..4; DYN is 3'b111 in fpnew, not 5
 28 | }
 29 | 
 30 | class FPRequest(val fLen: Int) extends Bundle { // one request: three fLen-bit operands plus operation controls
 31 |   val operands = Vec(3, UInt(fLen.W))
 32 |   val roundingMode = FPRoundingMode()
 33 |   val op = FPOperation()
 34 |   val opModifier = Bool()
 35 |   val srcFormat = FPFloatFormat()
 36 |   val dstFormat = FPFloatFormat()
 37 |   val intFormat = FPIntFormat()
 38 | }
 39 | 
 40 | class FPStatus extends Bundle { // five status flags; field order matters because status_o is mapped with asTypeOf
 41 |   val NV = Bool() // Invalid
 42 |   val DZ = Bool() // Divide by zero
 43 |   val OF = Bool() // Overflow
 44 |   val UF = Bool() // Underflow
 45 |   val NX = Bool() // Inexact
 46 | }
 47 | 
 48 | class FPResponse(val fLen: Int) extends Bundle { // one response: fLen-bit result plus status flags
 49 |   val result = UInt(fLen.W)
 50 |   val status = new FPStatus()
 51 | }
 52 | 
 53 | /** FPNew IO port. For meanings of ports, visit
 54 |   * https://github.com/pulp-platform/fpnew/blob/develop/docs/README.md
 55 |   */
 56 | class FPIO(val fLen: Int) extends Bundle {
 57 |   val req = Flipped(Decoupled(new FPRequest(fLen))) // incoming requests (ready/valid)
 58 |   val resp = Decoupled(new FPResponse(fLen)) // outgoing responses (ready/valid)
 59 |   val flush = Input(Bool())
 60 |   val busy = Output(Bool())
 61 | }
 62 | 
 63 | class IEEEFPU( // Chisel wrapper that exposes FPNewBlackbox through the typed FPIO bundle
 64 |     val floatType: FloatType,
 65 |     val lanes: Int,
 66 |     val stages: Int
 67 | ) extends Module {
 68 | 
 69 |   val fLen = floatType.width() * lanes // total data width: one float per lane
 70 |   val io = IO(new FPIO(fLen))
 71 | 
 72 |   val blackbox = Module(
 73 |     new FPNewBlackbox(
 74 |       floatType,
 75 |       lanes,
 76 |       stages,
 77 |       tagWidth = 0 // tags are not used by this wrapper
 78 |     )
 79 |   )
 80 | 
 81 |   // clock & reset
 82 |   blackbox.io.clk_i := clock
 83 |   blackbox.io.rst_ni := ~reset.asBool // the blackbox reset input is the inverse of the module reset
 84 | 
 85 |   // request
 86 |   blackbox.io.operands_i := io.req.bits.operands.asUInt
 87 |   blackbox.io.rnd_mode_i := io.req.bits.roundingMode.asUInt
 88 |   blackbox.io.op_i := io.req.bits.op.asUInt
 89 |   blackbox.io.op_mod_i := io.req.bits.opModifier
 90 |   blackbox.io.src_fmt_i := io.req.bits.srcFormat.asUInt
 91 |   blackbox.io.dst_fmt_i := io.req.bits.dstFormat.asUInt
 92 |   blackbox.io.int_fmt_i := io.req.bits.intFormat.asUInt
 93 |   blackbox.io.vectorial_op_i := 1.B // vectorial mode is always requested
 94 |   blackbox.io.tag_i := 0.B
 95 |   blackbox.io.in_valid_i := io.req.valid
 96 |   io.req.ready := blackbox.io.in_ready_o
 97 | 
 98 |   // response
 99 |   io.resp.bits.result := blackbox.io.result_o
100 |   io.resp.bits.status := blackbox.io.status_o.asTypeOf(io.resp.bits.status) // reinterpret the 5 status bits as FPStatus fields
101 |   io.resp.valid := blackbox.io.out_valid_o
102 |   blackbox.io.out_ready_i := io.resp.ready
103 | 
104 |   // flush & busy
105 |   blackbox.io.flush_i := io.flush
106 |   io.busy := blackbox.io.busy_o
107 | }
108 | 
109 | object IEEEFPU extends EmitChiselModule { // entry point that passes an IEEEFPU factory to emitChisel
110 |   emitChisel(
111 |     (floatType, lanes, stages, _) => new IEEEFPU(floatType, lanes, stages), // fourth factory argument is ignored
112 |     "IEEEFPU",
113 |     "fpnew"
114 |   )
115 | }
116 | 
117 | object IEEEFPUSynth extends EmitChiselModule { // emits, then synthesizes, each float type / stages / lanes combination
118 |   for (floatType <- Seq(FloatS, FloatD)) {
119 |     val floatName = floatType.kind().toString()
120 |     for (stages <- Seq(2, 3)) {
121 |       for (lanes <- Seq(1)) {
122 |         emitChisel(
123 |           ( // these lambda parameters shadow the loop variables of the same names
124 |               floatType,
125 |               lanes,
126 |               stages,
127 |               _
128 |           ) => new IEEEFPU(floatType, lanes, stages),
129 |           "IEEEFPU",
130 |           "fpnew",
131 |           allStages = Seq(stages), // restrict emission to the current loop combination
132 |           floatTypes = Seq(floatType),
133 |           lanes = Seq(lanes)
134 |         )
135 |         val name = s"IEEEFPU_${floatName}${lanes}l${stages}s_fpnew" // NOTE(review): assumed to match the name emitChisel produces
136 | 
137 |         val fileName =
138 |           s"FPNewBlackbox_${floatType.kind().toString()}${lanes}l${stages}s.synth.v"
139 |         Synthesis.build(
140 |           Seq(s"${name}.v", s"./fpu-wrappers/resources/fpnew/${fileName}"), // wrapper output plus the pre-synthesized blackbox
141 |           s"${name}_IEEEFPU",
142 |           s"${name}"
143 |         )
144 |       }
145 |     }
146 |   }
147 | }
148 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/fudian/IEEEFAdd.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.fudian
  2 | 
  3 | import chisel3._
  4 | import chisel3.util._
  5 | import fpuwrapper.EmitChiselModule
  6 | import fpuwrapper.FloatD
  7 | import fpuwrapper.FloatType
  8 | import fpuwrapper.Synthesis
  9 | 
 10 | class IEEEFAddRequest(val floatType: FloatType, val lanes: Int) extends Bundle { // two operand vectors, one entry per lane
 11 |   val a = Vec(lanes, UInt(floatType.width().W))
 12 |   val b = Vec(lanes, UInt(floatType.width().W))
 13 | }
 14 | 
 15 | class IEEEFAddResponse(val floatType: FloatType, val lanes: Int) // per-lane result and 5-bit exception flags
 16 |     extends Bundle {
 17 |   // result
 18 |   val res = Vec(lanes, UInt(floatType.width().W))
 19 |   // exception status
 20 |   val exc = Vec(lanes, Bits(5.W))
 21 | }
 22 | 
 23 | class IEEEFAdd(floatType: FloatType, lanes: Int, stages: Int) extends Module { // one fudian.FADD per lane with registers before and after
 24 |   val io = IO(new Bundle {
 25 |     val req = Flipped(Valid(new IEEEFAddRequest(floatType, lanes)))
 26 |     val resp = Valid(new IEEEFAddResponse(floatType, lanes))
 27 |   })
 28 | 
 29 |   val inputStages = stages / 2 // registers in front of the adder
 30 |   val outputStages = stages - inputStages // registers behind the adder; input + output = stages
 31 | 
 32 |   val reqValid = io.req.valid
 33 |   val results = for (i <- 0 until lanes) yield {
 34 |     val fma = Module( // despite the name, this is an FADD instance
 35 |       new fudian.FADD(
 36 |         floatType.exp(),
 37 |         floatType.sig()
 38 |       )
 39 |     )
 40 |     fma.suggestName(s"fadd_${floatType.kind()}_${i}")
 41 |     fma.io.a := Pipe(
 42 |       reqValid,
 43 |       io.req.bits.a(i),
 44 |       inputStages
 45 |     ).bits
 46 |     fma.io.b := Pipe(
 47 |       reqValid,
 48 |       io.req.bits.b(i),
 49 |       inputStages
 50 |     ).bits
 51 | 
 52 |     // TODO: rounding mode is hard-wired to 0
 53 |     fma.io.rm := 0.U
 54 | 
 55 |     val res =
 56 |       Pipe(true.B, fma.io.result, outputStages).bits // valid tied high; only the delayed bits are used
 57 |     val exc = Pipe(true.B, fma.io.fflags, outputStages).bits
 58 |     (res, exc)
 59 |   }
 60 | 
 61 |   // collect result
 62 |   val res = results.map(_._1)
 63 |   // exception flags
 64 |   val exc = results.map(_._2)
 65 | 
 66 |   val resValid = ShiftRegister(reqValid, stages) // matches the data path, assuming fudian.FADD adds no latency; no reset value is given
 67 | 
 68 |   io.resp.valid := resValid
 69 |   io.resp.bits.res := res
 70 |   io.resp.bits.exc := exc
 71 | }
 72 | 
 73 | object IEEEFAdd extends EmitChiselModule { // entry point that passes an IEEEFAdd factory to emitChisel
 74 |   emitChisel(
 75 |     (floatType, lanes, stages, _) => new IEEEFAdd(floatType, lanes, stages), // fourth factory argument is ignored
 76 |     "IEEEFAdd",
 77 |     "fudian"
 78 |   )
 79 | }
 80 | 
 81 | object IEEEFAddSynth extends EmitChiselModule { // emits, then synthesizes, the 1-lane FloatD adder
 82 |   for (floatType <- Seq(FloatD)) {
 83 |     val floatName = floatType.kind().toString()
 84 |     for (stages <- Seq(4)) {
 85 |       emitChisel(
 86 |         (floatType, lanes, stages, _) => new IEEEFAdd(floatType, lanes, stages), // parameters shadow the loop variables
 87 |         "IEEEFAdd",
 88 |         "fudian",
 89 |         allStages = Seq(stages),
 90 |         floatTypes = Seq(floatType),
 91 |         lanes = Seq(1)
 92 |       )
 93 |       val name = s"Fudian_IEEEFAdd_${floatName}1l${stages}s" // NOTE(review): confirm this matches the file name emitChisel writes
 94 |       Synthesis.build(
 95 |         Seq(s"${name}.v"),
 96 |         s"${name}_IEEEFAdd",
 97 |         s"fudian_${name}"
 98 |       )
 99 |     }
100 |   }
101 | }
102 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/fudian/IEEEFDivSqrt.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.fudian
 2 | 
 3 | import chisel3._
 4 | import chisel3.util._
 5 | import fpuwrapper.FloatType
 6 | 
 7 | object IEEEFDivSqrtOp extends ChiselEnum { // operation selector for IEEEFDivSqrt
 8 |   val DIV = Value
 9 |   val SQRT = Value
10 | 
11 |   val NOP = DIV // alias, not a separate encoding
12 | }
13 | 
14 | class IEEEFDivSqrtRequest(val floatType: FloatType, val lanes: Int) // one op shared by all lanes, two operand vectors
15 |     extends Bundle {
16 |   val op = IEEEFDivSqrtOp()
17 |   val a = Vec(lanes, UInt(floatType.width().W))
18 |   val b = Vec(lanes, UInt(floatType.width().W))
19 | }
20 | 
21 | class IEEEFDivSqrtResponse(val floatType: FloatType, val lanes: Int) // per-lane result and 5-bit exception flags
22 |     extends Bundle {
23 |   // result
24 |   val res = Vec(lanes, UInt(floatType.width().W))
25 |   // exception status
26 |   val exc = Vec(lanes, Bits(5.W))
27 | }
28 | 
29 | class IEEEFDivSqrt(val floatType: FloatType, val lanes: Int) extends Module { // one fudian.FDIV per lane; response fires when all lanes are done
30 |   val io = IO(new Bundle {
31 |     val req = Flipped(Decoupled(new IEEEFDivSqrtRequest(floatType, lanes)))
32 |     val resp = Valid(new IEEEFDivSqrtResponse(floatType, lanes))
33 |   })
34 | 
35 |   // replicate small units for higher throughput
36 |   val valid = io.req.valid
37 |   val results = for (i <- 0 until lanes) yield {
38 |     val div_sqrt = Module(
39 |       new fudian.FDIV(
40 |         floatType.exp(),
41 |         floatType.sig()
42 |       )
43 |     )
44 |     div_sqrt.suggestName(s"div_sqrt${floatType.kind()}_${i}")
45 |     div_sqrt.io.a := io.req.bits.a(i)
46 |     div_sqrt.io.b := io.req.bits.b(i)
47 |     div_sqrt.io.specialIO.in_valid := io.req.valid && io.req.ready // start only on a real handshake, i.e. when every lane is ready
48 |     div_sqrt.io.specialIO.kill := false.B
49 | 
50 |     // TODO: rounding mode is hard-wired to 0
51 |     div_sqrt.io.rm := 0.U
52 | 
53 |     val result = div_sqrt.io.result
54 |     val exception = Wire(UInt(5.W))
55 |     exception := div_sqrt.io.fflags
56 |     div_sqrt.io.specialIO.isSqrt := io.req.bits.op === IEEEFDivSqrtOp.SQRT
57 | 
58 |     // lanes might not complete in the same cycle, so each lane latches its own result
59 |     val resultReg = Reg(UInt(floatType.width().W))
60 |     val exceptionReg = Reg(UInt(5.W))
61 |     val resultValidReg = RegInit(false.B)
62 |     val done = Wire(Bool()) // driven after the loop, once every lane holds a result
63 |     div_sqrt.io.specialIO.out_ready := true.B
64 |     when(div_sqrt.io.specialIO.out_valid) {
65 |       resultReg := result
66 |       exceptionReg := exception
67 |       resultValidReg := true.B
68 |     }
69 |     when(done) { // last connect wins: done clears the flag even if out_valid is high
70 |       resultValidReg := false.B
71 |     }
72 | 
73 |     (
74 |       resultReg,
75 |       exceptionReg,
76 |       resultValidReg,
77 |       done,
78 |       div_sqrt.io.specialIO.in_ready
79 |     )
80 |   }
81 | 
82 |   io.req.ready := results.map(_._5).reduce(_ & _) // accept a request only when every lane can take it
83 | 
84 |   // collect result
85 |   val res = results.map(_._1)
86 |   // exception flags
87 |   val exc = results.map(_._2)
88 | 
89 |   val resValid = results.map(_._3).reduce(_ & _)
90 |   // all done
91 |   for (lane <- results) {
92 |     lane._4 := resValid
93 |   }
94 | 
95 |   io.resp.valid := resValid // high for the single cycle in which all lanes hold a result
96 |   io.resp.bits.res := res
97 |   io.resp.bits.exc := exc
98 | }
99 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/fudian/IEEEFMA.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.fudian
  2 | 
  3 | import chisel3._
  4 | import chisel3.util._
  5 | import fpuwrapper.EmitChiselModule
  6 | import fpuwrapper.FloatH
  7 | import fpuwrapper.FloatS
  8 | import fpuwrapper.FloatD
  9 | import fpuwrapper.AddPrefix
 10 | import fpuwrapper.FloatType
 11 | import fpuwrapper.Synthesis
 12 | 
 13 | class IEEEFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { // operands(k)(i) is operand k of lane i
 14 |   val operands = Vec(3, Vec(lanes, UInt(floatType.width().W)))
 15 | }
 16 | 
 17 | class IEEEFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { // per-lane result and 5-bit exception flags
 18 |   // result
 19 |   val res = Vec(lanes, UInt(floatType.width().W))
 20 |   // exception status
 21 |   val exc = Vec(lanes, Bits(5.W))
 22 | }
 23 | 
 24 | // adapted from fudian.FCMA
 25 | // insert `stages` pipeline registers between FMUL and FCMA_ADD
 26 | class FCMAPipe(val expWidth: Int, val precision: Int, val stages: Int)
 27 |     extends Module {
 28 |   val io = IO(new Bundle() {
 29 |     val a, b, c = Input(UInt((expWidth + precision).W))
 30 |     val rm = Input(UInt(3.W))
 31 |     val result = Output(UInt((expWidth + precision).W))
 32 |     val fflags = Output(UInt(5.W))
 33 |   })
 34 | 
 35 |   val fmul = Module(new fudian.FMUL(expWidth, precision))
 36 |   val fadd = Module(new fudian.FCMA_ADD(expWidth, 2 * precision, precision))
 37 | 
 38 |   fmul.io.a := io.a
 39 |   fmul.io.b := io.b
 40 |   fmul.io.rm := io.rm
 41 | 
 42 |   val mul_to_fadd = ShiftRegister(fmul.io.to_fadd, stages) // product bundle delayed by `stages`
 43 |   fadd.io.a := ShiftRegister(Cat(io.c, 0.U(precision.W)), stages) // c padded with `precision` low zero bits, same delay
 44 |   fadd.io.b := mul_to_fadd.fp_prod.asUInt
 45 |   fadd.io.b_inter_valid := true.B
 46 |   fadd.io.b_inter_flags := mul_to_fadd.inter_flags
 47 |   fadd.io.rm := ShiftRegister(io.rm, stages) // rounding mode is delayed alongside the data
 48 | 
 49 |   io.result := fadd.io.result
 50 |   io.fflags := fadd.io.fflags
 51 | }
 52 | 
 53 | class IEEEFMA( // per-lane fused multiply-add with `stages` cycles of latency on both data and valid
 54 |     floatType: FloatType,
 55 |     lanes: Int,
 56 |     stages: Int,
 57 |     prefix: String = ""
 58 | ) extends Module {
 59 |   AddPrefix(this, prefix)
 60 |   val io = IO(new Bundle {
 61 |     val req = Flipped(Valid(new IEEEFMARequest(floatType, lanes)))
 62 |     val resp = Valid(new IEEEFMAResponse(floatType, lanes))
 63 |   })
 64 | 
 65 |   val internalStages = if (stages > 1) 1 else 0 // one register between multiplier and adder when there is budget
 66 |   val inputStages = (stages - internalStages) / 2 // registers in front of the FMA
 67 |   val outputStages = stages - internalStages - inputStages // registers behind the FMA
 68 | 
 69 |   val reqValid = io.req.valid
 70 |   val results = for (i <- 0 until lanes) yield {
 71 |     // FCMAPipe supplies the `internalStages` registers between FMUL and FCMA_ADD,
 72 |     // so data latency (input + internal + output) equals `stages`, like resValid
 73 |     val fma = Module(
 74 |       new FCMAPipe(floatType.exp(), floatType.sig(), internalStages)
 75 |     )
 76 | 
 77 |     fma.suggestName(s"fma_${floatType.kind()}_${i}")
 78 |     fma.io.a := Pipe(
 79 |       reqValid,
 80 |       io.req.bits.operands(0)(i),
 81 |       inputStages
 82 |     ).bits
 83 |     fma.io.b := Pipe(
 84 |       reqValid,
 85 |       io.req.bits.operands(1)(i),
 86 |       inputStages
 87 |     ).bits
 88 |     fma.io.c := Pipe(
 89 |       reqValid,
 90 |       io.req.bits.operands(2)(i),
 91 |       inputStages
 92 |     ).bits
 93 | 
 94 |     // TODO: rounding mode is hard-wired to 0
 95 |     fma.io.rm := 0.U
 96 | 
 97 |     val res =
 98 |       Pipe(true.B, fma.io.result, outputStages).bits // valid tied high; only the delayed bits are used
 99 |     val exc = Pipe(true.B, fma.io.fflags, outputStages).bits
100 |     (res, exc)
101 |   }
102 | 
103 |   // collect result
104 |   val res = results.map(_._1)
105 |   // exception flags
106 |   val exc = results.map(_._2)
107 | 
108 |   val resValid = ShiftRegister(reqValid, stages) // NOTE(review): no reset value is given for these registers
109 | 
110 |   io.resp.valid := resValid
111 |   io.resp.bits.res := res
112 |   io.resp.bits.exc := exc
113 | }
114 | 
115 | object IEEEFMA extends EmitChiselModule { // entry point that passes an IEEEFMA factory to emitChisel
116 |   emitChisel(
117 |     (floatType, lanes, stages, _) => new IEEEFMA(floatType, lanes, stages), // fourth factory argument is ignored
118 |     "IEEEFMA",
119 |     "fudian"
120 |   )
121 | }
122 | 
123 | object IEEEFMASynth extends EmitChiselModule { // emits, then synthesizes, the 1-lane FMA for each float type and stage count
124 |   for (floatType <- Seq(FloatH, FloatS, FloatD)) {
125 |     val floatName = floatType.kind().toString()
126 |     for (stages <- Seq(2, 3, 4)) {
127 |       emitChisel(
128 |         (floatType, lanes, stages, prefix) => // parameters shadow the loop variables; prefix is forwarded
129 |           new IEEEFMA(floatType, lanes, stages, prefix),
130 |         "IEEEFMA",
131 |         "fudian",
132 |         allStages = Seq(stages),
133 |         floatTypes = Seq(floatType),
134 |         lanes = Seq(1)
135 |       )
136 |       val name = s"IEEEFMA_${floatName}1l${stages}s_fudian" // NOTE(review): assumed to match the name emitChisel produces
137 |       Synthesis.build(
138 |         Seq(s"${name}.sv"),
139 |         s"${name}_IEEEFMA",
140 |         s"${name}"
141 |       )
142 |     }
143 |   }
144 | }
145 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/FMACommon.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.hardfloat
  2 | 
  3 | import chisel3._
  4 | import chisel3.util._
  5 | 
  6 | object FMAOp extends ChiselEnum { // operation selector; op[k] in the comments below is request operand k
  7 |   // 1 * op[1] + op[2]
  8 |   val FADD = Value
  9 |   // 1 * op[1] - op[2]
 10 |   val FSUB = Value
 11 |   // op[0] * op[1] + 0
 12 |   val FMUL = Value
 13 |   // op[0] * op[1] + op[2]
 14 |   val FMADD = Value
 15 |   // op[0] * op[1] - op[2]
 16 |   val FMSUB = Value
 17 |   // -(op[0] * op[1] - op[2])
 18 |   val FNMSUB = Value
 19 |   // -(op[0] * op[1] + op[2])
 20 |   val FNMADD = Value
 21 | 
 22 |   val NOP = FADD // alias, not a separate encoding
 23 | }
 24 | 
 25 | // https://github.com/chipsalliance/rocket-chip/blob/master/src/main/scala/tile/FPU.scala
 26 | // with modifications of extra stages
 27 | class MulAddRecFNPipe(latency: Int, expWidth: Int, sigWidth: Int)
 28 |     extends Module {
 29 |   require(latency <= 2) // at most two register stages: one after the multiply, one before rounding
 30 | 
 31 |   val io = IO(new Bundle {
 32 |     val validin = Input(Bool())
 33 |     val op = Input(Bits(2.W))
 34 |     val a = Input(Bits((expWidth + sigWidth + 1).W))
 35 |     val b = Input(Bits((expWidth + sigWidth + 1).W))
 36 |     val c = Input(Bits((expWidth + sigWidth + 1).W))
 37 |     val roundingMode = Input(UInt(3.W))
 38 |     val detectTininess = Input(UInt(1.W))
 39 |     val out = Output(Bits((expWidth + sigWidth + 1).W))
 40 |     val exceptionFlags = Output(Bits(5.W))
 41 |     val validout = Output(Bool())
 42 |   })
 43 | 
 44 |   // ------------------------------------------------------------------------
 45 |   // stage 0: pre-multiply decode and the multiply-add itself
 46 | 
 47 |   val mulAddRecFNToRaw_preMul = Module(
 48 |     new _root_.hardfloat.MulAddRecFNToRaw_preMul(expWidth, sigWidth)
 49 |   )
 50 |   val mulAddRecFNToRaw_postMul = Module(
 51 |     new _root_.hardfloat.MulAddRecFNToRaw_postMul(expWidth, sigWidth)
 52 |   )
 53 | 
 54 |   mulAddRecFNToRaw_preMul.io.op := io.op
 55 |   mulAddRecFNToRaw_preMul.io.a := io.a
 56 |   mulAddRecFNToRaw_preMul.io.b := io.b
 57 |   mulAddRecFNToRaw_preMul.io.c := io.c
 58 | 
 59 |   val mulAddResult =
 60 |     (mulAddRecFNToRaw_preMul.io.mulAddA *
 61 |       mulAddRecFNToRaw_preMul.io.mulAddB) +& // +& is the width-expanding add, so the carry is kept
 62 |       mulAddRecFNToRaw_preMul.io.mulAddC
 63 | 
 64 |   val valid_stage0 = Wire(Bool())
 65 |   val roundingMode_stage0 = Wire(UInt(3.W))
 66 |   val detectTininess_stage0 = Wire(UInt(1.W))
 67 | 
 68 |   val postmul_regs = if (latency > 0) 1 else 0 // first register stage is used whenever latency is non-zero
 69 |   mulAddRecFNToRaw_postMul.io.fromPreMul := Pipe(
 70 |     io.validin,
 71 |     mulAddRecFNToRaw_preMul.io.toPostMul,
 72 |     postmul_regs
 73 |   ).bits
 74 |   mulAddRecFNToRaw_postMul.io.mulAddResult := Pipe(
 75 |     io.validin,
 76 |     mulAddResult,
 77 |     postmul_regs
 78 |   ).bits
 79 |   mulAddRecFNToRaw_postMul.io.roundingMode := Pipe(
 80 |     io.validin,
 81 |     io.roundingMode,
 82 |     postmul_regs
 83 |   ).bits
 84 |   roundingMode_stage0 := Pipe(io.validin, io.roundingMode, postmul_regs).bits // second copy, carried on to the rounding stage
 85 |   detectTininess_stage0 := Pipe(
 86 |     io.validin,
 87 |     io.detectTininess,
 88 |     postmul_regs
 89 |   ).bits
 90 |   valid_stage0 := Pipe(io.validin, false.B, postmul_regs).valid // only the delayed valid is used; the data is a dummy
 91 | 
 92 |   // ------------------------------------------------------------------------
 93 |   // stage 1: rounding
 94 | 
 95 |   val roundRawFNToRecFN = Module(
 96 |     new _root_.hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0)
 97 |   )
 98 | 
 99 |   val round_regs = if (latency == 2) 1 else 0 // second register stage only for latency 2
100 |   roundRawFNToRecFN.io.invalidExc := Pipe(
101 |     valid_stage0,
102 |     mulAddRecFNToRaw_postMul.io.invalidExc,
103 |     round_regs
104 |   ).bits
105 |   roundRawFNToRecFN.io.in := Pipe(
106 |     valid_stage0,
107 |     mulAddRecFNToRaw_postMul.io.rawOut,
108 |     round_regs
109 |   ).bits
110 |   roundRawFNToRecFN.io.roundingMode := Pipe(
111 |     valid_stage0,
112 |     roundingMode_stage0,
113 |     round_regs
114 |   ).bits
115 |   roundRawFNToRecFN.io.detectTininess := Pipe(
116 |     valid_stage0,
117 |     detectTininess_stage0,
118 |     round_regs
119 |   ).bits
120 |   io.validout := Pipe(valid_stage0, false.B, round_regs).valid // only the delayed valid is used; the data is a dummy
121 | 
122 |   roundRawFNToRecFN.io.infiniteExc := false.B
123 | 
124 |   io.out := roundRawFNToRecFN.io.out
125 |   io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags
126 | }
127 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/HFFCmp.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.hardfloat
  2 | 
  3 | import _root_.hardfloat.CompareRecFN
  4 | import chisel3._
  5 | import chisel3.util._
  6 | import fpuwrapper._
  7 | 
  8 | object HFFCmpOp extends ChiselEnum { // comparison selector for HFFCmp
  9 |   val EQ = Value
 10 |   val NE = Value
 11 |   val LT = Value
 12 |   val LE = Value
 13 |   val GT = Value
 14 |   val GE = Value
 15 | 
 16 |   val NOP = EQ // alias, not a separate encoding
 17 | }
 18 | 
 19 | class HFFCmpRequest(val floatType: FloatType, val lanes: Int) extends Bundle { // one op shared by all lanes; operands are hardfloat-width
 20 |   val op = HFFCmpOp()
 21 |   val r1 = Vec(lanes, UInt(floatType.widthHardfloat().W))
 22 |   val r2 = Vec(lanes, UInt(floatType.widthHardfloat().W))
 23 | }
 24 | 
 25 | class HFFCmpResponse(val floatType: FloatType, val lanes: Int) extends Bundle { // per-lane comparison result and exception flags
 26 |   // result
 27 |   val res = Vec(lanes, UInt(floatType.width().W))
 28 |   // exception status
 29 |   val exc = Vec(lanes, Bits(5.W))
 30 | }
 31 | 
 32 | class HFFCmp(floatType: FloatType, lanes: Int, stages: Int) extends Module { // per-lane comparison producing 1 or 0, delayed by `stages`
 33 |   val io = IO(new Bundle {
 34 |     val req = Flipped(Valid(new HFFCmpRequest(floatType, lanes)))
 35 |     val resp = Valid(new HFFCmpResponse(floatType, lanes))
 36 |   })
 37 | 
 38 |   // replicate small units for higher throughput
 39 |   val valid = io.req.valid
 40 |   val results = for (i <- 0 until lanes) yield {
 41 |     val cmp = Module(
 42 |       new CompareRecFN(
 43 |         floatType.exp(),
 44 |         floatType.sig()
 45 |       )
 46 |     )
 47 |     cmp.suggestName(s"cmp${floatType.kind()}_${i}")
 48 |     cmp.io.a := io.req.bits.r1(i)
 49 |     cmp.io.b := io.req.bits.r2(i)
 50 |     cmp.io.signaling := true.B // signaling comparison is used for every op, including EQ and NE
 51 | 
 52 |     val result = Wire(UInt(floatType.width().W))
 53 |     val exception = Wire(UInt(5.W))
 54 |     exception := cmp.io.exceptionFlags
 55 |     result := 0.U // default: comparison false
 56 |     switch(io.req.bits.op) {
 57 |       is(HFFCmpOp.EQ) {
 58 |         when(cmp.io.eq) {
 59 |           result := 1.U
 60 |         }
 61 |       }
 62 |       is(HFFCmpOp.NE) {
 63 |         when(!cmp.io.eq) {
 64 |           result := 1.U
 65 |         }
 66 |       }
 67 |       is(HFFCmpOp.GE) {
 68 |         when(cmp.io.gt || cmp.io.eq) {
 69 |           result := 1.U
 70 |         }
 71 |       }
 72 |       is(HFFCmpOp.LE) {
 73 |         when(cmp.io.lt || cmp.io.eq) {
 74 |           result := 1.U
 75 |         }
 76 |       }
 77 |       is(HFFCmpOp.GT) {
 78 |         when(cmp.io.gt) {
 79 |           result := 1.U
 80 |         }
 81 |       }
 82 |       is(HFFCmpOp.LT) {
 83 |         when(cmp.io.lt) {
 84 |           result := 1.U
 85 |         }
 86 |       }
 87 |     }
 88 | 
 89 |     // stages: result and flags are delayed by the same amount
 90 |     val res = Pipe(
 91 |       valid,
 92 |       result,
 93 |       stages
 94 |     ).bits
 95 |     val exc = Pipe(
 96 |       valid,
 97 |       exception,
 98 |       stages
 99 |     ).bits
100 |     (res, exc)
101 |   }
102 | 
103 |   // collect result
104 |   val res = results.map(_._1)
105 |   // exception flags
106 |   val exc = results.map(_._2)
107 | 
108 |   val resValid = ShiftRegister(valid, stages) // same delay as the data; no reset value is given
109 | 
110 |   io.resp.valid := resValid
111 |   io.resp.bits.res := res
112 |   io.resp.bits.exc := exc
113 | }
114 | 
115 | object HFFCmp extends EmitChiselModule { // emission entry point: hands an HFFCmp factory to emitChisel under the "hardfloat" library name
116 |   emitChisel(
117 |     (floatType, lanes, stages, _) => new HFFCmp(floatType, lanes, stages), // the fourth factory argument is not used by HFFCmp
118 |     "HFFCmp",
119 |     "hardfloat"
120 |   )
121 | }
122 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/HFFDivSqrt.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.hardfloat
  2 | 
  3 | import chisel3._
  4 | import chisel3.ChiselEnum
  5 | import chisel3.util._
  6 | import fpuwrapper.EmitChiselModule
  7 | import fpuwrapper.FloatType
  8 | import hardfloat.DivSqrtRecFN_small
  9 | 
 10 | object HFFDivSqrtOp extends ChiselEnum { // operation selector for HFFDivSqrt
 11 |   val DIV = Value
 12 |   val SQRT = Value
 13 |   // NOP is an alias of DIV, not a separate encoding
 14 |   val NOP = DIV
 15 | }
 16 | 
 17 | class HFFDivSqrtRequest(val floatType: FloatType, val lanes: Int) // request payload: one operation applied to `lanes` operand pairs
 18 |     extends Bundle {
 19 |   val op = HFFDivSqrtOp()
 20 |   val a = Vec(lanes, UInt(floatType.widthHardfloat().W)) // per-lane operand wired to the divider's `a` input
 21 |   val b = Vec(lanes, UInt(floatType.widthHardfloat().W)) // per-lane operand wired to the divider's `b` input
 22 | }
 23 | 
 24 | class HFFDivSqrtResponse(val floatType: FloatType, val lanes: Int) // response payload: per-lane result and exception flags
 25 |     extends Bundle {
 26 |   // result, one hardfloat-width word per lane
 27 |   val res = Vec(lanes, UInt(floatType.widthHardfloat().W))
 28 |   // exception status, 5 flag bits per lane
 29 |   val exc = Vec(lanes, Bits(5.W))
 30 | }
 31 | 
 32 | class HFFDivSqrt(val floatType: FloatType, val lanes: Int) // one DivSqrtRecFN_small per lane; the response fires once every lane has produced its result
 33 |     extends Module
 34 |     with RequireAsyncReset {
 35 |   val io = IO(new Bundle {
 36 |     val req = Flipped(Decoupled(new HFFDivSqrtRequest(floatType, lanes)))
 37 |     val resp = Valid(new HFFDivSqrtResponse(floatType, lanes))
 38 |   })
 39 | 
 40 |   // replicate small units for higher throughput
 41 |   val valid = io.req.valid
 42 |   val results = for (i <- 0 until lanes) yield {
 43 |     val div_sqrt = Module(
 44 |       new DivSqrtRecFN_small(
 45 |         floatType.exp(),
 46 |         floatType.sig(),
 47 |         0
 48 |       )
 49 |     )
 50 |     div_sqrt.suggestName(s"div_sqrt${floatType.kind()}_${i}")
 51 |     div_sqrt.io.a := io.req.bits.a(i)
 52 |     div_sqrt.io.b := io.req.bits.b(i)
 53 |     div_sqrt.io.inValid := io.req.valid && io.req.ready // start only on an accepted request (all lanes ready); otherwise an idle lane would start or restart while another lane is still busy
 54 | 
 55 |     // TODO: rounding mode and tininess detection are hard-wired to 0
 56 |     div_sqrt.io.roundingMode := 0.U
 57 |     div_sqrt.io.detectTininess := 0.U
 58 | 
 59 |     val result = div_sqrt.io.out
 60 |     val exception = Wire(UInt(5.W))
 61 |     exception := div_sqrt.io.exceptionFlags
 62 |     div_sqrt.io.sqrtOp := io.req.bits.op === HFFDivSqrtOp.SQRT
 63 | 
 64 |     // lanes might not complete in the same cycle, so each lane parks its result until all lanes are done
 65 |     val resultReg = Reg(UInt(floatType.widthHardfloat().W))
 66 |     val exceptionReg = Reg(UInt(5.W))
 67 |     val resultValidReg = RegInit(false.B)
 68 |     val done = Wire(Bool()) // driven after the loop from the combined valid of all lanes
 69 |     when(div_sqrt.io.outValid_div | div_sqrt.io.outValid_sqrt) {
 70 |       resultReg := result
 71 |       exceptionReg := exception
 72 |       resultValidReg := true.B
 73 |     }
 74 |     when(done) { // last connect wins: clearing takes priority over a result arriving in the same cycle
 75 |       resultValidReg := false.B
 76 |     }
 77 | 
 78 |     (
 79 |       resultReg,
 80 |       exceptionReg,
 81 |       resultValidReg,
 82 |       done,
 83 |       div_sqrt.io.inReady
 84 |     )
 85 |   }
 86 |   // a request is accepted only when every lane can take a new operation
 87 |   io.req.ready := results.map(_._5).reduce(_ & _)
 88 | 
 89 |   // collect result
 90 |   val res = results.map(_._1)
 91 |   // exception flags
 92 |   val exc = results.map(_._2)
 93 |   // the response is valid once every lane holds a parked result
 94 |   val resValid = results.map(_._3).reduce(_ & _)
 95 |   // all done: clear every lane's parked-result flag, which makes resp.valid a single-cycle pulse
 96 |   for (lane <- results) {
 97 |     lane._4 := resValid
 98 |   }
 99 | 
100 |   io.resp.valid := resValid
101 |   io.resp.bits.res := res
102 |   io.resp.bits.exc := exc
103 | }
104 | 
105 | object HFFDivSqrt extends EmitChiselModule { // emission entry point: hands an HFFDivSqrt factory to emitChisel under the "hardfloat" library name
106 |   emitChisel(
107 |     (floatType, lanes, _, _) => new HFFDivSqrt(floatType, lanes), // the third and fourth factory arguments are ignored: HFFDivSqrt takes neither
108 |     "HFFDivSqrt",
109 |     "hardfloat"
110 |   )
111 | }
112 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/HFFMA.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.hardfloat
  2 | 
  3 | import chisel3._
  4 | import chisel3.util._
  5 | import fpuwrapper._
  6 | import chisel3.experimental.annotate
  7 | import sifive.enterprise.firrtl.NestedPrefixModulesAnnotation
  8 | import chisel3.experimental.ChiselAnnotation
  9 | 
 10 | class HFFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { // request payload for HFFMA
 11 |   val op = FMAOp() // selects which FMA-family operation is performed on all lanes
 12 |   val operands = Vec(3, Vec(lanes, UInt(floatType.widthHardfloat().W))) // indexed as operands(operandIndex)(lane)
 13 | }
 14 | 
 15 | class HFFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { // response payload for HFFMA
 16 |   // result, one hardfloat-width word per lane
 17 |   val res = Vec(lanes, UInt(floatType.widthHardfloat().W))
 18 |   // exception status, 5 flag bits per lane
 19 |   val exc = Vec(lanes, Bits(5.W))
 20 | }
 21 | 
 22 | class HFFMA( // lane-replicated fused multiply-add on hardfloat-width operands; resp.valid follows req.valid by `stages` cycles
 23 |     floatType: FloatType,
 24 |     lanes: Int,
 25 |     stages: Int,
 26 |     prefix: String = ""
 27 | ) extends Module {
 28 |   AddPrefix(this, prefix)
 29 | 
 30 |   val io = IO(new Bundle {
 31 |     val req = Flipped(Valid(new HFFMARequest(floatType, lanes)))
 32 |     val resp = Valid(new HFFMAResponse(floatType, lanes))
 33 |   })
 34 | 
 35 |   val one = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) // multiplicand substituted for FADD/FSUB
 36 |   val zero = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) // addend substituted for FMUL: a zero carrying the product's sign, so a zero product keeps its sign instead of becoming (-0) + (+0) = +0
 37 |   for (i <- 0 until lanes) {
 38 |     one(i) := floatType.oneHardfloatChisel()
 39 |     zero(i) := Cat(io.req.bits.operands(0)(i).head(1) ^ io.req.bits.operands(1)(i).head(1), 0.U((floatType.widthHardfloat() - 1).W))
 40 |   }
 41 | 
 42 |   // fma: neg * (op[0] * op[1]) + sign * op[2]
 43 |   // neg: {0 => 1, 1 => -1}
 44 |   // sign: {0 => 1, 1 => -1}
 45 |   val op1 = WireInit(io.req.bits.operands(0))
 46 |   val op2 = WireInit(io.req.bits.operands(1))
 47 |   val op3 = WireInit(io.req.bits.operands(2))
 48 |   val neg = WireInit(false.B)
 49 |   val sign = WireInit(false.B)
 50 | 
 51 |   // see the definition of FMAOp for more detail
 52 |   switch(io.req.bits.op) {
 53 |     is(FMAOp.FADD) {
 54 |       op1 := one
 55 |     }
 56 |     is(FMAOp.FSUB) {
 57 |       op1 := one
 58 |       sign := true.B
 59 |     }
 60 |     is(FMAOp.FMUL) {
 61 |       op3 := zero
 62 |     }
 63 |     is(FMAOp.FMADD) {
 64 |       // do nothing
 65 |     }
 66 |     is(FMAOp.FMSUB) {
 67 |       sign := true.B
 68 |     }
 69 |     is(FMAOp.FNMSUB) {
 70 |       neg := true.B
 71 |     }
 72 |     is(FMAOp.FNMADD) {
 73 |       neg := true.B
 74 |       sign := true.B
 75 |     }
 76 |   }
 77 | 
 78 |   // when stages > 2, add extra register stages around the FMA core; the odd one goes to the input side
 79 |   val extraStages = (stages - 2) max 0
 80 |   val inputStages = (extraStages + 1) / 2
 81 |   val outputStages = extraStages - inputStages
 82 | 
 83 |   // replicate small units for higher throughput
 84 |   val reqValid = io.req.valid
 85 |   val results = for (i <- 0 until lanes) yield {
 86 |     // MulAddRecFNPipe only support stages <= 2
 87 |     val fma = Module(
 88 |       new MulAddRecFNPipe(
 89 |         stages min 2,
 90 |         floatType.exp(),
 91 |         floatType.sig()
 92 |       )
 93 |     )
 94 |     fma.suggestName(s"fma_${floatType.kind()}_${i}")
 95 |     fma.io.validin := Pipe(reqValid, reqValid, inputStages).valid // use .valid: the .bits register is only enabled while valid is high, so it would latch 1 forever
 96 |     fma.io.a := Pipe(
 97 |       reqValid,
 98 |       op1(i),
 99 |       inputStages
100 |     ).bits
101 |     fma.io.b := Pipe(
102 |       reqValid,
103 |       op2(i),
104 |       inputStages
105 |     ).bits
106 |     fma.io.c := Pipe(
107 |       reqValid,
108 |       op3(i),
109 |       inputStages
110 |     ).bits
111 | 
112 |     fma.io.op := Pipe(
113 |       reqValid,
114 |       Cat(neg, sign),
115 |       inputStages
116 |     ).bits
117 |     // TODO: rounding mode and tininess detection are hard-wired to 0
118 |     fma.io.roundingMode := 0.U
119 |     fma.io.detectTininess := 0.U
120 |     // output-side registers are free-running (always enabled)
121 |     val res = Pipe(true.B, fma.io.out, outputStages).bits
122 |     val exc = Pipe(true.B, fma.io.exceptionFlags, outputStages).bits
123 |     (res, exc)
124 |   }
125 | 
126 |   // collect result
127 |   val res = results.map(_._1)
128 |   // exception flags
129 |   val exc = results.map(_._2)
130 |   // the valid pipeline is reset to false so that resp.valid is never asserted spuriously right after reset
131 |   val resValid = ShiftRegister(reqValid, stages, false.B, true.B)
132 | 
133 |   io.resp.valid := resValid
134 |   io.resp.bits.res := res
135 |   io.resp.bits.exc := exc
136 | }
137 | 
138 | object HFFMA extends EmitChiselModule { // emission entry point: hands an HFFMA factory to emitChisel under the "hardfloat" library name
139 |   emitChisel(
140 |     (floatType, lanes, stages, prefix) =>
141 |       new HFFMA(floatType, lanes, stages, prefix),
142 |     "HFFMA",
143 |     "hardfloat"
144 |   )
145 | }
146 | 
147 | object HFFMASynth extends EmitChiselModule { // emits single-lane FloatS HFFMA with 3 stages, then runs Synthesis.build on the result
148 |   for (floatType <- Seq(FloatS)) {
149 |     val floatName = floatType.kind().toString()
150 |     for (stages <- Seq(3)) {
151 |       emitChisel(
152 |         (floatType, lanes, stages, _) => new HFFMA(floatType, lanes, stages), // lambda parameters shadow the loop variables; the prefix argument is dropped
153 |         "HFFMA",
154 |         "hardfloat",
155 |         allStages = Seq(stages),
156 |         floatTypes = Seq(floatType),
157 |         lanes = Seq(1)
158 |       )
159 |       val name = s"Hardfloat_HFFMA_${floatName}1l${stages}s" // NOTE(review): HFFMABench expects "<module>_<float><lanes>l<stages>s_<library>.sv" instead; confirm which name emitChisel actually writes
160 |       Synthesis.build(Seq(s"${name}.v"), s"${name}_HFFMA", s"hardfloat_${name}")
161 |     }
162 |   }
163 | }
164 | 
165 | object HFFMABench extends EmitChiselModule with VivadoBench { // emits 2-lane FloatS HFFMA with 3 stages and passes the generated file to bench(...)
166 |   val library = "hardfloat"
167 |   val moduleName = "HFFMA"
168 |   for (floatType <- Seq(FloatS)) {
169 |     for (stages <- Seq(3)) {
170 |       for (lanes <- Seq(2)) {
171 |         val floatName = floatType.kind().toString()
172 |         emitChisel(
173 |           (floatType, lanes, stages, prefix) => // lambda parameters shadow the loop variables of the same name
174 |             new HFFMA(floatType, lanes, stages, prefix),
175 |           moduleName,
176 |           library,
177 |           allStages = Seq(stages),
178 |           floatTypes = Seq(floatType),
179 |           lanes = Seq(lanes)
180 |         )
181 |         val name = s"${moduleName}_${floatName}${lanes}l${stages}s" // base name shared by the bench name, the source file and the top-level module
182 |         bench(
183 |           s"${name}_${library}",
184 |           Seq(s"${name}_${library}.sv"),
185 |           s"${name}_${library}_${moduleName}"
186 |         )
187 |       }
188 |     }
189 |   }
190 | }
191 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/HFFMul.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.hardfloat
  2 | 
  3 | import chisel3._
  4 | import chisel3.util._
  5 | import fpuwrapper._
  6 | 
  7 | class HFFMulRequest(val floatType: FloatType, val lanes: Int) extends Bundle { // request payload for HFFMul: `lanes` operand pairs
  8 |   val a = Vec(lanes, UInt(floatType.widthHardfloat().W)) // per-lane multiplicand
  9 |   val b = Vec(lanes, UInt(floatType.widthHardfloat().W)) // per-lane multiplier
 10 | }
 11 | 
 12 | class HFFMulResponse(val floatType: FloatType, val lanes: Int) extends Bundle { // response payload for HFFMul
 13 |   // result, one hardfloat-width word per lane
 14 |   val res = Vec(lanes, UInt(floatType.widthHardfloat().W))
 15 |   // exception status, 5 flag bits per lane
 16 |   val exc = Vec(lanes, Bits(5.W))
 17 | }
 18 | 
 19 | class HFFMul(floatType: FloatType, lanes: Int, stages: Int) extends Module { // lane-replicated multiplier on hardfloat-width operands; resp.valid follows req.valid by `stages` cycles
 20 |   val io = IO(new Bundle {
 21 |     val req = Flipped(Valid(new HFFMulRequest(floatType, lanes)))
 22 |     val resp = Valid(new HFFMulResponse(floatType, lanes))
 23 |   })
 24 | 
 25 |   // when stages > 1, add extra register stages around the multiplier core; the odd one goes to the output side
 26 |   val extraStages = (stages - 1) max 0
 27 |   val inputStages = extraStages / 2
 28 |   val outputStages = extraStages - inputStages
 29 | 
 30 |   // replicate small units for higher throughput
 31 |   val reqValid = io.req.valid
 32 |   val results = for (i <- 0 until lanes) yield {
 33 |     // MulRecFNPipe stages <= 1
 34 |     val fmul = Module(
 35 |       new MulRecFNPipe(
 36 |         floatType.exp(),
 37 |         floatType.sig(),
 38 |         stages min 1
 39 |       )
 40 |     )
 41 |     fmul.suggestName(s"fmul_${floatType.kind()}_${i}")
 42 |     fmul.io.validin := Pipe(reqValid, reqValid, inputStages).valid // use .valid: the .bits register is only enabled while valid is high, so it would latch 1 forever
 43 |     fmul.io.a := Pipe(
 44 |       reqValid,
 45 |       io.req.bits.a(i),
 46 |       inputStages
 47 |     ).bits
 48 |     fmul.io.b := Pipe(
 49 |       reqValid,
 50 |       io.req.bits.b(i),
 51 |       inputStages
 52 |     ).bits
 53 |     // TODO: rounding mode and tininess detection are hard-wired to 0
 54 |     fmul.io.roundingMode := 0.U
 55 |     fmul.io.detectTininess := 0.U
 56 |     // output-side registers are free-running (always enabled)
 57 |     val res = Pipe(true.B, fmul.io.out, outputStages).bits
 58 |     val exc = Pipe(true.B, fmul.io.exceptionFlags, outputStages).bits
 59 |     (res, exc)
 60 |   }
 61 | 
 62 |   // collect result
 63 |   val res = results.map(_._1)
 64 |   // exception flags
 65 |   val exc = results.map(_._2)
 66 |   // the valid pipeline is reset to false so that resp.valid is never asserted spuriously right after reset
 67 |   val resValid = ShiftRegister(reqValid, stages, false.B, true.B)
 68 | 
 69 |   io.resp.valid := resValid
 70 |   io.resp.bits.res := res
 71 |   io.resp.bits.exc := exc
 72 | }
 73 | 
 74 | object HFFMul extends EmitChiselModule { // emission entry point: hands an HFFMul factory to emitChisel under the "hardfloat" library name
 75 |   emitChisel(
 76 |     (floatType, lanes, stages, _) => new HFFMul(floatType, lanes, stages), // the fourth factory argument is not used by HFFMul
 77 |     "HFFMul",
 78 |     "hardfloat"
 79 |   )
 80 | }
 81 | 
 82 | object HFFMulSynth extends EmitChiselModule { // emits single-lane FloatS HFFMul for 1, 2 and 3 stages and runs Synthesis.build on each
 83 |   for (floatType <- Seq(FloatS)) {
 84 |     val floatName = floatType.kind().toString()
 85 |     for (stages <- Seq(1, 2, 3)) {
 86 |       emitChisel(
 87 |         (floatType, lanes, stages, _) => new HFFMul(floatType, lanes, stages), // lambda parameters shadow the loop variables of the same name
 88 |         "HFFMul",
 89 |         "hardfloat",
 90 |         allStages = Seq(stages),
 91 |         floatTypes = Seq(floatType),
 92 |         lanes = Seq(1)
 93 |       )
 94 |       val name = s"Hardfloat_HFFMul_${floatName}1l${stages}s" // NOTE(review): IEEEFMASynth uses "<module>_<float>1l<stages>s_hardfloat" with a .sv file; confirm this name matches what emitChisel writes
 95 |       Synthesis.build(
 96 |         Seq(s"${name}.v"),
 97 |         s"${name}_HFFMul",
 98 |         s"hardfloat_${name}"
 99 |       )
100 |     }
101 |   }
102 | }
103 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/HFToIEEE.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.util.ShiftRegister
 5 | import chisel3.util.Valid
 6 | import fpuwrapper._
 7 | 
 8 | class HFToIEEE(floatType: FloatType, lanes: Int, stages: Int) extends Module { // converts each lane with floatType.fromHardfloat and delays the result by `stages` cycles
 9 |   val io = IO(new Bundle {
10 |     val hardfloat = Input(Valid(Vec(lanes, Bits(floatType.widthHardfloat().W))))
11 |     val float = Output(Valid(Vec(lanes, Bits(floatType.width().W))))
12 |   })
13 |   // the valid pipeline is reset to false so that float.valid is never asserted spuriously right after reset
14 |   io.float.valid := ShiftRegister(io.hardfloat.valid, stages, false.B, true.B)
15 |   for (i <- 0 until lanes) {
16 |     io.float.bits(i) := ShiftRegister( // data registers need no reset; they are qualified by the valid bit
17 |       floatType.fromHardfloat(io.hardfloat.bits(i)),
18 |       stages
19 |     )
20 |   }
21 | }
22 | 
23 | object HFToIEEE extends EmitChiselModule { // emission entry point: hands an HFToIEEE factory to emitChisel under the "hardfloat" library name
24 |   emitChisel(
25 |     (floatType, lanes, stages, _) => new HFToIEEE(floatType, lanes, stages), // the fourth factory argument is not used by HFToIEEE
26 |     "HFToIEEE",
27 |     "hardfloat"
28 |   )
29 | }
30 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/IEEEFMA.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.hardfloat
  2 | 
  3 | import chisel3._
  4 | import chisel3.util._
  5 | import fpuwrapper._
  6 | 
  7 | class IEEEFMARequest(val floatType: FloatType, val lanes: Int) extends Bundle { // request payload for IEEEFMA; operands are floatType.width() bits wide
  8 |   val op = FMAOp() // selects which FMA-family operation is performed on all lanes
  9 |   val operands = Vec(3, Vec(lanes, UInt(floatType.width().W))) // indexed as operands(operandIndex)(lane)
 10 | }
 11 | 
 12 | class IEEEFMAResponse(val floatType: FloatType, val lanes: Int) extends Bundle { // response payload for IEEEFMA
 13 |   // result, one floatType.width()-bit word per lane
 14 |   val res = Vec(lanes, UInt(floatType.width().W))
 15 |   // exception status, 5 flag bits per lane
 16 |   val exc = Vec(lanes, Bits(5.W))
 17 | }
 18 | 
 19 | class IEEEFMA( // lane-replicated fused multiply-add: operands go through toHardfloat, results through fromHardfloat; resp.valid follows req.valid by `stages` cycles
 20 |     floatType: FloatType,
 21 |     lanes: Int,
 22 |     stages: Int,
 23 |     prefix: String = ""
 24 | ) extends Module {
 25 |   AddPrefix(this, prefix)
 26 |   val io = IO(new Bundle {
 27 |     val req = Flipped(Valid(new IEEEFMARequest(floatType, lanes)))
 28 |     val resp = Valid(new IEEEFMAResponse(floatType, lanes))
 29 |   })
 30 | 
 31 |   val one = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) // multiplicand substituted for FADD/FSUB
 32 |   val zero = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W))) // addend substituted for FMUL: a zero carrying the product's sign, so a zero product keeps its sign instead of becoming (-0) + (+0) = +0
 33 |   for (i <- 0 until lanes) {
 34 |     one(i) := floatType.oneHardfloatChisel()
 35 |     zero(i) := Cat(io.req.bits.operands(0)(i).head(1) ^ io.req.bits.operands(1)(i).head(1), 0.U((floatType.widthHardfloat() - 1).W))
 36 |   }
 37 | 
 38 |   // fma: neg * (op[0] * op[1]) + sign * op[2]
 39 |   // neg: {0 => 1, 1 => -1}
 40 |   // sign: {0 => 1, 1 => -1}
 41 |   val op1 = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W)))
 42 |   val op2 = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W)))
 43 |   val op3 = Wire(Vec(lanes, UInt(floatType.widthHardfloat().W)))
 44 |   for (i <- 0 until lanes) {
 45 |     op1(i) := floatType.toHardfloat(io.req.bits.operands(0)(i))
 46 |     op2(i) := floatType.toHardfloat(io.req.bits.operands(1)(i))
 47 |     op3(i) := floatType.toHardfloat(io.req.bits.operands(2)(i))
 48 |   }
 49 |   val neg = WireInit(false.B)
 50 |   val sign = WireInit(false.B)
 51 | 
 52 |   // see the definition of FMAOp for more detail
 53 |   switch(io.req.bits.op) {
 54 |     is(FMAOp.FADD) {
 55 |       op1 := one
 56 |     }
 57 |     is(FMAOp.FSUB) {
 58 |       op1 := one
 59 |       sign := true.B
 60 |     }
 61 |     is(FMAOp.FMUL) {
 62 |       op3 := zero
 63 |     }
 64 |     is(FMAOp.FMADD) {
 65 |       // do nothing
 66 |     }
 67 |     is(FMAOp.FMSUB) {
 68 |       sign := true.B
 69 |     }
 70 |     is(FMAOp.FNMSUB) {
 71 |       neg := true.B
 72 |     }
 73 |     is(FMAOp.FNMADD) {
 74 |       neg := true.B
 75 |       sign := true.B
 76 |     }
 77 |   }
 78 | 
 79 |   // when stages > 2, add extra register stages around the FMA core; the odd one goes to the output side
 80 |   val extraStages = (stages - 2) max 0
 81 |   val inputStages = extraStages / 2
 82 |   val outputStages = extraStages - inputStages
 83 | 
 84 |   // replicate small units for higher throughput
 85 |   val reqValid = io.req.valid
 86 |   val results = for (i <- 0 until lanes) yield {
 87 |     // MulAddRecFNPipe only support stages <= 2
 88 |     val fma = Module(
 89 |       new MulAddRecFNPipe(
 90 |         stages min 2,
 91 |         floatType.exp(),
 92 |         floatType.sig()
 93 |       )
 94 |     )
 95 |     fma.suggestName(s"fma_${floatType.kind()}_${i}")
 96 |     fma.io.validin := Pipe(reqValid, reqValid, inputStages).valid // use .valid: the .bits register is only enabled while valid is high, so it would latch 1 forever
 97 |     fma.io.a := Pipe(
 98 |       reqValid,
 99 |       op1(i),
100 |       inputStages
101 |     ).bits
102 |     fma.io.b := Pipe(
103 |       reqValid,
104 |       op2(i),
105 |       inputStages
106 |     ).bits
107 |     fma.io.c := Pipe(
108 |       reqValid,
109 |       op3(i),
110 |       inputStages
111 |     ).bits
112 | 
113 |     fma.io.op := Pipe(
114 |       reqValid,
115 |       Cat(neg, sign),
116 |       inputStages
117 |     ).bits
118 |     // TODO: rounding mode and tininess detection are hard-wired to 0
119 |     fma.io.roundingMode := 0.U
120 |     fma.io.detectTininess := 0.U
121 |     // the conversion back from hardfloat happens before the free-running output registers
122 |     val res =
123 |       Pipe(true.B, floatType.fromHardfloat(fma.io.out), outputStages).bits
124 |     val exc = Pipe(true.B, fma.io.exceptionFlags, outputStages).bits
125 |     (res, exc)
126 |   }
127 | 
128 |   // collect result
129 |   val res = results.map(_._1)
130 |   // exception flags
131 |   val exc = results.map(_._2)
132 |   // the valid pipeline is reset to false so that resp.valid is never asserted spuriously right after reset
133 |   val resValid = ShiftRegister(reqValid, stages, false.B, true.B)
134 | 
135 |   io.resp.valid := resValid
136 |   io.resp.bits.res := res
137 |   io.resp.bits.exc := exc
138 | }
139 | 
140 | object IEEEFMA extends EmitChiselModule { // emission entry point: hands an IEEEFMA factory to emitChisel under the "hardfloat" library name
141 |   emitChisel(
142 |     (floatType, lanes, stages, prefix) =>
143 |       new IEEEFMA(floatType, lanes, stages, prefix),
144 |     "IEEEFMA",
145 |     "hardfloat"
146 |   )
147 | }
148 | 
149 | object IEEEFMASynth extends EmitChiselModule { // emits single-lane 4-stage IEEEFMA for FloatH, FloatS and FloatD and runs Synthesis.build on each
150 |   for (floatType <- Seq(FloatH, FloatS, FloatD)) {
151 |     val floatName = floatType.kind().toString()
152 |     for (stages <- Seq(4)) {
153 |       emitChisel(
154 |         (floatType, lanes, stages, prefix) => // lambda parameters shadow the loop variables of the same name
155 |           new IEEEFMA(floatType, lanes, stages, prefix),
156 |         "IEEEFMA",
157 |         "hardfloat",
158 |         allStages = Seq(stages),
159 |         floatTypes = Seq(floatType),
160 |         lanes = Seq(1)
161 |       )
162 |       val name = s"IEEEFMA_${floatName}1l${stages}s_hardfloat" // used as source file base name, top-level prefix and workspace folder
163 |       Synthesis.build(
164 |         Seq(s"${name}.sv"),
165 |         s"${name}_IEEEFMA",
166 |         name
167 |       )
168 |     }
169 |   }
170 | }
171 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/IEEEToHF.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.util.ShiftRegister
 5 | import chisel3.util.Valid
 6 | import fpuwrapper._
 7 | 
 8 | class IEEEToHF(floatType: FloatType, lanes: Int, stages: Int) extends Module { // converts each lane with floatType.toHardfloat and delays the result by `stages` cycles
 9 |   val io = IO(new Bundle {
10 |     val float = Input(Valid(Vec(lanes, Bits(floatType.width().W))))
11 |     val hardfloat =
12 |       Output(Valid(Vec(lanes, Bits(floatType.widthHardfloat().W))))
13 |   })
14 |   // the valid pipeline is reset to false so that hardfloat.valid is never asserted spuriously right after reset
15 |   io.hardfloat.valid := ShiftRegister(io.float.valid, stages, false.B, true.B)
16 |   for (i <- 0 until lanes) {
17 |     io.hardfloat.bits(i) := ShiftRegister( // data registers need no reset; they are qualified by the valid bit
18 |       floatType.toHardfloat(io.float.bits(i)),
19 |       stages
20 |     )
21 |   }
22 | }
23 | 
24 | object IEEEToHF extends EmitChiselModule { // emission entry point: hands an IEEEToHF factory to emitChisel under the "hardfloat" library name
25 |   emitChisel(
26 |     (floatType, lanes, stages, _) => new IEEEToHF(floatType, lanes, stages), // the fourth factory argument is not used by IEEEToHF
27 |     "IEEEToHF",
28 |     "hardfloat"
29 |   )
30 | }
31 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/hardfloat/MulCommon.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.util._
 5 | 
 6 | class MulRecFNPipe(expWidth: Int, sigWidth: Int, latency: Int) extends Module { // multiplier: MulRawFN, then `latency` Pipe stages, then RoundRawFNToRecFN
 7 |   val io = IO(new Bundle {
 8 |     val validin = Input(Bool())
 9 |     val a = Input(UInt((expWidth + sigWidth + 1).W))
10 |     val b = Input(UInt((expWidth + sigWidth + 1).W))
11 |     val roundingMode = Input(UInt(3.W))
12 |     val detectTininess = Input(Bool())
13 | 
14 |     val out = Output(UInt((expWidth + sigWidth + 1).W))
15 |     val exceptionFlags = Output(UInt(5.W))
16 |     val validout = Output(Bool())
17 |   })
18 | 
19 |   // ------------------------------------------------------------------------
20 |   // stage 1: decode both inputs to raw floats and multiply them (combinational)
21 |   val mulRawFN = Module(new _root_.hardfloat.MulRawFN(expWidth, sigWidth))
22 | 
23 |   mulRawFN.io.a := _root_.hardfloat.rawFloatFromRecFN(expWidth, sigWidth, io.a)
24 |   mulRawFN.io.b := _root_.hardfloat.rawFloatFromRecFN(expWidth, sigWidth, io.b)
25 | 
26 |   // ------------------------------------------------------------------------
27 |   // stage 2: round the raw product; every input to the rounder is delayed by `latency` cycles, captured only while validin is high
28 |   val roundRawFNToRecFN =
29 |     Module(new _root_.hardfloat.RoundRawFNToRecFN(expWidth, sigWidth, 0))
30 |   roundRawFNToRecFN.io.invalidExc := Pipe(
31 |     io.validin,
32 |     mulRawFN.io.invalidExc,
33 |     latency
34 |   ).bits
35 |   roundRawFNToRecFN.io.infiniteExc := false.B
36 |   roundRawFNToRecFN.io.in := Pipe(
37 |     io.validin,
38 |     mulRawFN.io.rawOut,
39 |     latency
40 |   ).bits
41 |   roundRawFNToRecFN.io.roundingMode := Pipe(
42 |     io.validin,
43 |     io.roundingMode,
44 |     latency
45 |   ).bits
46 |   roundRawFNToRecFN.io.detectTininess := Pipe(
47 |     io.validin,
48 |     io.detectTininess,
49 |     latency
50 |   ).bits
51 |   // validout is validin delayed by `latency`; the piped data value (false.B) is a placeholder, only .valid is used
52 |   io.validout := Pipe(io.validin, false.B, latency).valid
53 |   io.out := roundRawFNToRecFN.io.out
54 |   io.exceptionFlags := roundRawFNToRecFN.io.exceptionFlags
55 | }
56 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/opencores/IEEEFPU.scala:
--------------------------------------------------------------------------------
  1 | package fpuwrapper.opencores
  2 | 
  3 | import fpuwrapper._
  4 | import spinal.core._
  5 | import spinal.lib._
  6 | 
  7 | import java.nio.file.Paths
  8 | 
  9 | object IEEEFPUOp extends SpinalEnum { // operation selector; its bit encoding is resized onto the blackbox's 3-bit fpu_op input, so element order matters (presumably matches the opencores fpu opcode table; verify)
 10 |   val FADD = newElement()
 11 |   val FSUB = newElement()
 12 |   val FMUL = newElement()
 13 |   val FDIV = newElement()
 14 |   val INT2FP = newElement()
 15 |   val FP2INT = newElement()
 16 |   // NOP is an alias of FADD, not a separate element
 17 |   val NOP = FADD
 18 | }
 19 | 
 20 | class IEEEFPURequest(val floatType: FloatType) extends Bundle { // request payload: one operation and two operands
 21 |   val op = IEEEFPUOp()
 22 |   val operands = Vec(UInt(floatType.width() bits), 2) // operands(0) drives opa, operands(1) drives opb
 23 | }
 24 | 
 25 | class IEEEFPUResponse(val floatType: FloatType) extends Bundle { // response payload; the blackbox status outputs are not forwarded
 26 |   // result, floatType.width() bits
 27 |   val res = UInt(floatType.width() bits)
 28 | }
 29 | 
 30 | class IEEEFPU extends Component { // SpinalHDL wrapper around the "fpu" blackbox, fixed to FloatS
 31 |   val floatType = FloatS
 32 |   val stages = 4 // delay applied to the valid bit; assumes the blackbox latency is 4 cycles (TODO confirm)
 33 |   val io = new Bundle {
 34 |     val req = slave(Flow(new IEEEFPURequest(floatType)))
 35 |     val resp = master(Flow(new IEEEFPUResponse(floatType)))
 36 |   }
 37 | 
 38 |   val fpu = new IEEEFPUBlackBox(floatType)
 39 |   fpu.rmode := 0 // rounding mode input is hard-wired to 0
 40 |   fpu.fpu_op := io.req.op.asBits.resized
 41 |   fpu.opa := io.req.operands(0).asBits
 42 |   fpu.opb := io.req.operands(1).asBits
 43 |   io.resp.res := fpu.out.asUInt
 44 |   // NOTE(review): Delay is called without an init value, so resp.valid has no reset value; confirm this is intended
 45 |   io.resp.valid := Delay(io.req.valid, stages)
 46 | }
 47 | 
 48 | class IEEEFPUBlackBox(val floatType: FloatType) extends BlackBox { // port declaration for the external Verilog module named "fpu"
 49 |   val clk = in(Bool())
 50 |   val rmode = in(Bits(2 bits))
 51 |   val fpu_op = in(Bits(3 bits))
 52 |   val opa = in(Bits(floatType.width() bits))
 53 |   val opb = in(Bits(floatType.width() bits))
 54 |   // `out` is also a port name here, hence the fully qualified spinal.core.out direction calls below
 55 |   val out = spinal.core.out(Bits(floatType.width() bits))
 56 |   val inf = spinal.core.out(Bool())
 57 |   val snan = spinal.core.out(Bool())
 58 |   val qnan = spinal.core.out(Bool())
 59 |   val ine = spinal.core.out(Bool())
 60 |   val overflow = spinal.core.out(Bool())
 61 |   val underflow = spinal.core.out(Bool())
 62 |   val zero = spinal.core.out(Bool())
 63 |   val div_by_zero = spinal.core.out(Bool())
 64 | 
 65 |   setDefinitionName("fpu")
 66 | 
 67 |   // Map the clk port to the current clock domain's clock (no reset is mapped)
 68 |   mapCurrentClockDomain(
 69 |     clock = clk
 70 |   )
 71 |   // Verilog sources implementing the blackbox, looked up under /opencores/ on the classpath
 72 |   val files = Seq(
 73 |     "except.v",
 74 |     "fpu.v",
 75 |     "post_norm.v",
 76 |     "pre_norm_fmul.v",
 77 |     "pre_norm.v",
 78 |     "primitives.v"
 79 |   )
 80 |   for (file <- files) {
 81 |     val res = getClass().getResource(s"/opencores/${file}"); // NOTE(review): getResource returns null for a missing resource, so the next line would throw
 82 |     addRTLPath(Paths.get(res.toURI()).toFile().getAbsolutePath())
 83 |   }
 84 | }
 85 | 
 86 | object IEEEFPU extends App { // generates Verilog for IEEEFPU into a netlist file named OpencoresIEEEFPU.v
 87 |   val verilog = spinal.core.SpinalConfig(netlistFileName = "OpencoresIEEEFPU.v")
 88 |   verilog.generateVerilog(new IEEEFPU())
 89 | }
 90 | 
 91 | object IEEEFPUSynth extends App { // runs Synthesis.build on OpencoresIEEEFPU.v plus the opencores Verilog sources
 92 |   val files = Seq( // same file list as IEEEFPUBlackBox.files; keep the two in sync
 93 |     "except.v",
 94 |     "fpu.v",
 95 |     "post_norm.v",
 96 |     "pre_norm_fmul.v",
 97 |     "pre_norm.v",
 98 |     "primitives.v"
 99 |   )
100 |   val sources = for (file <- files) yield {
101 |     s"./fpu-wrappers/resources/opencores/${file}" // path is relative to the process working directory
102 |   }
103 |   // OpencoresIEEEFPU.v is the name object IEEEFPU configures as netlist file, so it must already have been generated
104 |   Synthesis.build(
105 |     Seq(
106 |       s"OpencoresIEEEFPU.v"
107 |     ) ++ sources,
108 |     s"IEEEFPU_1", // NOTE(review): top-level name passed to the synthesis template; confirm it matches the module name in the generated netlist
109 |     s"opencores_IEEEFPU"
110 |   )
111 | }
112 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/sifive.scala:
--------------------------------------------------------------------------------
 1 | package sifive {
 2 |   package enterprise {
 3 |     package firrtl {
 4 |       import _root_.firrtl.annotations._
 5 | 
 6 |       case class NestedPrefixModulesAnnotation( // annotation class declared locally under the sifive.enterprise.firrtl package name; carries a target, a prefix string and an inclusive flag
 7 |           val target: Target,
 8 |           prefix: String,
 9 |           inclusive: Boolean
10 |       ) extends SingleTargetAnnotation[Target] {
11 |         // duplicate must rebuild the annotation around the NEW target `n`; reusing `target` would silently drop renames
12 |         def duplicate(n: Target): Annotation =
13 |           NestedPrefixModulesAnnotation(n, prefix, inclusive)
14 |       }
15 |     }
16 | 
17 |   }
18 | 
19 | }
20 | 


--------------------------------------------------------------------------------
/fpu-wrappers/src/fpuwrapper/synthesis.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper
 2 | 
 3 | import java.nio.charset.StandardCharsets
 4 | import java.nio.file.Files
 5 | import java.nio.file.Paths
 6 | import java.nio.file.StandardCopyOption
 7 | import scala.io.Source
 8 | import scala.sys.process._
 9 | import scala.language.postfixOps
10 | 
11 | /** Synthesize code with Synopsys Design Compiler: stage the sources in a workspace folder, render the syn.tcl template there and invoke dc_shell
12 |   */
13 | object Synthesis {
14 |   def build( // evaluates to dc_shell's exit code; folderName defaults to toplevelName when null
15 |       sources: Seq[String],
16 |       toplevelName: String,
17 |       folderName: String = null
18 |   ) = {
19 |     val actualFolderName = if (folderName == null) {
20 |       toplevelName
21 |     } else {
22 |       folderName
23 |     }
24 | 
25 |     val dir = s"synWorkspace/${actualFolderName}/"
26 |     Files.createDirectories(Paths.get(dir))
27 | 
28 |     // copy files to synWorkspace (flattened: only the file name is kept, existing copies are overwritten)
29 |     val names = (for (file <- sources) yield {
30 |       val name = Paths.get(file).getFileName()
31 |       Files.copy(
32 |         Paths.get(file),
33 |         Paths.get(s"${dir}/${name}"),
34 |         StandardCopyOption.REPLACE_EXISTING
35 |       )
36 |       name.toString()
37 |     }).toList
38 | 
39 |     // apply template: read syn.tcl from the classpath (closing the stream afterwards) and fill in the placeholders
40 |     var template = { val src = Source.fromResource("syn.tcl"); try src.mkString finally src.close() }
41 |     template = template.replace(
42 |       "INPUT_VERILOG",
43 |       names.filter((s) => s.endsWith(".v")).mkString(" ")
44 |     )
45 |     template = template.replace(
46 |       "INPUT_SYSTEMVERILOG",
47 |       names.filter((s) => s.endsWith(".sv")).mkString(" ")
48 |     )
49 |     template = template.replace(
50 |       "INPUT_VHDL",
51 |       names.filter((s) => s.endsWith(".vhdl")).mkString(" ")
52 |     )
53 |     template = template.replace("TOPLEVEL_NAME", toplevelName)
54 | 
55 |     Files.write(
56 |       Paths.get(s"${dir}/syn.tcl"),
57 |       template.getBytes(StandardCharsets.UTF_8)
58 |     )
59 |     // run dc_shell inside the workspace folder and block until it exits
60 |     Process("dc_shell -f syn.tcl", new java.io.File(dir)) !
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/common.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper
 2 | 
 3 | import svsim._
 4 | import chisel3.RawModule
 5 | import chisel3.simulator._
 6 | import java.nio.file.Files
 7 | import java.io.File
 8 | import scala.reflect.io.Directory
 9 | 
10 | // custom EphemeralSimulator to add options to verilator
11 | 
12 | object Simulator extends PeekPokeAPI { // test helper: runs a module on a Verilator-backed simulator with VCD tracing and a set of warnings disabled
13 | 
14 |   def simulate[T <: RawModule]( // builds a fresh simulator (and temp workspace) per call and runs `body` against the elaborated module
15 |       module: => T
16 |   )(body: (T) => Unit): Unit = {
17 |     makeSimulator.simulate(module)({ module => body(module.wrapped) }).result
18 |   }
19 | 
20 |   private class DefaultSimulator(val workspacePath: String)
21 |       extends SingleBackendSimulator[verilator.Backend] {
22 |     val backend = verilator.Backend.initializeFromProcessEnvironment()
23 |     val tag = "default"
24 |     val commonCompilationSettings = CommonCompilationSettings()
25 |     val backendSpecificCompilationSettings =
26 |       verilator.Backend.CompilationSettings(
27 |         traceStyle =
28 |           Some(verilator.Backend.CompilationSettings.TraceStyle.Vcd()),
29 |         // for fpnew: these Verilator warnings are disabled
30 |         disabledWarnings = Seq(
31 |           "UNOPTFLAT",
32 |           "CASEOVERLAP",
33 |           "UNSIGNED",
34 |           "WIDTHTRUNC",
35 |           "WIDTHEXPAND",
36 |           "ASCRANGE",
37 |           "PINMISSING"
38 |         )
39 |       )
40 | 
41 |     // Try to clean up temporary workspace if possible (one JVM shutdown hook is registered per simulator instance)
42 |     sys.addShutdownHook {
43 |       (new Directory(new File(workspacePath))).deleteRecursively()
44 |     }
45 |   }
46 |   private def makeSimulator: DefaultSimulator = { // creates a temp directory named "<class name>_<runtime MXBean name>_..." and wraps it in a simulator
47 |     // TODO: Use ProcessHandle when we can drop Java 8 support
48 |     // val id = ProcessHandle.current().pid().toString()
49 |     val id = java.lang.management.ManagementFactory.getRuntimeMXBean().getName()
50 |     val className = getClass().getName().stripSuffix("$")
51 |     new DefaultSimulator(
52 |       Files.createTempDirectory(s"${className}_${id}_").toString
53 |     )
54 |   }
55 | }
56 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/flopoco/FPCFExpTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import org.scalatest.funsuite.AnyFunSuite
 4 | import spinal.core._
 5 | import spinal.core.sim._
 6 | import fpuwrapper.FloatS
 7 | 
 8 | // Testbench for FPCFExp: feeds 1.0 and 2.0, then checks the exp() results and the response latency
 9 | class FPCFExpTest extends AnyFunSuite {
10 |   test("FPCFExp") {
11 |     val stages = 1
12 |     SimConfig.withWave.withIVerilog
13 |       .doSim(
14 |         new FPCFExp(
15 |           FloatS,
16 |           2,
17 |           stages
18 |         )
19 |       ) { dut =>
20 |         dut.clockDomain.forkStimulus(period = 10)
21 |         dut.clockDomain.waitRisingEdge()
22 |         // count rising edges so the latency between request and response can be checked
23 |         var cycles = 0
24 |         dut.clockDomain.onRisingEdges {
25 |           cycles = cycles + 1
26 |         }
27 |         // keep the request deasserted for 160 time units before driving it
28 |         dut.io.req.valid #= false
29 |         sleep(160)
30 |         // drive the request (both elements of req.a) right after a rising edge
31 |         dut.clockDomain.waitRisingEdge()
32 |         dut.io.req.valid #= true
33 |         dut.io.req.a(0) #= BigInt("13f800000", 16) // 1.0
34 |         dut.io.req.a(1) #= BigInt("140000000", 16) // 2.0
35 |         // wait for a falling edge with resp.valid set; exactly `stages` rising edges must have been counted
36 |         val beginCycles = cycles
37 |         dut.clockDomain.waitFallingEdgeWhere {
38 |           dut.io.resp.valid.toBoolean
39 |         }
40 |         assert(cycles - beginCycles == stages)
41 |         assert(
42 |           dut.io.resp.res(0).toBigInt == BigInt("1402df854", 16)
43 |         ) // 2.718281828459045
44 |         assert(
45 |           dut.io.resp.res(1).toBigInt == BigInt("140ec7326", 16)
46 |         ) // 7.38905609893065
47 | 
48 |         sleep(100)
49 |       }
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/flopoco/IEEEFExpTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import org.scalatest.funsuite.AnyFunSuite
 4 | import spinal.core._
 5 | import spinal.core.sim._
 6 | import fpuwrapper.FloatS
 7 | 
 8 | // Testbench for IEEEFExp: feeds IEEE-754 single 1.0 and 2.0, then checks the exp() results and the latency
 9 | class IEEEFExpTest extends AnyFunSuite {
10 |   test("IEEEFExp") {
11 |     val stages = 1
12 |     SimConfig.withWave.withIVerilog
13 |       .doSim(
14 |         new IEEEFExp(
15 |           FloatS,
16 |           2,
17 |           stages
18 |         )
19 |       ) { dut =>
20 |         dut.clockDomain.forkStimulus(period = 10)
21 |         dut.clockDomain.waitRisingEdge()
22 |         // count rising edges so the latency between request and response can be checked
23 |         var cycles = 0
24 |         dut.clockDomain.onRisingEdges {
25 |           cycles = cycles + 1
26 |         }
27 |         // keep the request deasserted for 160 time units before driving it
28 |         dut.io.req.valid #= false
29 |         sleep(160)
30 |         // drive the request (both elements of req.a) right after a rising edge
31 |         dut.clockDomain.waitRisingEdge()
32 |         dut.io.req.valid #= true
33 |         dut.io.req.a(0) #= BigInt("3f800000", 16) // 1.0
34 |         dut.io.req.a(1) #= BigInt("40000000", 16) // 2.0
35 |         // wait for a falling edge with resp.valid set; exactly `stages` rising edges must have been counted
36 |         val beginCycles = cycles
37 |         dut.clockDomain.waitFallingEdgeWhere {
38 |           dut.io.resp.valid.toBoolean
39 |         }
40 |         assert(cycles - beginCycles == stages)
41 |         assert(
42 |           dut.io.resp.res(0).toBigInt == BigInt("402df854", 16)
43 |         ) // 2.718281828459045
44 |         assert(
45 |           dut.io.resp.res(1).toBigInt == BigInt("40ec7326", 16)
46 |         ) // 7.38905609893065
47 | 
48 |         sleep(100)
49 |       }
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/flopoco/IEEEFMATest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.flopoco
 2 | 
 3 | import org.scalatest.funsuite.AnyFunSuite
 4 | import spinal.core._
 5 | import spinal.core.sim._
 6 | import fpuwrapper.FloatS
 7 | 
 8 | // Testbench for flopoco's IEEEFMA: one FMADD request (1 * 2 + 3 and 4 * 5 + 6) plus a latency check
 9 | class IEEEFMATest extends AnyFunSuite {
10 |   test("IEEEFMA") {
11 |     val stages = 3
12 |     SimConfig.withWave.withIVerilog
13 |       .doSim(
14 |         new IEEEFMA(
15 |           FloatS,
16 |           2,
17 |           stages
18 |         )
19 |       ) { dut =>
20 |         dut.clockDomain.forkStimulus(period = 10)
21 |         dut.clockDomain.waitRisingEdge()
22 |         // count rising edges so the latency between request and response can be checked
23 |         var cycles = 0
24 |         dut.clockDomain.onRisingEdges {
25 |           cycles = cycles + 1
26 |         }
27 |         // keep the request deasserted for 160 time units before driving it
28 |         dut.io.req.valid #= false
29 |         sleep(160)
30 |         // operands(i)(j): the first index selects the operand, the second the element it belongs to
31 |         dut.clockDomain.waitRisingEdge()
32 |         dut.io.req.valid #= true
33 |         dut.io.req.operands(0)(0) #= BigInt("3f800000", 16) // 1.0
34 |         dut.io.req.operands(1)(0) #= BigInt("40000000", 16) // 2.0
35 |         dut.io.req.operands(2)(0) #= BigInt("40400000", 16) // 3.0
36 |         dut.io.req.operands(0)(1) #= BigInt("40800000", 16) // 4.0
37 |         dut.io.req.operands(1)(1) #= BigInt("40a00000", 16) // 5.0
38 |         dut.io.req.operands(2)(1) #= BigInt("40c00000", 16) // 6.0
39 |         dut.io.req.op #= IEEEFMAOp.FMADD
40 |         // wait for a falling edge with resp.valid set; exactly `stages` rising edges must have been counted
41 |         val beginCycles = cycles
42 |         dut.clockDomain.waitFallingEdgeWhere {
43 |           dut.io.resp.valid.toBoolean
44 |         }
45 |         assert(cycles - beginCycles == stages)
46 |         assert(dut.io.resp.res(0).toBigInt == BigInt("40a00000", 16)) // 5.0
47 |         assert(dut.io.resp.res(1).toBigInt == BigInt("41d00000", 16)) // 26.0
48 | 
49 |         sleep(100)
50 |       }
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/fpnew/IEEEFPUTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.fpnew
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import fpuwrapper.Simulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | // Sends one FMADD request through fpnew's IEEEFPU for each value of `stages` from 1 to 5.
 9 | class IEEEFPUTest extends AnyFreeSpec {
10 |   // fpnew does not support icarus verilog (presumably why fpuwrapper.Simulator is imported here)
11 |   for (stages <- 1 to 5) {
12 |     s"IEEEFPU of ${stages} stages should work" in {
13 |       simulate(new IEEEFPU(FloatS, 2, stages)) { dut =>
14 |         dut.clock.step(16)
15 |         // Assert req.valid, step until req.ready is seen, step once more, then deassert req.valid.
16 |         def enqueueReq() = {
17 |           dut.io.req.valid.poke(true.B)
18 |           while (dut.io.req.ready.peek().litToBoolean == false) {
19 |             dut.clock.step(1)
20 |           }
21 |           dut.clock.step(1)
22 |           dut.io.req.valid.poke(false.B)
23 |         }
24 |         // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
25 |         def expectResp()(x: IEEEFPU => Unit) = {
26 |           val expectedCycles = stages - 1
27 |           var cycles = 0
28 |           dut.io.resp.ready.poke(true.B)
29 |           while (dut.io.resp.valid.peek().litToBoolean == false) {
30 |             dut.clock.step(1)
31 |             cycles += 1
32 |           }
33 |           dut.io.resp.valid.expect(true.B)
34 |           x(dut)
35 |           dut.io.resp.ready.poke(true.B)
36 |           dut.clock.step(1)
37 |           assert(
38 |             cycles == expectedCycles,
39 |             s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
40 |           )
41 |         }
42 |         // Each 64-bit literal repeats the same 32-bit pattern twice; the expected result is 1 * 2 + 3 = 5.
43 |         dut.io.req.bits.operands(0).poke("h3f8000003f800000".U) // 1
44 |         dut.io.req.bits.operands(1).poke("h4000000040000000".U) // 2
45 |         dut.io.req.bits.operands(2).poke("h4040000040400000".U) // 3
46 |         dut.io.req.bits.op.poke(FPOperation.FMADD)
47 |         dut.io.req.bits.srcFormat.poke(FPFloatFormat.Fp32)
48 |         dut.io.req.bits.dstFormat.poke(FPFloatFormat.Fp32)
49 |         enqueueReq()
50 |         expectResp() { dut =>
51 |           dut.io.resp.bits.result.expect("h40a0000040a00000".U)
52 |         } // 5
53 |       }
54 |     }
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/fudian/IEEEFAddTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.fudian
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | // Sends one addition request (2 + 3 and 4 + 5) through fudian's IEEEFAdd for each `stages` from 1 to 5.
 9 | class IEEEFAddTest extends AnyFreeSpec {
10 |   for (stages <- 1 to 5) {
11 |     s"IEEEFAdd of ${stages} stages should work" in {
12 |       simulate(new IEEEFAdd(FloatS, 2, stages)) { dut =>
13 |         dut.clock.step(16)
14 |         // Pulse req.valid for exactly one clock step.
15 |         def enqueueReq() = {
16 |           dut.io.req.valid.poke(true.B)
17 |           dut.clock.step(1)
18 |           dut.io.req.valid.poke(false.B)
19 |         }
20 |         // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
21 |         def expectResp()(x: IEEEFAdd => Unit) = {
22 |           val expectedCycles = stages - 1
23 |           var cycles = 0
24 |           while (dut.io.resp.valid.peek().litToBoolean == false) {
25 |             dut.clock.step(1)
26 |             cycles += 1
27 |           }
28 |           dut.io.resp.valid.expect(true.B)
29 |           x(dut)
30 |           dut.clock.step(1)
31 |           assert(
32 |             cycles == expectedCycles,
33 |             s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
34 |           )
35 |         }
36 |         // Operands are IEEE-754 single-precision bit patterns; index 0 and 1 select the two elements.
37 |         dut.io.req.bits.a(0).poke("h40000000".U) // 2
38 |         dut.io.req.bits.b(0).poke("h40400000".U) // 3
39 |         dut.io.req.bits.a(1).poke("h40800000".U) // 4
40 |         dut.io.req.bits.b(1).poke("h40a00000".U) // 5
41 |         enqueueReq()
42 |         expectResp() { dut =>
43 |           dut.io.resp.bits.res(0).expect("h40a00000".U) // 5
44 |           dut.io.resp.bits.res(1).expect("h41100000".U) // 9
45 |         }
46 |       }
47 |     }
48 |   }
49 | }
50 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/fudian/IEEEFDivSqrtTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.fudian
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import fpuwrapper.Simulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | // Exercises fudian's IEEEFDivSqrt with one DIV request followed by one SQRT request.
 9 | class IEEEFDivSqrtTest extends AnyFreeSpec {
10 |   s"IEEEFDivSqrt should work" in {
11 |     simulate(new IEEEFDivSqrt(FloatS, 2)) { dut =>
12 |       dut.reset.poke(true.B)
13 |       dut.clock.step()
14 |       dut.reset.poke(false.B)
15 |       dut.clock.step()
16 |       // reset was held for one clock step above; idle for 16 more steps before the first request
17 |       dut.clock.step(16)
18 |       // Pulse req.valid for exactly one clock step.
19 |       def enqueueReq() = {
20 |         dut.io.req.valid.poke(true.B)
21 |         dut.clock.step(1)
22 |         dut.io.req.valid.poke(false.B)
23 |       }
24 |       // Step until resp.valid is seen, then run the checks in `x`; no latency is asserted here.
25 |       def expectResp()(x: IEEEFDivSqrt => Unit) = {
26 |         while (dut.io.resp.valid.peek().litToBoolean == false) {
27 |           dut.clock.step(1)
28 |         }
29 |         dut.io.resp.valid.expect(true.B)
30 |         x(dut)
31 |         dut.clock.step(1)
32 |       }
33 |       // division: 1.0 / 2.0 and 3.0 / 4.0
34 |       dut.io.req.bits.a(0).poke("h3f800000".U) // 1.0
35 |       dut.io.req.bits.b(0).poke("h40000000".U) // 2.0
36 |       dut.io.req.bits.a(1).poke("h40400000".U) // 3.0
37 |       dut.io.req.bits.b(1).poke("h40800000".U) // 4.0
38 |       dut.io.req.bits.op.poke(IEEEFDivSqrtOp.DIV)
39 |       enqueueReq()
40 |       expectResp() { dut =>
41 |         dut.io.resp.bits.res(0).expect("h3f000000".U) // 0.5
42 |         dut.io.resp.bits.res(1).expect("h3f400000".U) // 0.75
43 |       }
44 |       // square root of 4.0 and 9.0; the b operands are poked with zero
45 |       dut.io.req.bits.a(0).poke("h40800000".U) // 4.0
46 |       dut.io.req.bits.b(0).poke("h00000000".U) // 0.0
47 |       dut.io.req.bits.a(1).poke("h41100000".U) // 9.0
48 |       dut.io.req.bits.b(1).poke("h00000000".U) // 0.0
49 |       dut.io.req.bits.op.poke(IEEEFDivSqrtOp.SQRT)
50 |       enqueueReq()
51 |       expectResp() { dut =>
52 |         dut.io.resp.bits.res(0).expect("h40000000".U) // 2.0
53 |         dut.io.resp.bits.res(1).expect("h40400000".U) // 3.0
54 |       }
55 |     }
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/fudian/IEEEFMATest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.fudian
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | // Sends one request (1 * 2 + 3 and 4 * 5 + 6) through fudian's IEEEFMA for each `stages` from 1 to 5.
 9 | class IEEEFMATest extends AnyFreeSpec {
10 |   for (stages <- 1 to 5) {
11 |     s"IEEEFMA of ${stages} stages should work" in {
12 |       simulate(new IEEEFMA(FloatS, 2, stages)) { dut =>
13 |         dut.clock.step(16)
14 |         // Pulse req.valid for exactly one clock step.
15 |         def enqueueReq() = {
16 |           dut.io.req.valid.poke(true.B)
17 |           dut.clock.step(1)
18 |           dut.io.req.valid.poke(false.B)
19 |         }
20 |         // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
21 |         def expectResp()(x: IEEEFMA => Unit) = {
22 |           val expectedCycles = stages - 1
23 |           var cycles = 0
24 |           while (dut.io.resp.valid.peek().litToBoolean == false) {
25 |             dut.clock.step(1)
26 |             cycles += 1
27 |           }
28 |           dut.io.resp.valid.expect(true.B)
29 |           x(dut)
30 |           dut.clock.step(1)
31 |           assert(
32 |             cycles == expectedCycles,
33 |             s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
34 |           )
35 |         }
36 |         // operands(i)(j): the first index selects the operand, the second the element it belongs to
37 |         dut.io.req.bits.operands(0)(0).poke("h3f800000".U) // 1
38 |         dut.io.req.bits.operands(1)(0).poke("h40000000".U) // 2
39 |         dut.io.req.bits.operands(2)(0).poke("h40400000".U) // 3
40 |         dut.io.req.bits.operands(0)(1).poke("h40800000".U) // 4
41 |         dut.io.req.bits.operands(1)(1).poke("h40a00000".U) // 5
42 |         dut.io.req.bits.operands(2)(1).poke("h40c00000".U) // 6
43 |         enqueueReq()
44 |         expectResp() { dut =>
45 |           dut.io.resp.bits.res(0).expect("h40a00000".U) // 5
46 |           dut.io.resp.bits.res(1).expect("h41d00000".U) // 26
47 |         }
48 |       }
49 |     }
50 |   }
51 | }
52 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/hardfloat/HFFCmpTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | 
 9 | // Sends three comparison requests (GE, GE, LT) through HFFCmp for each `stages` from 1 to 5.
10 | class HFFCmpTest extends AnyFreeSpec {
11 |   for (stages <- 1 to 5) {
12 |     s"HFFCmp of ${stages} stages should work" in {
13 |       simulate(new HFFCmp(FloatS, 2, stages)) { dut =>
14 |           dut.clock.step(16)
15 |           // Pulse req.valid for exactly one clock step.
16 |           def enqueueReq() = {
17 |             dut.io.req.valid.poke(true.B)
18 |             dut.clock.step(1)
19 |             dut.io.req.valid.poke(false.B)
20 |           }
21 |           // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
22 |           def expectResp()(x: HFFCmp => Unit) = {
23 |             val expectedCycles = stages - 1
24 |             var cycles = 0
25 |             while (dut.io.resp.valid.peek().litToBoolean == false) {
26 |               dut.clock.step(1)
27 |               cycles += 1
28 |             }
29 |             dut.io.resp.valid.expect(true.B)
30 |             x(dut)
31 |             dut.clock.step(1)
32 |             assert(
33 |               cycles == expectedCycles,
34 |               s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
35 |             )
36 |           }
37 |           // GE with r1 > r2 in both elements: 1 >= 0 and 100 >= 99
38 |           dut.io.req.bits.r1(0).poke("h080000000".U) // 1
39 |           dut.io.req.bits.r2(0).poke("h000000000".U) // 0
40 |           dut.io.req.bits.r1(1).poke("h083480000".U) // 100
41 |           dut.io.req.bits.r2(1).poke("h083460000".U) // 99
42 |           dut.io.req.bits.op.poke(HFFCmpOp.GE)
43 |           enqueueReq()
44 |           expectResp() { dut =>
45 |             dut.io.resp.bits.res(0).expect("h00000001".U) // true
46 |             dut.io.resp.bits.res(1).expect("h00000001".U) // true
47 |           }
48 |           // GE with the operands swapped: 0 >= 1 and 99 >= 100
49 |           dut.io.req.bits.r1(0).poke("h000000000".U) // 0
50 |           dut.io.req.bits.r2(0).poke("h080000000".U) // 1
51 |           dut.io.req.bits.r1(1).poke("h083460000".U) // 99
52 |           dut.io.req.bits.r2(1).poke("h083480000".U) // 100
53 |           dut.io.req.bits.op.poke(HFFCmpOp.GE)
54 |           enqueueReq()
55 |           expectResp() { dut =>
56 |             dut.io.resp.bits.res(0).expect("h00000000".U) // false
57 |             dut.io.resp.bits.res(1).expect("h00000000".U) // false
58 |           }
59 |           // LT with negative inputs: -1 < 0 and -99 < -100
60 |           dut.io.req.bits.r1(0).poke("h180000000".U) // -1
61 |           dut.io.req.bits.r2(0).poke("h000000000".U) // 0
62 |           dut.io.req.bits.r1(1).poke("h183460000".U) // -99
63 |           dut.io.req.bits.r2(1).poke("h183480000".U) // -100
64 |           dut.io.req.bits.op.poke(HFFCmpOp.LT)
65 |           enqueueReq()
66 |           expectResp() { dut =>
67 |             dut.io.resp.bits.res(0).expect("h00000001".U) // true
68 |             dut.io.resp.bits.res(1).expect("h00000000".U) // false
69 |           }
70 |         }
71 |     }
72 |   }
73 | }
74 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/hardfloat/HFFDivSqrtTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import fpuwrapper.Simulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | // Exercises HFFDivSqrt with two DIV requests (positive and negative divisors) and one SQRT request.
 9 | class HFFDivSqrtTest extends AnyFreeSpec {
10 |   s"HFFDivSqrt should work" in {
11 |     simulate(new HFFDivSqrt(FloatS, 2)) { dut =>
12 |       dut.reset.poke(true.B)
13 |       dut.clock.step()
14 |       dut.reset.poke(false.B)
15 |       dut.clock.step()
16 |       // reset was held for one clock step above; idle for 16 more steps before the first request
17 |       dut.clock.step(16)
18 |       // Pulse req.valid for exactly one clock step.
19 |       def enqueueReq() = {
20 |         dut.io.req.valid.poke(true.B)
21 |         dut.clock.step(1)
22 |         dut.io.req.valid.poke(false.B)
23 |       }
24 |       // Step until resp.valid is seen, then run the checks in `x`; no latency is asserted here.
25 |       def expectResp()(x: HFFDivSqrt => Unit) = {
26 |         while (dut.io.resp.valid.peek().litToBoolean == false) {
27 |           dut.clock.step(1)
28 |         }
29 |         dut.io.resp.valid.expect(true.B)
30 |         x(dut)
31 |         dut.clock.step(1)
32 |       }
33 |       // division: 1.0 / 2.0 and 3.0 / 4.0
34 |       dut.io.req.bits.a(0).poke("h080000000".U) // 1.0
35 |       dut.io.req.bits.b(0).poke("h080800000".U) // 2.0
36 |       dut.io.req.bits.a(1).poke("h080c00000".U) // 3.0
37 |       dut.io.req.bits.b(1).poke("h081000000".U) // 4.0
38 |       dut.io.req.bits.op.poke(HFFDivSqrtOp.DIV)
39 |       enqueueReq()
40 |       expectResp() { dut =>
41 |         dut.io.resp.bits.res(0).expect("h07f800000".U) // 0.5
42 |         dut.io.resp.bits.res(1).expect("h07fc00000".U) // 0.75
43 |       }
44 |       // division by negative values: 1.0 / -2.0 and 3.0 / -4.0
45 |       dut.io.req.bits.a(0).poke("h080000000".U) // 1.0
46 |       dut.io.req.bits.b(0).poke("h180800000".U) // -2.0
47 |       dut.io.req.bits.a(1).poke("h080c00000".U) // 3.0
48 |       dut.io.req.bits.b(1).poke("h181000000".U) // -4.0
49 |       dut.io.req.bits.op.poke(HFFDivSqrtOp.DIV)
50 |       enqueueReq()
51 |       expectResp() { dut =>
52 |         dut.io.resp.bits.res(0).expect("h17f800000".U) // -0.5
53 |         dut.io.resp.bits.res(1).expect("h17fc00000".U) // -0.75
54 |       }
55 |       // square root of 4.0 and 9.0; the b operands are poked with zero
56 |       dut.io.req.bits.a(0).poke("h081000000".U) // 4.0
57 |       dut.io.req.bits.b(0).poke("h000000000".U) // 0.0
58 |       dut.io.req.bits.a(1).poke("h081900000".U) // 9.0
59 |       dut.io.req.bits.b(1).poke("h000000000".U) // 0.0
60 |       dut.io.req.bits.op.poke(HFFDivSqrtOp.SQRT)
61 |       enqueueReq()
62 |       expectResp() { dut =>
63 |         dut.io.resp.bits.res(0).expect("h080800000".U) // 2.0
64 |         dut.io.resp.bits.res(1).expect("h080c00000".U) // 3.0
65 |       }
66 |     }
67 |   }
68 | }
69 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/hardfloat/HFFMATest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | 
 9 | // Sends one FMADD request (1 * 2 + 3 and 4 * 5 + 6) through HFFMA for each `stages` from 1 to 5.
10 | class HFFMATest extends AnyFreeSpec {
11 |   for (stages <- 1 to 5) {
12 |     s"HFFMA of ${stages} stages should work" in {
13 |       simulate(new HFFMA(FloatS, 2, stages)) { dut =>
14 |           dut.clock.step(16)
15 |           // Pulse req.valid for exactly one clock step.
16 |           def enqueueReq() = {
17 |             dut.io.req.valid.poke(true.B)
18 |             dut.clock.step(1)
19 |             dut.io.req.valid.poke(false.B)
20 |           }
21 |           // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
22 |           def expectResp()(x: HFFMA => Unit) = {
23 |             val expectedCycles = stages - 1
24 |             var cycles = 0
25 |             while (dut.io.resp.valid.peek().litToBoolean == false) {
26 |               dut.clock.step(1)
27 |               cycles += 1
28 |             }
29 |             dut.io.resp.valid.expect(true.B)
30 |             x(dut)
31 |             dut.clock.step(1)
32 |             assert(
33 |               cycles == expectedCycles,
34 |               s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
35 |             )
36 |           }
37 |           // operands(i)(j): the first index selects the operand, the second the element it belongs to
38 |           dut.io.req.bits.operands(0)(0).poke("h080000000".U) // 1
39 |           dut.io.req.bits.operands(1)(0).poke("h080800000".U) // 2
40 |           dut.io.req.bits.operands(2)(0).poke("h080c00000".U) // 3
41 |           dut.io.req.bits.operands(0)(1).poke("h081000000".U) // 4
42 |           dut.io.req.bits.operands(1)(1).poke("h081200000".U) // 5
43 |           dut.io.req.bits.operands(2)(1).poke("h081400000".U) // 6
44 |           dut.io.req.bits.op.poke(FMAOp.FMADD)
45 |           enqueueReq()
46 |           expectResp() { dut =>
47 |             dut.io.resp.bits.res(0).expect("h081200000".U) // 5
48 |             dut.io.resp.bits.res(1).expect("h082500000".U) // 26
49 |           }
50 |         }
51 |     }
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/hardfloat/HFMulTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | 
 9 | // Sends one multiplication request (1 * 2 and 3 * 4) through HFFMul for each `stages` from 1 to 5.
10 | class HFFMulTest extends AnyFreeSpec {
11 |   for (stages <- 1 to 5) {
12 |     s"HFFMul of ${stages} stages should work" in {
13 |       simulate(new HFFMul(FloatS, 2, stages)) { dut =>
14 |           dut.clock.step(16)
15 |           // Pulse req.valid for exactly one clock step.
16 |           def enqueueReq(): Unit = {
17 |             dut.io.req.valid.poke(true.B)
18 |             dut.clock.step(1)
19 |             dut.io.req.valid.poke(false.B)
20 |           }
21 |           // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
22 |           def expectResp()(x: HFFMul => Unit) = {
23 |             val expectedCycles = stages - 1
24 |             var cycles = 0
25 |             while (dut.io.resp.valid.peek().litToBoolean == false) {
26 |               dut.clock.step(1)
27 |               cycles += 1
28 |             }
29 |             dut.io.resp.valid.expect(true.B)
30 |             x(dut)
31 |             dut.clock.step(1)
32 |             assert(
33 |               cycles == expectedCycles,
34 |               s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
35 |             )
36 |           }
37 |           // index 0 and 1 select the two elements multiplied by this request
38 |           dut.io.req.bits.a(0).poke("h080000000".U) // 1
39 |           dut.io.req.bits.b(0).poke("h080800000".U) // 2
40 |           dut.io.req.bits.a(1).poke("h080c00000".U) // 3
41 |           dut.io.req.bits.b(1).poke("h081000000".U) // 4
42 |           enqueueReq()
43 |           expectResp() { dut =>
44 |             dut.io.resp.bits.res(0).expect("h080800000".U) // 2
45 |             dut.io.resp.bits.res(1).expect("h081c00000".U) // 12
46 |           }
47 |         }
48 |     }
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/hardfloat/IEEEFMATest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | 
 9 | // Sends one FMADD request (1 * 2 + 3 and 4 * 5 + 6) through hardfloat's IEEEFMA for each `stages` from 1 to 5.
10 | class IEEEFMATest extends AnyFreeSpec {
11 |   for (stages <- 1 to 5) {
12 |     s"IEEEFMA of ${stages} stages should work" in {
13 |       simulate(new IEEEFMA(FloatS, 2, stages)) { dut =>
14 |           dut.clock.step(16)
15 |           // Pulse req.valid for exactly one clock step.
16 |           def enqueueReq() = {
17 |             dut.io.req.valid.poke(true.B)
18 |             dut.clock.step(1)
19 |             dut.io.req.valid.poke(false.B)
20 |           }
21 |           // Step until resp.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
22 |           def expectResp()(x: IEEEFMA => Unit) = {
23 |             val expectedCycles = stages - 1
24 |             var cycles = 0
25 |             while (dut.io.resp.valid.peek().litToBoolean == false) {
26 |               dut.clock.step(1)
27 |               cycles += 1
28 |             }
29 |             dut.io.resp.valid.expect(true.B)
30 |             x(dut)
31 |             dut.clock.step(1)
32 |             assert(
33 |               cycles == expectedCycles,
34 |               s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
35 |             )
36 |           }
37 |           // operands(i)(j): the first index selects the operand, the second the element it belongs to
38 |           dut.io.req.bits.operands(0)(0).poke("h3f800000".U) // 1
39 |           dut.io.req.bits.operands(1)(0).poke("h40000000".U) // 2
40 |           dut.io.req.bits.operands(2)(0).poke("h40400000".U) // 3
41 |           dut.io.req.bits.operands(0)(1).poke("h40800000".U) // 4
42 |           dut.io.req.bits.operands(1)(1).poke("h40a00000".U) // 5
43 |           dut.io.req.bits.operands(2)(1).poke("h40c00000".U) // 6
44 |           dut.io.req.bits.op.poke(FMAOp.FMADD)
45 |           enqueueReq()
46 |           expectResp() { dut =>
47 |             dut.io.resp.bits.res(0).expect("h40a00000".U) // 5
48 |             dut.io.resp.bits.res(1).expect("h41d00000".U) // 26
49 |           }
50 |         }
51 |     }
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/hardfloat/IEEEToHFTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.hardfloat
 2 | 
 3 | import chisel3._
 4 | import chisel3.experimental.BundleLiterals._
 5 | import chisel3.simulator.EphemeralSimulator._
 6 | import org.scalatest.freespec.AnyFreeSpec
 7 | import fpuwrapper.FloatS
 8 | 
 9 | // Converts 1 and 100 through IEEEToHF for each `stages` from 1 to 5 and checks the output bit patterns.
10 | class IEEEToHFTest extends AnyFreeSpec {
11 |   for (stages <- 1 to 5) {
12 |     s"IEEEToHF of ${stages} stages should work" in {
13 |       simulate(new IEEEToHF(FloatS, 2, stages)) { dut =>
14 |           dut.clock.step(16)
15 |           // Pulse io.float.valid for exactly one clock step.
16 |           def enqueueReq() = {
17 |             dut.io.float.valid.poke(true.B)
18 |             dut.clock.step(1)
19 |             dut.io.float.valid.poke(false.B)
20 |           }
21 |           // Step until io.hardfloat.valid is seen, run the checks in `x`, then require stages - 1 steps of waiting.
22 |           def expectResp()(x: IEEEToHF => Unit) = {
23 |             val expectedCycles = stages - 1
24 |             var cycles = 0
25 |             while (dut.io.hardfloat.valid.peek().litToBoolean == false) {
26 |               dut.clock.step(1)
27 |               cycles += 1
28 |             }
29 |             dut.io.hardfloat.valid.expect(true.B)
30 |             x(dut)
31 |             dut.clock.step(1)
32 |             assert(
33 |               cycles == expectedCycles,
34 |               s"Response does not appear after expected cycles: ${cycles} != ${expectedCycles}"
35 |             )
36 |           }
37 |           // inputs are IEEE-754 single-precision patterns (0x3f800000 and 0x42c80000) written with a leading zero digit
38 |           dut.io.float.bits(0).poke("h03f800000".U) // 1
39 |           dut.io.float.bits(1).poke("h042c80000".U) // 100
40 |           enqueueReq()
41 |           expectResp() { dut =>
42 |             dut.io.hardfloat.bits(0).expect("h080000000".U)
43 |             dut.io.hardfloat.bits(1).expect("h083480000".U)
44 |           }
45 |         }
46 |     }
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/fpu-wrappers/test/src/fpuwrapper/opencores/IEEEFPUTest.scala:
--------------------------------------------------------------------------------
 1 | package fpuwrapper.opencores
 2 | 
 3 | import org.scalatest.funsuite.AnyFunSuite
 4 | import spinal.core._
 5 | import spinal.core.sim._
 6 | 
 7 | // Testbench for opencores' IEEEFPU: issues two requests back to back and checks 1.0 + 2.0 and 3.0 + 4.0
 8 | class IEEEFPUTest extends AnyFunSuite {
 9 |   test("IEEEFPU") {
10 |     SimConfig.withWave.withIVerilog
11 |       .doSim(
12 |         new IEEEFPU()
13 |       ) { dut =>
14 |         dut.clockDomain.forkStimulus(period = 10)
15 |         dut.clockDomain.waitRisingEdge()
16 |         // keep the request deasserted for 160 time units before driving it
17 |         dut.io.req.valid #= false
18 |         sleep(160)
19 |         // first request: FADD with operands 1.0 and 2.0
20 |         dut.clockDomain.waitSampling()
21 |         dut.io.req.valid #= true
22 |         dut.io.req.operands(0) #= BigInt("3f800000", 16) // 1.0
23 |         dut.io.req.operands(1) #= BigInt("40000000", 16) // 2.0
24 |         dut.io.req.op #= IEEEFPUOp.FADD
25 |         // drop req.valid after one sampling point, then wait until resp.valid is sampled
26 |         dut.clockDomain.waitSampling()
27 |         dut.io.req.valid #= false
28 |         dut.clockDomain.waitSamplingWhere {
29 |           dut.io.resp.valid.toBoolean
30 |         }
31 |         assert(dut.io.resp.res.toBigInt == BigInt("40400000", 16)) // 3.0
32 |         // second request: req.op is not driven again here; the expected 7.0 corresponds to an addition
33 |         dut.clockDomain.waitSampling()
34 |         dut.io.req.valid #= true
35 |         dut.io.req.operands(0) #= BigInt("40400000", 16) // 3.0
36 |         dut.io.req.operands(1) #= BigInt("40800000", 16) // 4.0
37 |         // drop req.valid after one sampling point, then wait until resp.valid is sampled
38 |         dut.clockDomain.waitSampling()
39 |         dut.io.req.valid #= false
40 |         dut.clockDomain.waitSamplingWhere {
41 |           dut.io.resp.valid.toBoolean
42 |         }
43 |         assert(dut.io.resp.res.toBigInt == BigInt("40e00000", 16)) // 7.0
44 | 
45 |         sleep(100)
46 |       }
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/shell.nix:
--------------------------------------------------------------------------------
 1 | { pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/e42377bbe5ef06ffec13eebf7949d72793ed66f9.tar.gz") {}
 2 | }:
 3 | # Shell with the tools listed below; `pkgs` defaults to a nixpkgs tarball pinned to a fixed commit.
 4 | pkgs.mkShell {
 5 |   buildInputs = with pkgs; [
 6 |     circt # 1.59.0
 7 |     mill
 8 |     boost
 9 |     verilog # iverilog
10 |     verilator
11 |   ];
12 | }
13 | 


--------------------------------------------------------------------------------
/synWorkspace/.gitignore:
--------------------------------------------------------------------------------
 1 | alib-*
 2 | formality_svf
 3 | FM_WORK
 4 | *.log
 5 | *.svf
 6 | *.txt
 7 | *.ddc
 8 | *.sdc
 9 | *.sdf
10 | *.tcl
11 | *.syn
12 | *.mr
13 | *.vhdl
14 | *.vif
15 | *.lck
16 | 


--------------------------------------------------------------------------------
/synWorkspace/report.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import collections
 4 | 
 5 | data = collections.defaultdict(lambda :{})
 6 | 
 7 | for file_name in glob.glob('*/*_report_area.txt'):
 8 | 	parts = file_name.split('/')
 9 | 	key = parts[0]
10 | 	with open(file_name) as f:
11 | 		for line in f:
12 | 			line = line.strip()
13 | 			if line.startswith('Combinational area:'):
14 | 				area = line.split(' ')[-1]
15 | 				data[key]['comb_area'] = float(area)
16 | 			if line.startswith('Buf/Inv area:'):
17 | 				area = line.split(' ')[-1]
18 | 				data[key]['buf_area'] = float(area)
19 | 			if line.startswith('Noncombinational area:'):
20 | 				area = line.split(' ')[-1]
21 | 				data[key]['non_comb_area'] = float(area)
22 | 			if line.startswith('Macro/Black Box area:'):
23 | 				area = line.split(' ')[-1]
24 | 				data[key]['macro_area'] = float(area)
25 | 			if line.startswith('Total area:'):
26 | 				area = line.split(' ')[-1]
27 | 				data[key]['total_area'] = float(area)
28 | 			if line.startswith('Number of cells:'):
29 | 				cells = line.split(' ')[-1]
30 | 				data[key]['cells'] = int(cells)
31 | 
32 | for file_name in glob.glob('*/*_report_power.txt'):
33 | 	parts = file_name.split('/')
34 | 	key = parts[0]
35 | 	with open(file_name) as f:
36 | 		for line in f:
37 | 			line = line.strip()
38 | 			if '100.0' in line:
39 | 				parts = list(filter(lambda s: len(s) > 0, line.split(' ')))
40 | 				total_power = parts[-2]
41 | 				data[key]['total_power'] = float(total_power)
42 | 				leakage_power = parts[-3]
43 | 				data[key]['leakage_power'] = float(leakage_power)
44 | 				internal_power = parts[-4]
45 | 				data[key]['internal_power'] = float(internal_power)
46 | 				switch_power = parts[-5]
47 | 				data[key]['switch_power'] = float(switch_power)
48 | 
49 | for file_name in glob.glob('*/*_report_timing_setup.txt'):
50 | 	parts = file_name.split('/')
51 | 	key = parts[0]
52 | 	with open(file_name) as f:
53 | 		for line in f:
54 | 			line = line.strip()
55 | 			if 'data arrival time' in line:
56 | 				time = line.split(' ')[-1]
57 | 				data[key]['max_comb_delay'] = float(time)
58 | 				break
59 | 
60 | keys = data.keys()
61 | for key in sorted(keys):
62 | 	value = data[key]
63 | 	print('{}:'.format(key))
64 | 	print('  Cells: {}'.format(value['cells']))
65 | 	print('  Area: Comb={:.0f} Buf={:.0f} NonComb={:.0f} Macro={:.0f} Total={:.0f}'.format(value['comb_area'], value['buf_area'], value['non_comb_area'], value['macro_area'], value['total_area']))
66 | 	print('  Power:', end='')
67 | 	if 'switch_power' in value:
68 | 		print(' Switch({:.3f} mW)'.format(value['switch_power']), end='')
69 | 	if 'internal_power' in value:
70 | 		print(' Internal({:.3f} mW)'.format(value['internal_power']), end='')
71 | 	if 'leakage_power' in value:
72 | 		print(' Leakage({:.3f} mW)'.format(value['leakage_power'] / 1000), end='')
73 | 	if 'total_power' in value:
74 | 		print(' Total({:.3f} mW)'.format(value['total_power']), end='')
75 | 	print()
76 | 	print('  Max Freq: {:.0f} MHz ({:.2f} ns)'.format(1000.0 / value['max_comb_delay'], value['max_comb_delay']))


--------------------------------------------------------------------------------
/synWorkspace/report.yaml:
--------------------------------------------------------------------------------
 1 | IEEEFMA_D1l2s_fudian:
 2 |   Cells: 22859
 3 |   Area: Comb=39987 Buf=4012 NonComb=789 Macro=0 Total=40776
 4 |   Power: Switch(60.202 mW) Internal(21.814 mW) Leakage(1.450 mW) Total(83.467 mW)
 5 |   Max Freq: 690 MHz (1.45 ns)
 6 | IEEEFMA_D1l2s_hardfloat:
 7 |   Cells: 17432
 8 |   Area: Comb=30823 Buf=3175 NonComb=1110 Macro=0 Total=31933
 9 |   Power: Switch(32.621 mW) Internal(14.072 mW) Leakage(1.150 mW) Total(47.841 mW)
10 |   Max Freq: 662 MHz (1.51 ns)
11 | IEEEFMA_D1l3s_fudian:
12 |   Cells: 23930
13 |   Area: Comb=38593 Buf=3947 NonComb=4051 Macro=0 Total=42645
14 |   Power: Switch(39.656 mW) Internal(19.178 mW) Leakage(1.520 mW) Total(60.354 mW)
15 |   Max Freq: 980 MHz (1.02 ns)
16 | IEEEFMA_D1l3s_hardfloat:
17 |   Cells: 18451
18 |   Area: Comb=30849 Buf=3251 NonComb=2898 Macro=0 Total=33746
19 |   Power: Switch(26.427 mW) Internal(14.495 mW) Leakage(1.200 mW) Total(42.118 mW)
20 |   Max Freq: 862 MHz (1.16 ns)
21 | IEEEFMA_D1l4s_fudian:
22 |   Cells: 23244
23 |   Area: Comb=27459 Buf=2308 NonComb=3166 Macro=0 Total=30625
24 |   Power: Switch(29.996 mW) Internal(12.216 mW) Leakage(0.927 mW) Total(43.139 mW)
25 |   Max Freq: 1020 MHz (0.98 ns)
26 | IEEEFMA_D1l4s_hardfloat:
27 |   Cells: 18059
28 |   Area: Comb=21752 Buf=2167 NonComb=2968 Macro=0 Total=24720
29 |   Power: Switch(15.330 mW) Internal(7.890 mW) Leakage(0.786 mW) Total(24.006 mW)
30 |   Max Freq: 1020 MHz (0.98 ns)
31 | IEEEFMA_H1l2s_fudian:
32 |   Cells: 3909
33 |   Area: Comb=6650 Buf=803 NonComb=308 Macro=0 Total=6959
34 |   Power: Switch(7.892 mW) Internal(3.254 mW) Leakage(0.275 mW) Total(11.421 mW)
35 |   Max Freq: 980 MHz (1.02 ns)
36 | IEEEFMA_H1l2s_hardfloat:
37 |   Cells: 2724
38 |   Area: Comb=4096 Buf=435 NonComb=315 Macro=0 Total=4411
39 |   Power: Switch(3.905 mW) Internal(1.788 mW) Leakage(0.161 mW) Total(5.854 mW)
40 |   Max Freq: 1020 MHz (0.98 ns)
41 | IEEEFMA_H1l3s_fudian:
42 |   Cells: 3797
43 |   Area: Comb=4331 Buf=476 NonComb=524 Macro=0 Total=4856
44 |   Power: Switch(4.396 mW) Internal(1.837 mW) Leakage(0.162 mW) Total(6.395 mW)
45 |   Max Freq: 1042 MHz (0.96 ns)
46 | IEEEFMA_H1l3s_hardfloat:
47 |   Cells: 2614
48 |   Area: Comb=2864 Buf=297 NonComb=476 Macro=0 Total=3340
49 |   Power: Switch(2.124 mW) Internal(1.127 mW) Leakage(0.106 mW) Total(3.357 mW)
50 |   Max Freq: 1020 MHz (0.98 ns)
51 | IEEEFMA_H1l4s_fudian:
52 |   Cells: 3546
53 |   Area: Comb=4155 Buf=471 NonComb=586 Macro=0 Total=4741
54 |   Power: Switch(3.649 mW) Internal(1.651 mW) Leakage(0.158 mW) Total(5.458 mW)
55 |   Max Freq: 1042 MHz (0.96 ns)
56 | IEEEFMA_H1l4s_hardfloat:
57 |   Cells: 2521
58 |   Area: Comb=2467 Buf=242 NonComb=598 Macro=0 Total=3065
59 |   Power: Switch(1.937 mW) Internal(1.093 mW) Leakage(0.090 mW) Total(3.121 mW)
60 |   Max Freq: 1053 MHz (0.95 ns)
61 | IEEEFMA_S1l2s_fudian:
62 |   Cells: 9280
63 |   Area: Comb=17082 Buf=1891 NonComb=446 Macro=0 Total=17528
64 |   Power: Switch(21.939 mW) Internal(8.593 mW) Leakage(0.670 mW) Total(31.202 mW)
65 |   Max Freq: 833 MHz (1.20 ns)
66 | IEEEFMA_S1l2s_hardfloat:
67 |   Cells: 6829
68 |   Area: Comb=11081 Buf=1122 NonComb=606 Macro=0 Total=11687
69 |   Power: Switch(10.686 mW) Internal(4.941 mW) Leakage(0.431 mW) Total(16.057 mW)
70 |   Max Freq: 806 MHz (1.24 ns)
71 | IEEEFMA_S1l3s_fudian:
72 |   Cells: 9163
73 |   Area: Comb=11887 Buf=1165 NonComb=1704 Macro=0 Total=13590
74 |   Power: Switch(11.780 mW) Internal(5.466 mW) Leakage(0.459 mW) Total(17.705 mW)
75 |   Max Freq: 1020 MHz (0.98 ns)
76 | IEEEFMA_S1l3s_hardfloat:
77 |   Cells: 7191
78 |   Area: Comb=10342 Buf=1083 NonComb=1199 Macro=0 Total=11541
79 |   Power: Switch(8.059 mW) Internal(4.300 mW) Leakage(0.405 mW) Total(12.764 mW)
80 |   Max Freq: 1020 MHz (0.98 ns)
81 | IEEEFMA_S1l4s_fudian:
82 |   Cells: 9166
83 |   Area: Comb=9920 Buf=1011 NonComb=1263 Macro=0 Total=11182
84 |   Power: Switch(10.482 mW) Internal(4.270 mW) Leakage(0.340 mW) Total(15.092 mW)
85 |   Max Freq: 1053 MHz (0.95 ns)
86 | IEEEFMA_S1l4s_hardfloat:
87 |   Cells: 6424
88 |   Area: Comb=6892 Buf=746 NonComb=1201 Macro=0 Total=8094
89 |   Power: Switch(5.456 mW) Internal(2.716 mW) Leakage(0.248 mW) Total(8.419 mW)
90 |   Max Freq: 1020 MHz (0.98 ns)
91 | 


--------------------------------------------------------------------------------