├── .cirrus.yml
├── .cirrus
    ├── Dockerfile.ubuntu16.04
    └── upload.sh
├── .gitignore
├── LICENSE.txt
├── README.md
├── celllibs
    ├── README
    ├── simple
    │   ├── README
    │   └── simple.lib
    └── supergate
    │   ├── README
    │   ├── supergate.lib
    │   └── supergate.v
├── scripts
    ├── database_html.sh
    ├── database_make.py
    ├── yosys-ice40-flopcount.sh
    ├── yosys-ice40-lutcount.sh
    ├── yosys-sanity.sh
    ├── yosys-simplelib.sh
    └── yosys-supergatelib.sh
├── verilog
    ├── benchmarks_large
    │   ├── .gitignore
    │   ├── boom
    │   │   ├── MediumBoom.v.gz
    │   │   ├── MediumOctoBoom.v.gz
    │   │   ├── MegaOctoBoom.v.gz
    │   │   ├── README.md
    │   │   ├── SmallBoom.v.gz
    │   │   └── SmallQuadBoom.v.gz
    │   ├── cam
    │   │   ├── README.md
    │   │   ├── cam_bram_top.v
    │   │   ├── cam_srl_top.v
    │   │   └── generate.py
    │   ├── cordic
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── cordic.template
    │   │   ├── generate.py
    │   │   └── run_cordic_tb.sh
    │   ├── dspfilters
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── fastfir_dynamictaps.v
    │   │   ├── fastfir_fixedtaps.v
    │   │   ├── generate.py
    │   │   ├── slowfil_fixedtaps.v
    │   │   ├── slowfil_srl.vh
    │   │   ├── slowfil_srl_fixedtaps.v
    │   │   └── taps.hex
    │   ├── ethernet
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── generate.py
    │   │   └── udp_complete_64_top.v
    │   ├── marlann
    │   │   ├── README.md
    │   │   └── marlann_compute.v
    │   ├── mux
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── common.py
    │   │   └── generate.py
    │   ├── opensparc
    │   │   ├── README.md
    │   │   └── t2.v.gz
    │   ├── picosoc
    │   │   ├── .gitignore
    │   │   ├── README.md
    │   │   ├── generate.py
    │   │   ├── picorv32.vh
    │   │   ├── picorv32_large.v
    │   │   ├── picorv32_regular.v
    │   │   ├── picorv32_small.v
    │   │   ├── picosoc.vh
    │   │   ├── picosoc_top.v
    │   │   ├── simpleuart.vh
    │   │   ├── spimemio.vh
    │   │   └── synth_area_top.vh
    │   ├── riscv-bitmanip
    │   │   ├── README.md
    │   │   └── generate.py
    │   ├── sddac
    │   │   ├── README.md
    │   │   ├── architecture.png
    │   │   ├── config.json
    │   │   ├── genspectrumplot.py
    │   │   ├── run_sddac_tb.sh
    │   │   ├── sddac.v
    │   │   └── sddac_tb.v
    │   ├── vexriscv
    │   │   ├── README.md
    │   │   └── vexriscv.demo.GenFull.v
    │   └── wb2axip
    │   │   ├── README.md
    │   │   └── generate.py
    └── benchmarks_small
    │   ├── addertree
    │       ├── .gitignore
    │       ├── README.md
    │       └── generate.py
    │   ├── arith_ops
    │       ├── .gitignore
    │       ├── README.md
    │       └── generate.py
    │   ├── cic
    │       ├── .gitginore
    │       ├── README.md
    │       ├── cic5.v
    │       ├── cic5_tb.v
    │       └── run_testbench.sh
    │   ├── decoder
    │       ├── .gitignore
    │       └── generate.py
    │   ├── dspmac
    │       ├── .gitignore
    │       ├── README.md
    │       ├── dspmac.template
    │       ├── dspmac_16_40_tb.v
    │       ├── generate.py
    │       └── run_testbench.sh
    │   ├── lfsr
    │       ├── .gitignore
    │       ├── README.md
    │       └── generate.py
    │   ├── macc
    │       ├── .gitignore
    │       ├── common.py
    │       └── generate.py
    │   ├── mul
    │       ├── .gitignore
    │       ├── README.md
    │       ├── common.py
    │       └── generate.py
    │   ├── muladd
    │       ├── .gitignore
    │       ├── common.py
    │       └── generate.py
    │   ├── mux
    │       ├── .gitignore
    │       ├── README.md
    │       ├── common.py
    │       └── generate.py
    │   ├── onehot
    │       ├── .gitignore
    │       ├── README.md
    │       └── generate.py
    │   ├── popcount
    │       ├── .gitignore
    │       ├── README.md
    │       └── generate.py
    │   ├── priodecode
    │       ├── .gitignore
    │       ├── README.md
    │       └── generate.py
    │   ├── ram
    │       ├── .gitignore
    │       ├── dualport_syncram.template
    │       ├── generate.py
    │       ├── syncram.template
    │       └── syncram_tw.template
    │   └── various
    │       ├── .gitignore
    │       ├── README.md
    │       ├── crc32.v
    │       ├── latch.v
    │       ├── pwm256.v
    │       ├── pwm256_tb.v
    │       └── run_testbench.sh
└── vhdl
    ├── benchmarks_large
        └── cordic
        │   ├── .gitignore
        │   ├── cordic.template
        │   ├── cordic_tb.vhdl
        │   ├── generate.py
        │   └── run_cordic_tb.sh
    └── benchmarks_small
        ├── cic
            ├── .gitignore
            ├── README.md
            ├── cic5.m.vhdl
            ├── cic5_tb.m.vhdl
            └── run_testbench.sh
        └── various
            ├── .gitignore
            ├── pwm256.m.vhdl
            ├── pwm256_tb.m.vhdl
            └── run_testbench.sh


/.cirrus.yml:
--------------------------------------------------------------------------------
 1 | task:  # sanity run: database_make.py is invoked with the yosys-sanity argument on ./verilog/
 2 |   name: sanity-test-ubuntu1604
 3 |   container:  # image is built from the Dockerfile referenced below
 4 |     cpu: 1
 5 |     memory: 16  # NOTE(review): no unit suffix given -- confirm this is the intended amount
 6 |     dockerfile: .cirrus/Dockerfile.ubuntu16.04
 7 |   env:  # GITHUB_TOKEN is read by .cirrus/upload.sh
 8 |     GITHUB_TOKEN: ENCRYPTED[c86a89228a6bd63329c6e28f2228d48e79a47a9fa0481d3e15f0f30b0e74fa26b5748afd1e44523e90450d53911e67f4]
 9 |   permission_script: mkdir reports && chmod +x ./scripts/database_make.py && chmod +x ./scripts/database_html.sh && chmod +x .cirrus/upload.sh
10 |   sanity_script: ./scripts/database_make.py yosys-sanity ./verilog/ && ./scripts/database_html.sh && cp ./database/index.html ./reports/benchmarks_sanity.html
11 |   push_script:  .cirrus/upload.sh  # NOTE(review): upload.sh does not list benchmarks_sanity.html -- confirm this step is needed here
12 | 
13 | task:  # small benchmarks: database_make.py is invoked with yosys-ice40-lutcount on ./verilog/benchmarks_small/
14 |   name: small-test-ubuntu1604
15 |   container:  # image is built from the Dockerfile referenced below
16 |     cpu: 1
17 |     memory: 16  # NOTE(review): no unit suffix given -- confirm this is the intended amount
18 |     dockerfile: .cirrus/Dockerfile.ubuntu16.04
19 |   env:  # GITHUB_TOKEN is read by .cirrus/upload.sh
20 |     GITHUB_TOKEN: ENCRYPTED[c86a89228a6bd63329c6e28f2228d48e79a47a9fa0481d3e15f0f30b0e74fa26b5748afd1e44523e90450d53911e67f4]
21 |   permission_script: mkdir reports && chmod +x ./scripts/database_make.py && chmod +x ./scripts/database_html.sh && chmod +x .cirrus/upload.sh
22 |   small_script: ./scripts/database_make.py yosys-ice40-lutcount ./verilog/benchmarks_small/ && ./scripts/database_html.sh && cp ./database/index.html ./reports/benchmarks_small.html
23 |   push_script:  .cirrus/upload.sh  # uploads only when CIRRUS_RELEASE is non-empty (see upload.sh)
24 | 
25 | task:  # large benchmarks: database_make.py is invoked with yosys-ice40-lutcount on ./verilog/benchmarks_large/
26 |   name: large-test-ubuntu1604
27 |   container:  # image is built from the Dockerfile referenced below
28 |     cpu: 1
29 |     memory: 16  # NOTE(review): no unit suffix given -- confirm this is the intended amount
30 |     dockerfile: .cirrus/Dockerfile.ubuntu16.04
31 |   env:  # GITHUB_TOKEN is read by .cirrus/upload.sh
32 |     GITHUB_TOKEN: ENCRYPTED[c86a89228a6bd63329c6e28f2228d48e79a47a9fa0481d3e15f0f30b0e74fa26b5748afd1e44523e90450d53911e67f4]
33 |     EXTRA_FLAGS: -noflatten  # only set for this task; presumably consumed by the benchmark scripts -- TODO confirm
34 |   permission_script: mkdir reports && chmod +x ./scripts/database_make.py && chmod +x ./scripts/database_html.sh && chmod +x .cirrus/upload.sh
35 |   large_script: ./scripts/database_make.py yosys-ice40-lutcount ./verilog/benchmarks_large/ && ./scripts/database_html.sh && cp ./database/index.html ./reports/benchmarks_large.html
36 |   push_script:  .cirrus/upload.sh  # uploads only when CIRRUS_RELEASE is non-empty (see upload.sh)
37 | 
38 | 


--------------------------------------------------------------------------------
/.cirrus/Dockerfile.ubuntu16.04:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:xenial-20181113
 2 | # Stop apt/debconf from asking interactive questions during the image build.
 3 | ENV DEBIAN_FRONTEND=noninteractive
 4 | # Disabled variant of the install step below; it differs only by also installing qt5-default.
 5 | #RUN set -e -x ;\
 6 | #    apt-get -y update ;\
 7 | #    apt-get -y upgrade ;\
 8 | #    apt-get -y install \
 9 | #        build-essential autoconf cmake clang bison wget flex gperf \
10 | #        libreadline-dev gawk tcl-dev libffi-dev graphviz xdot python3-dev \
11 | #        libboost-all-dev git libftdi-dev pkg-config
12 | # Install the toolchain and libraries for this image (presumably the Yosys build dependencies -- TODO confirm).
13 | RUN set -e -x ;\
14 |     apt-get -y update ;\
15 |     apt-get -y upgrade ;\
16 |     apt-get -y install \
17 |         build-essential autoconf cmake clang bison wget flex gperf \
18 |         libreadline-dev gawk tcl-dev libffi-dev graphviz xdot python3-dev \
19 |         libboost-all-dev git libftdi-dev pkg-config
20 | 
21 | # Clone Yosys, check out the tag that `git describe` reports for the newest tagged commit, build and install it, then delete the source tree.
22 | RUN set -e -x ;\
23 |     mkdir -p /usr/local/src ;\
24 |     cd /usr/local/src ;\
25 |     git clone --recursive https://github.com/YosysHQ/yosys.git ;\
26 |     cd yosys ;\
27 |     latestTag=$(git describe --tags `git rev-list --tags --max-count=1`) ;\
28 |     git checkout $latestTag ;\
29 |     make -j $(nproc) ;\
30 |     make install ;\
31 |     rm -rf /usr/local/src/yosys
32 | 
33 | 


--------------------------------------------------------------------------------
/.cirrus/upload.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | #
 3 | # Upload the benchmark reports as assets of a GitHub release.
 4 | #
 5 | # Environment variables read:
 6 | #   CIRRUS_RELEASE         release id used in the upload URL; if empty, nothing is uploaded
 7 | #   CIRRUS_REPO_FULL_NAME  repository path segment used in the upload URL
 8 | #   GITHUB_TOKEN           GitHub access token sent in the Authorization header
 9 | 
10 | if [[ "$CIRRUS_RELEASE" == "" ]]; then
11 |   echo "Not a release. No need to deploy!"
12 |   exit 0
13 | fi
14 | 
15 | if [[ "$GITHUB_TOKEN" == "" ]]; then
16 |   echo "Please provide GitHub access token via GITHUB_TOKEN environment variable!"
17 |   exit 1
18 | fi
19 | 
20 | file_content_type="application/octet-stream"
21 | # NOTE(review): the sanity task writes ./reports/benchmarks_sanity.html, which is not listed here -- confirm that is intended.
22 | files_to_upload=(
23 |   ./reports/benchmarks_small.html
24 |   ./reports/benchmarks_large.html
25 | )
26 | 
27 | # Iterate over every array element; a bare $files_to_upload expands to the first element only.
28 | for fpath in "${files_to_upload[@]}"
29 | do
30 |   # Each CI task builds only its own report, so skip files that do not exist here.
31 |   if [[ ! -f "$fpath" ]]; then
32 |     echo "Skipping $fpath (not generated by this task)."
33 |     continue
34 |   fi
35 |   echo "Uploading $fpath..."
36 |   name=$(basename "$fpath")
37 |   url_to_upload="https://uploads.github.com/repos/$CIRRUS_REPO_FULL_NAME/releases/$CIRRUS_RELEASE/assets?name=$name"
38 |   curl -X POST \
39 |     --data-binary "@$fpath" \
40 |     --header "Authorization: token $GITHUB_TOKEN" \
41 |     --header "Content-Type: $file_content_type" \
42 |     "$url_to_upload"
43 | done
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /database
2 | __pycache__
3 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2018, Symbiotic EDA GmbH.
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software for any
 6 | purpose with or without fee is hereby granted, provided that the above
 7 | copyright notice and this permission notice appear in all copies.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Yosys-bench
 2 | 
 3 | This is a collection of Verilog and VHDL designs of different types and sizes, used as benchmarks in Yosys development.
 4 | 
 5 | Create a PR if you think you have an interesting benchmark.
 6 | 
 7 | ### benchmarks_small
 8 | 
 9 | This directory contains small (mostly synthetic) benchmarks that can be used
10 | to analyse and compare the performance of the tools in specific situations.
11 | 
12 | 
13 | ### benchmarks_large
14 | 
15 | This directory contains larger "real-world" designs. They can be used for
16 | estimating the overall performance of the tools.
17 | 
18 | # Running the benchmarks
19 | 
20 | Benchmarks are processed by the ```./scripts/database_make.py``` Python3 script. The script performs the following steps:
21 | 
22 | * It traverses the given directories and executes the `generate.py` Python script, if there is one. These scripts generate the Verilog or VHDL files of some benchmarks.
23 | * It checks for a `config.json` file. If there is one, it loads the configuration and reads which HDL files it should benchmark.
24 | * If there is no `config.json` file, it simply benchmarks all the `.v` and `.vhdl` files it can find.
25 | 
26 | example:
27 | ```./scripts/database_make.py yosys-ice40-lutcount <directory1> <directory2>```
28 | 
29 | Each benchmark produces an entry in the `./database` directory. Running `./scripts/database_html.sh` will generate a .html file with the results in the `./database` directory.
30 | 
31 | # Adding benchmarks
32 | To add a benchmark, simply create a directory in the `benchmarks_small` or `benchmarks_large` directory, optionally supply a `generate.py` and/or `config.json` and add your HDL files.
33 | 
34 | Please also add a `README.md` file to your benchmark so others know what it is you are benchmarking.
35 | 
36 | # The `config.json` file
37 | The `config.json` file lists the HDL files that you want to benchmark. Each file will be benchmarked separately.
38 | 
39 | Example:
40 | 
41 | ```
42 | {
43 |     "files": 
44 |     [
45 |         "sddac.v", "sddac2.v"
46 |     ]
47 | }
48 | ```
49 | 


--------------------------------------------------------------------------------
/celllibs/README:
--------------------------------------------------------------------------------
1 | Put your proprietary cell libs in subdirs here...


--------------------------------------------------------------------------------
/celllibs/simple/README:
--------------------------------------------------------------------------------
 1 | ========================================
 2 |       Simple Cell Lib version 1.0
 3 | ========================================
 4 | 
 5 | This is a minimal cell library for benchmarking purposes.
 6 | It contains process-agnostic cells with just one drive strength.
 7 | Cell sizes are given in units of the inverter size.
 8 | This library is solely meant to be used for benchmarking and is unfit for any other purpose.
 9 | 
10 | The following cells are present in the library:
11 | 
12 | * inverter.
13 | * tri-state inverter.
14 | * 2-input nand.
15 | * 2-input nor.
16 | * 2-input xor.
17 | * 2-input inverting mux.
18 | * D-type flip-flop with reset and preset.
19 | * buffer.
20 | * latch.
21 | 


--------------------------------------------------------------------------------
/celllibs/simple/simple.lib:
--------------------------------------------------------------------------------
  1 | /********************************************/
  2 | /*                                          */
  3 | /* Simple cell library for Bench marking    */
  4 | /*                                          */
  5 | /* Symbiotic EDA GmbH / Moseley Instruments */
  6 | /* Niels A. Moseley                         */
  7 | /*                                          */
  8 | /* Process: none                            */
  9 | /*                                          */
 10 | /* Date   : 12-9-2018                       */
 11 | /* Version: 1.1                             */
 12 | /*                                          */
 13 | /* Changelog:                               */
 14 | /*    1.0     NAM     Initial version       */
 15 | /*    1.1     NAM     Added latch           */
 16 | /*                                          */
 17 | /********************************************/
 18 | 
 19 | library(simple) 
 20 | {
 21 |   technology (cmos);
 22 |   revision : 1.0;
 23 |   
 24 |   time_unit                     : "1ps";
 25 |   pulling_resistance_unit       : "1kohm";  
 26 |   voltage_unit                  : "1V";
 27 |   current_unit                  : "1uA";  
 28 |   
 29 |   capacitive_load_unit(1,ff);
 30 |   
 31 |   default_inout_pin_cap         :  7.0;
 32 |   default_input_pin_cap         :  7.0;
 33 |   default_output_pin_cap        :  0.0;
 34 |   default_fanout_load           :  1.0;
 35 | 
 36 |   default_wire_load_capacitance : 0.1;
 37 |   default_wire_load_resistance  : 1.0e-3;
 38 |   default_wire_load_area        : 0.0;
 39 | 
 40 |   nom_process                   :  1.0;
 41 |   nom_temperature               : 25.0;
 42 |   nom_voltage                   :  1.2;
 43 |   
 44 |   delay_model                   : generic_cmos;
 45 |   
 46 |   /* Inverter */
 47 |   cell (inv)
 48 |   {
 49 |     area : 1;
 50 |     pin(A)
 51 |     {
 52 |       direction : input;
 53 |     }
 54 |     pin(Y)
 55 |     {
 56 |       direction : output;
 57 |       function : "A'";
 58 |     }
 59 |   }
 60 |   
 61 |   /* tri-state inverter: Z = A', output is high-impedance while S is low */
 62 |   cell (tri_inv)
 63 |   {
 64 |     area : 4;
 65 |     pin(A)
 66 |     {
 67 |       direction : input;
 68 |     }
 69 |     pin(S)
 70 |     {
 71 |       direction : input;
 72 |     }
 73 |     pin(Z)
 74 |     {
 75 |       direction : output;
 76 |       function  : "A'";
 77 |       three_state : "S'";
 78 |     }
 79 |   }
 80 |   
 81 |   cell (buf)
 82 |   {
 83 |     area : 5;
 84 |     pin(A)
 85 |     {
 86 |       direction : input;
 87 |     }
 88 |     pin(Y)
 89 |     {
 90 |       direction : output;
 91 |       function : "A";
 92 |     }
 93 |   }  
 94 |   
 95 |   /* 2-input NAND gate */
 96 |   cell (nand2)
 97 |   {
 98 |     area : 3;
 99 |     pin(A)
100 |     {
101 |       direction : input;
102 |     }
103 |     pin(B)
104 |     {
105 |       direction : input;
106 |     }
107 |     pin(Y)
108 |     {
109 |       direction: output;
110 |       function : "(A * B)'";
111 |     }
112 |   }
113 |   
114 |   /* 2-input NOR gate */
115 |   cell (nor2)
116 |   {
117 |     area : 3;
118 |     pin(A)
119 |     {
120 |       direction : input;
121 |     }
122 |     pin(B)
123 |     {
124 |       direction : input;
125 |     }
126 |     pin(Y)
127 |     {
128 |       direction: output;
129 |       function : "(A + B)'";
130 |     }
131 |   }
132 |   
133 |   /* 2-input XOR */
134 |   cell (xor2)
135 |   {
136 |     area : 6;
137 |     pin(A)
138 |     {
139 |       direction : input;
140 |     }
141 |     pin(B)
142 |     {
143 |       direction : input;
144 |     }
145 |     pin(Y)
146 |     {
147 |       direction: output;
148 |       function : "(A *B') + (A' * B)";
149 |     }
150 |   }
151 |   
152 |   /* 2-input inverting MUX */
153 |   cell (imux2)
154 |   {
155 |     area : 5;
156 |     pin(A)
157 |     {
158 |       direction : input;
159 |     }
160 |     pin(B)
161 |     {
162 |       direction : input;
163 |     }
164 |     pin(S)
165 |     {
166 |       direction : input;
167 |     } 
168 |     pin(Y)
169 |     {
170 |       direction: output;
171 |       function : "( (A * S) + (B * S') )'";
172 |     }
173 |   }
174 |   
175 |   /* D-type flip-flop with asynchronous reset and preset */
176 |   cell (dff)
177 |   {
178 |     area : 83;
179 |     ff("IQ", "IQN") 
180 |     {
181 |       next_state : "D";
182 |       clocked_on : "CLK";
183 |       clear      : "RESET";
184 |       preset     : "PRESET";
185 |       clear_preset_var1 : L;
186 |       clear_preset_var2 : L;
187 |     } 
188 |     pin(D)
189 |     {
190 |       direction : input;
191 |     }
192 |     pin(CLK)
193 |     {
194 |       direction : input;
195 |     }
196 |     pin(RESET)
197 |     {
198 |       direction : input;
199 |     }
200 |     pin(PRESET)
201 |     {
202 |       direction : input;
203 |     }
204 |     pin(Q)
205 |     {
206 |       direction: output;
207 |       function : "IQ";
208 |       timing() {
209 |         timing_type : rising_edge;
210 |         intrinsic_rise : 65;
211 |         intrinsic_fall : 65;
212 |         rise_resistance : 0;
213 |         fall_resistance : 0; 
214 |         related_pin : "CLK";
215 |       }
216 |       timing () {
217 |         timing_type : clear;
218 |         timing_sense : positive_unate;
219 |         intrinsic_fall : 75;
220 |         related_pin : "RESET";
221 |       }
222 |       timing () {
223 |         timing_type : preset;
224 |         timing_sense : negative_unate;
225 |         intrinsic_rise : 75;
226 |         related_pin : "PRESET";
227 |       }      
228 |     }
229 |     pin(QN)
230 |     {
231 |       direction: output;
232 |       function : "IQN";
233 |       timing() {
234 |         timing_type : rising_edge;
235 |         intrinsic_rise : 65;
236 |         intrinsic_fall : 65;
237 |         rise_resistance : 0;
238 |         fall_resistance : 0; 
239 |         related_pin : "CLK";
240 |       }
241 |       timing () {
242 |         timing_type : preset;
243 |         timing_sense : negative_unate;
244 |         intrinsic_rise : 75;
245 |         related_pin : "RESET";
246 |       }
247 |       timing () {
248 |         timing_type : clear;
249 |         timing_sense : positive_unate;
250 |         intrinsic_fall : 75;
251 |         related_pin : "PRESET";
252 |       }      
253 |     } 
254 |   }
255 | 
256 |   /* D-type latch, enabled by G (no reset or preset) */
257 |   cell(latch) 
258 |   {
259 |     area : 5;
260 |     latch ("IQ","IQN") 
261 |     {
262 |       enable : "G";
263 |       data_in : "D";
264 |     }
265 | 
266 |     pin(D) 
267 |     {
268 |       direction : input;
269 |     }
270 |     pin(G) 
271 |     {
272 |       direction : input;
273 |     }
274 |  
275 |     pin(Q) 
276 |     {
277 |       direction : output;
278 |       function : "IQ";
279 |       internal_node : "Q";
280 |       
281 |       timing() 
282 |       {
283 |         timing_type : rising_edge;
284 |         intrinsic_rise : 65;
285 |         intrinsic_fall : 65;
286 |         rise_resistance : 0;
287 |         fall_resistance : 0;
288 |         related_pin : "G";
289 |       }
290 |       
291 |       timing() 
292 |       {
293 |         timing_sense : positive_unate;
294 |         intrinsic_rise : 65;
295 |         intrinsic_fall : 65;
296 |         rise_resistance : 0;
297 |         fall_resistance : 0;
298 |         related_pin : "D";
299 |       }
300 |     }
301 |     
302 |     pin(QN) 
303 |     {
304 |       direction : output;
305 |       function : "IQN";
306 |       internal_node : "QN";
307 |       
308 |       timing() 
309 |       {
310 |         timing_type : rising_edge;
311 |         intrinsic_rise : 65;
312 |         intrinsic_fall : 65;
313 |         rise_resistance : 0;
314 |         fall_resistance : 0;
315 |         related_pin : "G";
316 |       }
317 |       
318 |       timing() 
319 |       {
320 |         timing_sense : negative_unate;
321 |         intrinsic_rise : 65;
322 |         intrinsic_fall : 65;
323 |         rise_resistance : 0;
324 |         fall_resistance : 0;
325 |         related_pin : "D";
326 |       }
327 |     }
328 |   }
329 | 
330 | } /* end */
331 | 


--------------------------------------------------------------------------------
/celllibs/supergate/README:
--------------------------------------------------------------------------------
 1 | ========================================
 2 |       Supergate Cell Lib version 1.0
 3 | ========================================
 4 | 
 5 | This is a cell library for benchmarking purposes.
 6 | The library is based on the 'simple' cell library but additionally
 7 | contains a half-adder, full-adder, AOI211 and OAI211 cells.
 8 | 
 9 | The cells are process-agnostic with just one drive strength.
10 | Cell sizes are given in units of the inverter size.
11 | This library is solely meant to be used for benchmarking and is unfit for any other purpose.
12 | 
13 | The following cells are present in the library:
14 | 
15 | * inverter.
16 | * tri-state inverter.
17 | * 2-input nand.
18 | * 2-input nor.
19 | * 2-input xor.
20 | * 2-input inverting mux.
21 | * D-type flip-flop with reset and preset.
22 | * half-adder cell.
23 | * full-adder cell.
24 | * and-or-invert 211 cell.
25 | * or-and-invert 211 cell.
26 | * buffer.
27 | * latch.
28 | 


--------------------------------------------------------------------------------
/celllibs/supergate/supergate.lib:
--------------------------------------------------------------------------------
  1 | /********************************************/
  2 | /*                                          */
  3 | /* Supergate cell library for Bench marking */
  4 | /*                                          */
  5 | /* Symbiotic EDA GmbH / Moseley Instruments */
  6 | /* Niels A. Moseley                         */
  7 | /*                                          */
  8 | /* Process: none                            */
  9 | /*                                          */
 10 | /* Date   : 02-11-2018                      */
 11 | /* Version: 1.0                             */
 12 | /*                                          */
 13 | /********************************************/
 14 | 
 15 | library(supergate) {
 16 |   technology (cmos);
 17 |   revision : 1.0;
 18 |   
 19 |   time_unit                     : "1ps";
 20 |   pulling_resistance_unit       : "1kohm";  
 21 |   voltage_unit                  : "1V";
 22 |   current_unit                  : "1uA";  
 23 |   
 24 |   capacitive_load_unit(1,ff);
 25 |   
 26 |   default_inout_pin_cap         :  7.0;
 27 |   default_input_pin_cap         :  7.0;
 28 |   default_output_pin_cap        :  0.0;
 29 |   default_fanout_load           :  1.0;
 30 | 
 31 |   default_wire_load_capacitance : 0.1;
 32 |   default_wire_load_resistance  : 1.0e-3;
 33 |   default_wire_load_area        : 0.0;
 34 | 
 35 |   nom_process                   :  1.0;
 36 |   nom_temperature               : 25.0;
 37 |   nom_voltage                   :  1.2;
 38 |   
 39 |   delay_model                   : generic_cmos;
 40 |   
 41 |   /* Inverter */
 42 |   cell (inv) {
 43 |     area : 1;
 44 |     pin(A) {
 45 |       direction : input;
 46 |     }
 47 |     
 48 |     pin(Y) {
 49 |       direction : output;
 50 |       function : "A'";
 51 |     }
 52 |   }
 53 |   
 54 |   /* tri-state inverter: Z = A', output is high-impedance while S is low */
 55 |   cell (tri_inv) {
 56 |     area : 4;
 57 |     pin(A) {
 58 |       direction : input;
 59 |     }
 60 |     pin(S) {
 61 |       direction : input;
 62 |     }
 63 |     pin(Z) {
 64 |       direction : output;
 65 |       function  : "A'";
 66 |       three_state : "S'";
 67 |     }
 68 |   }
 69 |   
 70 |   cell (buffer) {
 71 |     area : 5;
 72 |     pin(A) {
 73 |       direction : input;
 74 |     }
 75 |     pin(Y) {
 76 |       direction : output;
 77 |       function : "A";
 78 |     }
 79 |   }  
 80 |   
 81 |   /* 2-input NAND gate */
 82 |   cell (nand2) {
 83 |     area : 3;
 84 |     pin(A) {
 85 |       direction : input;
 86 |     }
 87 |     pin(B) {
 88 |       direction : input;
 89 |     }
 90 |     pin(Y) {
 91 |       direction: output;
 92 |       function : "(A * B)'";
 93 |     }
 94 |   }
 95 |   
 96 |   /* 2-input NOR gate */
 97 |   cell (nor2) {
 98 |     area : 3;
 99 |     pin(A) {
100 |       direction : input;
101 |     }
102 |     pin(B) {
103 |       direction : input;
104 |     }
105 |     pin(Y) {
106 |       direction: output;
107 |       function : "(A + B)'";
108 |     }
109 |   }
110 |   
111 |   /* 2-input XOR */
112 |   cell (xor2) {
113 |     area : 6;
114 |     pin(A) {
115 |       direction : input;
116 |     }
117 |     pin(B) {
118 |       direction : input;
119 |     }
120 |     pin(Y) {
121 |       direction: output;
122 |       function : "(A *B') + (A' * B)";
123 |     }
124 |   }
125 |   
126 |   /* 2-input inverting MUX */
127 |   cell (imux2) {
128 |     area : 5;
129 |     pin(A) {
130 |       direction : input;
131 |     }
132 |     pin(B) {
133 |       direction : input;
134 |     }
135 |     pin(S) {
136 |       direction : input;
137 |     } 
138 |     pin(Y) {
139 |       direction: output;
140 |       function : "( (A * S) + (B * S') )'";
141 |     }
142 |   }
143 |   
144 |   /* D-type flip-flop with asynchronous reset and preset */
145 |   cell (dff) 
146 |   {
147 |     area : 6;
148 |     ff("IQ", "IQN") {
149 |       next_state : "D";
150 |       clocked_on : "CLK";
151 |       clear      : "RESET";
152 |       preset     : "PRESET";
153 |       clear_preset_var1 : L;
154 |       clear_preset_var2 : L;
155 |     } 
156 |     pin(D) {
157 |       direction : input;
158 |     }
159 |     pin(CLK) {
160 |       direction : input;
161 |     }
162 |     pin(RESET) {
163 |       direction : input;
164 |     }
165 |     pin(PRESET) {
166 |       direction : input;
167 |     }
168 |     pin(Q) {
169 |       direction: output;
170 |       function : "IQ";
171 |       timing() {
172 |         timing_type : rising_edge;
173 |         intrinsic_rise : 65;
174 |         intrinsic_fall : 65;
175 |         rise_resistance : 0;
176 |         fall_resistance : 0; 
177 |         related_pin : "CLK";
178 |       }
179 |       timing () {
180 |         timing_type : clear;
181 |         timing_sense : positive_unate;
182 |         intrinsic_fall : 75;
183 |         related_pin : "RESET";
184 |       }
185 |       timing () {
186 |         timing_type : preset;
187 |         timing_sense : negative_unate;
188 |         intrinsic_rise : 75;
189 |         related_pin : "PRESET";
190 |       }      
191 |     }
192 |     pin(QN) {
193 |       direction: output;
194 |       function : "IQN";
195 |       timing() {
196 |         timing_type : rising_edge;
197 |         intrinsic_rise : 65;
198 |         intrinsic_fall : 65;
199 |         rise_resistance : 0;
200 |         fall_resistance : 0; 
201 |         related_pin : "CLK";
202 |       }
203 |       timing () {
204 |         timing_type : preset;
205 |         timing_sense : negative_unate;
206 |         intrinsic_rise : 75;
207 |         related_pin : "RESET";
208 |       }
209 |       timing () {
210 |         timing_type : clear;
211 |         timing_sense : positive_unate;
212 |         intrinsic_fall : 75;
213 |         related_pin : "PRESET";
214 |       }      
215 |     } 
216 |   }
217 | 
218 |   /* Latch */
219 |   cell(latch) {
220 |     area : 5;
221 |     latch ("IQ","IQN") {
222 |       enable : "G";
223 |       data_in : "D";
224 |     }
225 | 
226 |     pin(D) {
227 |       direction : input;
228 |     }
229 |     pin(G) {
230 |       direction : input;
231 |     }
232 |  
233 |     pin(Q) {
234 |       direction : output;
235 |       function : "IQ";
236 |       internal_node : "Q";
237 |       
238 |       timing() {
239 |         timing_type : rising_edge;
240 |         intrinsic_rise : 65;
241 |         intrinsic_fall : 65;
242 |         rise_resistance : 0;
243 |         fall_resistance : 0;
244 |         related_pin : "G";
245 |       }
246 |       
247 |       timing() {
248 |         timing_sense : positive_unate;
249 |         intrinsic_rise : 65;
250 |         intrinsic_fall : 65;
251 |         rise_resistance : 0;
252 |         fall_resistance : 0;
253 |         related_pin : "D";
254 |       }
255 |     }
256 |     
257 |     pin(QN) {
258 |       direction : output;
259 |       function : "IQN";
260 |       internal_node : "QN";
261 |       
262 |       timing() {
263 |         timing_type : rising_edge;
264 |         intrinsic_rise : 65;
265 |         intrinsic_fall : 65;
266 |         rise_resistance : 0;
267 |         fall_resistance : 0;
268 |         related_pin : "G";
269 |       }
270 |       
271 |       timing() {
272 |         timing_sense : negative_unate;
273 |         intrinsic_rise : 65;
274 |         intrinsic_fall : 65;
275 |         rise_resistance : 0;
276 |         fall_resistance : 0;
277 |         related_pin : "D";
278 |       }
279 |     }
280 |   }
281 | 
282 |   /* 3 input AND-OR-INVERT gate */
283 |   cell (aoi211) {
284 |     area : 3;
285 |     pin(A) {
286 |       direction : input;
287 |     }
288 |     pin(B) {
289 |       direction : input;
290 |     }
291 |     pin(C) {
292 |       direction : input;
293 |     }    
294 |     pin(Y) {
295 |       direction: output;
296 |       function : "((A * B) + C)'";
297 |     }
298 |   }
299 | 
300 | 
301 |   /* 3 input OR-AND-INVERT gate */
302 |   cell (oai211) {
303 |     area : 3;
304 |     pin(A) {
305 |       direction : input;
306 |     }
307 |     pin(B) {
308 |       direction : input;
309 |     }
310 |     pin(C) {
311 |       direction : input;
312 |     }    
313 |     pin(Y) {
314 |       direction: output;
315 |       function : "((A + B) * C)'";
316 |     }
317 |   }
318 | 
319 |   /* half adder */
320 |   cell (halfadder) {
321 |     area : 5;
322 |     pin(A) {
323 |       direction : input;
324 |     }
325 |     pin(B) {
326 |       direction : input;
327 |     }
328 |     pin(C) {
329 |       direction : output;
330 |       function  : "(A * B)";
331 |     }    
332 |     pin(Y) {
333 |       direction: output;
334 |       function : "(A *B') + (A' * B)";
335 |     }    
336 |   }
337 | 
338 |   /* full adder */
339 |   cell (fulladder) {
340 |     area : 8;
341 |     pin(A) {
342 |       direction : input;
343 |     }
344 |     pin(B) {
345 |       direction : input;
346 |     }
347 |     pin(CI) {
348 |       direction : input;
349 |     }    
350 |     pin(CO) {
351 |       direction : output;
352 |       function : "(((A * B)+(B * CI))+(CI * A))";
353 |     }
354 |     pin(Y) {
355 |       direction: output;
356 |       function : "((A^B)^CI)";
357 |     }    
358 |   }
359 | 
360 | } /* end */
361 | 


--------------------------------------------------------------------------------
/celllibs/supergate/supergate.v:
--------------------------------------------------------------------------------
  1 | /********************************************/
  2 | /*                                          */
  3 | /* Supergate cell library for benchmarking  */
  4 | /*                                          */
  5 | /* Symbiotic EDA GmbH / Moseley Instruments */
  6 | /* Niels A. Moseley                         */
  7 | /*                                          */
  8 | /* Process: none                            */
  9 | /*                                          */
 10 | /* Date   : 02-11-2018                      */
 11 | /* Version: 1.0                             */
 12 | /*                                          */
 13 | /********************************************/
 14 | 
 15 | module inv(input A, output Y);
 16 |   assign Y = ~A;
 17 | endmodule
 18 | 
 19 | module tri_inv(input A, input S, output reg Y);  // tri-state inverter; S acts as the output enable
 20 |   always@(*)
 21 |   begin
 22 |     if (S==1'b0)  // disabled: release the output (high impedance)
 23 |       begin
 24 |         Y <= 1'bz;
 25 |       end
 26 |     else  // enabled: drive the inverted input
 27 |       begin
 28 |         Y <= ~A;
 29 |       end
 30 |   end
 31 | endmodule
 32 | 
 33 | module buffer(input A, output Y);
 34 |   assign Y = A;
 35 | endmodule
 36 | 
 37 | module nand2(input A, input B, output Y);
 38 |   assign Y = ~(A & B);
 39 | endmodule
 40 | 
 41 | module nor2(input A, input B, output Y);
 42 |   assign Y = ~(A | B);
 43 | endmodule
 44 | 
 45 | module xor2(input A, input B, output Y);
 46 |   assign Y = A ^ B;
 47 | endmodule  
 48 | 
 49 | module imux2(input A, input B, input S, output Y);  // inverting 2:1 multiplexer
 50 |   assign Y = ~(S ? A : B);  // S=1 selects A, S=0 selects B; the selected value is inverted
 51 | endmodule
 52 | 
 53 | module dff(input CLK, input D, input RESET, input PRESET, output reg Q, output reg QN);  // storage cell with complementary outputs Q/QN
 54 |   always@(CLK or RESET or PRESET)  // NOTE(review): plain level list (no posedge) and D is not part of it
 55 |   begin
 56 |     if (RESET)  // RESET has the highest priority: forces Q=0, QN=1
 57 |       begin
 58 |         Q  <= 1'b0;
 59 |         QN <= 1'b1;
 60 |       end
 61 |     else
 62 |     if (PRESET)  // PRESET comes next: forces Q=1, QN=0
 63 |       begin
 64 |         Q  <= 1'b1;
 65 |         QN <= 1'b0;
 66 |       end      
 67 |     else 
 68 |     if (CLK)  // evaluated on every CLK/RESET/PRESET change, so D is also captured when RESET/PRESET fall while CLK is 1
 69 |       begin
 70 |         Q  <= D;
 71 |         QN <= ~D;
 72 |       end
 73 |   end      
 74 | endmodule
 75 | 
 76 | module latch(input G, input D, output reg Q, output reg QN);  // level-sensitive latch with complementary outputs
 77 |   always@(G or D)
 78 |   begin
 79 |     if (G)  // transparent while G is 1; there is no else branch, so Q/QN keep their value while G is 0
 80 |       begin
 81 |         Q  <= D;
 82 |         QN <= ~D;
 83 |       end
 84 |   end      
 85 | endmodule
 86 | 
 87 | 
 88 | module aoi211(input A, input B, input C, output Y);
 89 |   assign Y = ~((A&B)|C);
 90 | endmodule
 91 | 
 92 | module oai211(input A, input B, input C, output Y);
 93 |   assign Y = ~((A|B)&C);
 94 | endmodule
 95 | 
 96 | module halfadder(input A, input B, output C, output Y);
 97 |   assign Y = A^B;
 98 |   assign C = A&B;
 99 | endmodule
100 | 
101 | module fulladder(input A, input B, input CI, output CO, output Y);
102 |   assign Y = (A^B)^CI;
103 |   assign CO = ((A&B)|(B&CI))|(CI&A);
104 | endmodule
105 | 


--------------------------------------------------------------------------------
/scripts/database_html.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Render database/<table>/<test>.dat as one HTML table in database/index.html (run from the repo root).
 3 | tables="$(cd database; ls -d */ | sed 's,/$,,;')"
 4 | tests="$(cd database; ls */*.dat | sed 's,.*/,,; s,\.dat$,,;' | sort -n | uniq)"
 5 | # uniq: a test with results in several tables must still yield a single row
 6 | exec > database/index.html
 7 | # from here on stdout is index.html: header row first, then one row per test
 8 | echo "<table border>"
 9 | echo "<tr>"
10 | echo "<th>Test</th>"
11 | for tab in $tables; do echo "<th>$tab</th>"; done
12 | echo "</tr>"
13 | for tst in $tests; do
14 | 	echo "<tr>"
15 | 	echo "<td>$tst</td>"
16 | 	for tab in $tables; do
17 | 		if test -f "database/$tab/$tst.dat"; then
18 | 			echo "<td>$( cat "database/$tab/$tst.dat" )</td>"
19 | 		else
20 | 			echo "<td>N/A</td>"
21 | 		fi
22 | 	done
23 | 	echo "</tr>"
24 | done
25 | echo "</table>"
26 | 


--------------------------------------------------------------------------------
/scripts/database_make.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import os
  4 | import sys
  5 | import subprocess
  6 | import json
  7 | 
  8 | ## execute a JSON configuration
  9 | def executeConfig(cellibpath, shellScriptName, dbpath, subdir, config):
 10 |     for fileName in config["files"]:
 11 |         hdlsrc = os.path.join(subdir, fileName)
 12 |         filewithoutext, file_extension = os.path.splitext(fileName)
 13 |         datfile = open(os.path.join(dbpath, filewithoutext + ".dat"), "wt")
 14 |         print("  Running HDL file " + fileName)
 15 |         retval = subprocess.check_call([os.path.abspath("./scripts/"+shellScriptName+".sh"), os.path.abspath("./" +hdlsrc), celllibpath],
 16 |                                 cwd=os.path.abspath(subdir),
 17 |                                 stdout=datfile,
 18 |                                 stderr=sys.stderr
 19 |                                 )
 20 |         datfile.close()
 21 |     return
 22 | 
 23 | ##########################################################################################
 24 | ## MAIN PROGRAM STARTS HERE
 25 | ##########################################################################################
 26 | 
 27 | # Check the number of arguments to provide help, if needed.
 28 | if (len(sys.argv) < 3):
 29 |     print("Usage: database_make <mode> <dir1> .. <dirN>")
 30 |     sys.exit(1)
 31 | 
 32 | shellScriptName = sys.argv[1]
 33 | dbpath = os.path.abspath("./database/"+shellScriptName)
 34 | celllibpath = os.path.abspath("./celllibs")
 35 | 
 36 | os.system("rm -rf "+dbpath)
 37 | os.system("mkdir -p "+dbpath)
 38 | 
 39 | # call all generate.py scripts
 40 | for dir in sys.argv[2:]:
 41 |     for subdir, dirs, files in os.walk(dir):
 42 |         for file in files:
 43 |             if (file == "generate.py"):
 44 |                 script = os.path.join(subdir, file)
 45 |                 print("Executing " + script)
 46 |                 retval = subprocess.check_call(["python3","generate.py"], 
 47 |                                                cwd=os.path.abspath(subdir),
 48 |                                                stdout=sys.stdout,
 49 |                                                stderr=sys.stderr
 50 |                                                )
 51 | 
 52 | # execute all .v or .vhdl scripts specified in the config.json file
 53 | # or if there is no config.json, simply walk the directory.
 54 | dir = sys.argv[2]
 55 | queue = [ dir ]
 56 | print("Processing directory: " + dir)
 57 | while queue:
 58 |     subdir = queue.pop()
 59 |     listdir = os.listdir(subdir)
 60 |     # Do not enter git repositories
 61 |     if '.git' in listdir: continue
 62 |     for item in listdir:
 63 |         path = os.path.join(subdir, item)
 64 |         if os.path.isdir(path):
 65 |             queue.append(path)
 66 |         elif os.path.isfile(path):
 67 |             # check if there is a config.json file
 68 |             if item == 'config.json':
 69 |                 print("  Running config file: " + item)
 70 |                 with open(path, 'r') as configFile:
 71 |                     try:
 72 |                         config = json.load(configFile)
 73 |                         executeConfig(celllibpath, shellScriptName, dbpath, subdir, config)
 74 |                     except ValueError as error:
 75 |                         print("  --- ERROR PARSING CONFIG.JSON ---")
 76 |                         pass
 77 |             if (item.endswith(".v")):
 78 |                 # skip all files that end in _tb.v as they are testbench files
 79 |                 # containing unsynthesizable code
 80 |                 if (item.endswith("_tb.v")):
 81 |                     print("  Skipping Verilog testbench file " + item)
 82 |                     continue
 83 |                 # skip any netlist files that might have been produced in 
 84 |                 # previous runs
 85 |                 if (item.endswith("_netlist.v")):
 86 |                     print("  Skipping Verilog netlist file " + item)
 87 |                     continue
 88 |                 verilogsrc = os.path.join(subdir, item)
 89 |                 filewithoutext, file_extension = os.path.splitext(item)
 90 |                 datfile = open(os.path.join(dbpath, filewithoutext + ".dat"), "wt")
 91 |                 print("  Running Verilog file " + item)
 92 |                 retval = subprocess.check_call([os.path.abspath("./scripts/"+shellScriptName+".sh"), os.path.abspath("./" +verilogsrc), celllibpath],
 93 |                                             cwd=os.path.abspath(subdir),
 94 |                                             stdout=datfile,
 95 |                                             stderr=sys.stderr
 96 |                                             )
 97 |                 datfile.close()
 98 |                     
 99 |             if (item.endswith(".vhdl")):
100 |                 vhdlsrc = os.path.join(subdir, item)
101 |                 filewithoutext, file_extension = os.path.splitext(item)
102 |                 datfile = open(os.path.join(dbpath, filewithoutext + ".dat"), "wt")
103 |                 print("  Running VHDL file " + item)
104 |                 retval = subprocess.check_call([os.path.abspath("./scripts/"+shellScriptName+".sh"),os.path.abspath("./" +vhdlsrc), celllibpath],
105 |                                             cwd=os.path.abspath(subdir),
106 |                                             stdout=datfile,
107 |                                             stderr=sys.stderr
108 |                                             )
109 |                 datfile.close()
110 | 


--------------------------------------------------------------------------------
/scripts/yosys-ice40-flopcount.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # mode script for ICE40 FPGA FF count
 5 | #
 6 | # usage: yosys-ice40-flopcount.sh <hdl file>   (optional env: EXTRA_FLAGS, appended to synth_ice40)
 7 | logfile=$( mktemp )
 8 | scriptpath=$( pwd )
 9 | 
10 | # create synthesis script
11 | myfile="$1"
12 | if [ "${myfile: -5}" == ".vhdl" ]   # quoted: a name shorter than 5 characters expands to nothing
13 | then
14 |     topmodule=$( basename -s .vhdl "$1" )
15 |     echo "read -vhdl $1" > script.yos
16 | else
17 |     topmodule=$( basename -s .v "$1")
18 |     echo "read -vlog2k $1" > script.yos
19 | fi
20 | echo "synth_ice40 -top $topmodule $EXTRA_FLAGS" >> script.yos
21 | # EXTRA_FLAGS is honoured as in yosys-ice40-lutcount.sh so both counts come from the same flow
22 | # run tools
23 | yosys -ql "$logfile" -p "script $scriptpath/script.yos" >/dev/null
24 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /SB_DFF/ { s/.* //; p; }; }; d;' "$logfile"
25 | rm -f "$logfile"
26 | rm -f script.yos
27 | 


--------------------------------------------------------------------------------
/scripts/yosys-ice40-lutcount.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # mode script for ICE40 FPGA LUT count
 5 | #
 6 | # usage: yosys-ice40-lutcount.sh <hdl file>   (optional env: EXTRA_FLAGS, appended to synth_ice40)
 7 | logfile=$( mktemp )
 8 | scriptpath=$( pwd )
 9 | 
10 | # create synthesis script
11 | myfile="$1"
12 | if [ "${myfile: -5}" == ".vhdl" ]   # quoted: a name shorter than 5 characters expands to nothing
13 | then
14 |     topmodule=$( basename -s .vhdl "$1" )
15 |     echo "read -vhdl $1" > script.yos
16 | else
17 |     topmodule=$( basename -s .v "$1")
18 |     echo "read -vlog2k $1" > script.yos
19 | fi
20 | echo "synth_ice40 -top $topmodule $EXTRA_FLAGS" >> script.yos
21 | 
22 | # run tools; the sed filter prints the last field of each SB_LUT4 line in the statistics section
23 | yosys -ql "$logfile" -p "script $scriptpath/script.yos" >/dev/null
24 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /SB_LUT4/ { s/.* //; p; }; }; d;' "$logfile"
25 | rm -f "$logfile"
26 | rm -f script.yos
27 | 


--------------------------------------------------------------------------------
/scripts/yosys-sanity.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # mode script for a sanity check of the HDL source
 5 | #
 6 | # Reads the file, runs 'hierarchy -check' on its top module and prints the
 7 | # yosys exit status.  The cell library argument is accepted but not used here.
 8 | #
 9 | 
10 | logfile=$( mktemp )
11 | scriptpath=$( pwd )
12 | 
13 | # create synthesis script
14 | myfile="$1"
15 | celllibpath="$2"
16 | 
17 | #mkdir -p netlists
18 | 
19 | if [ "${myfile: -5}" == ".vhdl" ]   # quoted: a name shorter than 5 characters expands to nothing
20 | then
21 |     topmodule=$( basename -s .vhdl "$1" )
22 |     echo "read -vhdl $1" > script.yos
23 | else
24 |     topmodule=$( basename -s .v "$1")
25 |     echo "read -vlog2k $1" > script.yos
26 | fi
27 | echo "hierarchy -check -top $topmodule" >> script.yos
28 | # the echo must directly follow yosys so that $? still holds the yosys exit status
29 | yosys -ql "$logfile" -p "script $scriptpath/script.yos" >/dev/null
30 | echo $?
31 | rm -f "$logfile"
32 | rm -f script.yos
33 | 


--------------------------------------------------------------------------------
/scripts/yosys-simplelib.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # mode script for simple ASIC cell library
 5 | #
 6 | # Prints the chip area reported by 'stat -liberty'.  Custom ABC script (currently disabled below):
 7 | #  strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put
 8 | #
 9 | 
10 | logfile=$( mktemp )
11 | scriptpath=$( pwd )
12 | 
13 | # create synthesis script
14 | myfile="$1"
15 | celllibpath="$2"
16 | 
17 | #mkdir -p netlists
18 | 
19 | if [ "${myfile: -5}" == ".vhdl" ]   # quoted: a name shorter than 5 characters expands to nothing
20 | then
21 |     topmodule=$( basename -s .vhdl "$1" )
22 |     echo "read -vhdl $1" > script.yos
23 | else
24 |     topmodule=$( basename -s .v "$1")
25 |     echo "read -vlog2k $1" > script.yos
26 | fi
27 | echo "hierarchy; proc; fsm; opt; memory; opt" >> script.yos
28 | echo "techmap; opt" >> script.yos
29 | echo "dfflibmap -liberty $celllibpath/simple/simple.lib" >> script.yos
30 | echo "abc -liberty $celllibpath/simple/simple.lib" >> script.yos
31 | echo "write_verilog ${1}_netlist.v" >> script.yos
32 | echo "stat -liberty $celllibpath/simple/simple.lib" >> script.yos
33 | #echo "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put" > abc.script
34 | 
35 | # run tools; no SB_LUT4 cells exist after mapping to simple.lib, so report the chip area line instead
36 | #yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null
37 | yosys -l "$logfile" -p "script $scriptpath/script.yos" >/dev/null
38 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /Chip area for/ { s/.* //; p; }; }; d;' "$logfile"
39 | cp "$logfile" "$celllibpath/../log.txt"
40 | rm -f "$logfile"
41 | rm -f script.yos
42 | #rm -f abc.script
43 | 


--------------------------------------------------------------------------------
/scripts/yosys-supergatelib.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # mode script for supergate ASIC cell library
 5 | #
 6 | # Prints the chip area reported by 'stat -liberty'.  Custom ABC script (currently disabled below):
 7 | #  strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put
 8 | #
 9 | 
10 | logfile=$( mktemp )
11 | scriptpath=$( pwd )
12 | 
13 | # create synthesis script
14 | myfile="$1"
15 | celllibpath="$2"
16 | 
17 | #mkdir -p netlists
18 | 
19 | if [ "${myfile: -5}" == ".vhdl" ]   # quoted: a name shorter than 5 characters expands to nothing
20 | then
21 |     topmodule=$( basename -s .vhdl "$1" )
22 |     echo "read -vhdl $1" > script.yos
23 | else
24 |     topmodule=$( basename -s .v "$1")
25 |     echo "read -vlog2k $1" > script.yos
26 | fi
27 | #echo "read_liberty $celllibpath/supergate/supergate.lib" >> script.yos
28 | echo "hierarchy; proc; fsm; opt; memory; opt" >> script.yos
29 | echo "techmap; opt" >> script.yos
30 | echo "dfflibmap -liberty $celllibpath/supergate/supergate.lib" >> script.yos
31 | echo "abc -liberty $celllibpath/supergate/supergate.lib" >> script.yos
32 | echo "write_verilog ${1}_netlist.v" >> script.yos
33 | echo "stat -liberty $celllibpath/supergate/supergate.lib" >> script.yos
34 | #echo "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put" > abc.script
35 | 
36 | # run tools; no SB_LUT4 cells exist after mapping to supergate.lib, so report the chip area line instead
37 | #yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null
38 | yosys -l "$logfile" -p "script $scriptpath/script.yos" >/dev/null
39 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /Chip area for/ { s/.* //; p; }; }; d;' "$logfile"
40 | cp "$logfile" "$celllibpath/../log.txt"
41 | rm -f "$logfile"
42 | rm -f script.yos
43 | #rm -f abc.script
44 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/boom/MediumBoom.v.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/MediumBoom.v.gz


--------------------------------------------------------------------------------
/verilog/benchmarks_large/boom/MediumOctoBoom.v.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/MediumOctoBoom.v.gz


--------------------------------------------------------------------------------
/verilog/benchmarks_large/boom/MegaOctoBoom.v.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/MegaOctoBoom.v.gz


--------------------------------------------------------------------------------
/verilog/benchmarks_large/boom/README.md:
--------------------------------------------------------------------------------
 1 | # BOOM RISC-V core
 2 | 
 3 | Generated from https://github.com/riscv-boom/boom-template commit 8241911d3fa13ab81df276899c2ab839fd8b3912
 4 | 
 5 | SmallBoom and MediumBoom are default single-core configs. Other multi-core configs are custom:
 6 | ```scala
 7 | class SmallQuadBoomConfig extends Config(
 8 |   new WithRVC ++
 9 |   new WithSmallBooms ++
10 |   new DefaultBoomConfig ++
11 |   new WithNBoomCores(4) ++
12 |   new WithoutTLMonitors ++
13 |   new freechips.rocketchip.system.BaseConfig)
14 | 
15 | class MediumOctoBoomConfig extends Config(
16 |   new WithRVC ++
17 |   new WithMediumBooms ++
18 |   new DefaultBoomConfig ++
19 |   new WithNBoomCores(8) ++
20 |   new WithoutTLMonitors ++
21 |   new freechips.rocketchip.system.BaseConfig)
22 | 
23 | class MegaOctoBoomConfig extends Config(
24 |   new WithRVC ++
25 |   new WithMegaBooms ++
26 |   new DefaultBoomConfig ++
27 |   new WithNBoomCores(8) ++
28 |   new WithoutTLMonitors ++
29 |   new freechips.rocketchip.system.BaseConfig)
30 | ```
31 | 
32 | Note that MegaOctoBoomConfig is primarily intended as a torture test rather than a useful benchmark,
33 | as a large percentage of the final resource usage is used for bit-blasted 16-write-port memories.
34 | 
35 | Copyright:
36 | ```
37 | 
38 | Copyright (c) 2017, The Regents of the University of California (Regents).
39 | All Rights Reserved.
40 | 
41 | Redistribution and use in source and binary forms, with or without
42 | modification, are permitted provided that the following conditions are met:
43 | 
44 | 1. Redistributions of source code must retain the above copyright
45 |    notice, this list of conditions and the following disclaimer.
46 | 
47 | 2. Redistributions in binary form must reproduce the above copyright
48 |    notice, this list of conditions and the following disclaimer in the
49 |    documentation and/or other materials provided with the distribution.
50 | 
51 | 3. Neither the name of the Regents nor the
52 |    names of its contributors may be used to endorse or promote products
53 |    derived from this software without specific prior written permission.
54 | 
55 | IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
56 | SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING
57 | OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS
58 | BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59 | 
60 | REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
61 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 | PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
63 | HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
64 | MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
65 | ```
66 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/boom/SmallBoom.v.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/SmallBoom.v.gz


--------------------------------------------------------------------------------
/verilog/benchmarks_large/boom/SmallQuadBoom.v.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/SmallQuadBoom.v.gz


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cam/README.md:
--------------------------------------------------------------------------------
 1 | # Verilog CAM: Content Addressable Memory
 2 | 
 3 | Source: https://github.com/alexforencich/verilog-cam
 4 | 
 5 | Two designs:
 6 | 
 7 | - **CAM_SRL_TOP** 64 bit data content by 32 entry content addressable memory
 8 |   built out of shift registers.
 9 | 
10 | - **CAM_BRAM_TOP** 64 bit data content by 32 entry content addressable memory
11 |   built out of block RAMs.
12 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cam/cam_bram_top.v:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | Copyright (c) 2015-2016 Alex Forencich
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | of this software and associated documentation files (the "Software"), to deal
  7 | in the Software without restriction, including without limitation the rights
  8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in
 13 | all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | THE SOFTWARE.
 22 | 
 23 | */
 24 | 
 25 | // Language: Verilog 2001
 26 | 
 27 | `timescale 1ns / 1ps
 28 | 
 29 | /*
 30 |  * Content Addressable Memory
 31 |  */
 32 | module cam_bram_top #(  // top-level wrapper: instantiates one CAM core selected by CAM_STYLE (default "BRAM")
 33 |     // search data bus width
 34 |     parameter DATA_WIDTH = 64,
 35 |     // memory size in log2(words)
 36 |     parameter ADDR_WIDTH = 5,
 37 |     // CAM style (SRL, BRAM)
 38 |     parameter CAM_STYLE = "BRAM",
 39 |     // width of data bus slices
 40 |     parameter SLICE_WIDTH = 4
 41 | )
 42 | (
 43 |     input  wire                     clk,
 44 |     input  wire                     rst,
 45 | 
 46 |     input  wire [ADDR_WIDTH-1:0]    write_addr,
 47 |     input  wire [DATA_WIDTH-1:0]    write_data,
 48 |     input  wire                     write_delete,
 49 |     input  wire                     write_enable,
 50 |     output wire                     write_busy,
 51 | 
 52 |     input  wire [DATA_WIDTH-1:0]    compare_data,
 53 |     output wire [2**ADDR_WIDTH-1:0] match_many,
 54 |     output wire [2**ADDR_WIDTH-1:0] match_single,
 55 |     output wire [ADDR_WIDTH-1:0]    match_addr,
 56 |     output wire                     match
 57 | );
 58 | 
 59 | generate
 60 |     if (CAM_STYLE == "SRL") begin  // NOTE(review): cam_srl is not among the files included after this module - confirm before selecting "SRL"
 61 |         cam_srl #(
 62 |             .DATA_WIDTH(DATA_WIDTH),
 63 |             .ADDR_WIDTH(ADDR_WIDTH),
 64 |             .SLICE_WIDTH(SLICE_WIDTH)
 65 |         )
 66 |         cam_inst (
 67 |             .clk(clk),
 68 |             .rst(rst),
 69 |             .write_addr(write_addr),
 70 |             .write_data(write_data),
 71 |             .write_delete(write_delete),
 72 |             .write_enable(write_enable),
 73 |             .write_busy(write_busy),
 74 |             .compare_data(compare_data),
 75 |             .match_many(match_many),
 76 |             .match_single(match_single),
 77 |             .match_addr(match_addr),
 78 |             .match(match)
 79 |         );
 80 |     end else if (CAM_STYLE == "BRAM") begin  // default path: every port is passed straight through to cam_bram
 81 |         cam_bram #(
 82 |             .DATA_WIDTH(DATA_WIDTH),
 83 |             .ADDR_WIDTH(ADDR_WIDTH),
 84 |             .SLICE_WIDTH(SLICE_WIDTH)
 85 |         )
 86 |         cam_inst (
 87 |             .clk(clk),
 88 |             .rst(rst),
 89 |             .write_addr(write_addr),
 90 |             .write_data(write_data),
 91 |             .write_delete(write_delete),
 92 |             .write_enable(write_enable),
 93 |             .write_busy(write_busy),
 94 |             .compare_data(compare_data),
 95 |             .match_many(match_many),
 96 |             .match_single(match_single),
 97 |             .match_addr(match_addr),
 98 |             .match(match)
 99 |         );
100 |     end  // there is no final else: any other CAM_STYLE value instantiates nothing and leaves the outputs undriven
101 | endgenerate
102 | 
103 | endmodule
104 | 
105 | `include "cam_bram.vh"
106 | `include "priority_encoder.vh"
107 | `include "ram_dp.vh"
108 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cam/cam_srl_top.v:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | Copyright (c) 2015-2016 Alex Forencich
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | of this software and associated documentation files (the "Software"), to deal
  7 | in the Software without restriction, including without limitation the rights
  8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | copies of the Software, and to permit persons to whom the Software is
 10 | furnished to do so, subject to the following conditions:
 11 | 
 12 | The above copyright notice and this permission notice shall be included in
 13 | all copies or substantial portions of the Software.
 14 | 
 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | THE SOFTWARE.
 22 | 
 23 | */
 24 | 
 25 | // Language: Verilog 2001
 26 | 
 27 | `timescale 1ns / 1ps
 28 | 
 29 | /*
 30 |  * Content Addressable Memory
 31 |  */
 32 | module cam_srl_top #(  // top-level wrapper: instantiates one CAM core selected by CAM_STYLE (default "SRL")
 33 |     // search data bus width
 34 |     parameter DATA_WIDTH = 64,
 35 |     // memory size in log2(words)
 36 |     parameter ADDR_WIDTH = 5,
 37 |     // CAM style (SRL, BRAM)
 38 |     parameter CAM_STYLE = "SRL",
 39 |     // width of data bus slices
 40 |     parameter SLICE_WIDTH = 4
 41 | )
 42 | (
 43 |     input  wire                     clk,
 44 |     input  wire                     rst,
 45 | 
 46 |     input  wire [ADDR_WIDTH-1:0]    write_addr,
 47 |     input  wire [DATA_WIDTH-1:0]    write_data,
 48 |     input  wire                     write_delete,
 49 |     input  wire                     write_enable,
 50 |     output wire                     write_busy,
 51 | 
 52 |     input  wire [DATA_WIDTH-1:0]    compare_data,
 53 |     output wire [2**ADDR_WIDTH-1:0] match_many,
 54 |     output wire [2**ADDR_WIDTH-1:0] match_single,
 55 |     output wire [ADDR_WIDTH-1:0]    match_addr,
 56 |     output wire                     match
 57 | );
 58 | 
 59 | generate
 60 |     if (CAM_STYLE == "SRL") begin  // default path: every port is passed straight through to cam_srl
 61 |         cam_srl #(
 62 |             .DATA_WIDTH(DATA_WIDTH),
 63 |             .ADDR_WIDTH(ADDR_WIDTH),
 64 |             .SLICE_WIDTH(SLICE_WIDTH)
 65 |         )
 66 |         cam_inst (
 67 |             .clk(clk),
 68 |             .rst(rst),
 69 |             .write_addr(write_addr),
 70 |             .write_data(write_data),
 71 |             .write_delete(write_delete),
 72 |             .write_enable(write_enable),
 73 |             .write_busy(write_busy),
 74 |             .compare_data(compare_data),
 75 |             .match_many(match_many),
 76 |             .match_single(match_single),
 77 |             .match_addr(match_addr),
 78 |             .match(match)
 79 |         );
 80 |     end else if (CAM_STYLE == "BRAM") begin  // NOTE(review): cam_bram is not among the files included after this module - confirm before selecting "BRAM"
 81 |         cam_bram #(
 82 |             .DATA_WIDTH(DATA_WIDTH),
 83 |             .ADDR_WIDTH(ADDR_WIDTH),
 84 |             .SLICE_WIDTH(SLICE_WIDTH)
 85 |         )
 86 |         cam_inst (
 87 |             .clk(clk),
 88 |             .rst(rst),
 89 |             .write_addr(write_addr),
 90 |             .write_data(write_data),
 91 |             .write_delete(write_delete),
 92 |             .write_enable(write_enable),
 93 |             .write_busy(write_busy),
 94 |             .compare_data(compare_data),
 95 |             .match_many(match_many),
 96 |             .match_single(match_single),
 97 |             .match_addr(match_addr),
 98 |             .match(match)
 99 |         );
100 |     end  // there is no final else: any other CAM_STYLE value instantiates nothing and leaves the outputs undriven
101 | endgenerate
102 | 
103 | endmodule
104 | 
105 | `include "cam_srl.vh"
106 | `include "priority_encoder.vh"
107 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cam/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import urllib.request
4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/cam_srl.v', 'cam_srl.vh')
5 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/cam_bram.v', 'cam_bram.vh')
6 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/priority_encoder.v', 'priority_encoder.vh')
7 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/ram_dp.v', 'ram_dp.vh')
8 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cordic/.gitignore:
--------------------------------------------------------------------------------
1 | cordic_*.v
2 | *.vvp
3 | *.vcd


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cordic/README.md:
--------------------------------------------------------------------------------
 1 | # CORDIC - Coordinate Rotation DIgital Computer
 2 | 
 3 | CORDIC is a class of algorithms used to efficiently calculate sine, cosine, tangent, arcsine, arccosine, arctangent, vector magnitude and more.
 4 | 
 5 | Here, a pipelined CORDIC algorithm is used to calculate sine and cosine.
 6 | 
 7 | The Python script generates multiple versions varying in the number of CORDIC stages and input widths.
 8 | 
 9 | Reference: https://en.wikipedia.org/wiki/CORDIC
10 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cordic/cordic.template:
--------------------------------------------------------------------------------
  1 | // pipelined CORDIC algorithm to calculate sin/cos pair from a given angle (0..1)
  2 | // Author: Niels A. Moseley
  3 | //
  4 | 
  5 | 
  6 | // one stage of the cordic iteration with registered outputs
  7 | module cordic_stage_<w>(clk, rst_n, x_in, y_in, angle_in, angle_adj, x_out, y_out, angle_out);
  8 |     parameter SHIFT = 1;  // right-shift amount applied to x_in / y_in in this stage
  9 | 
 10 |     // inputs
 11 |     input clk;
 12 |     input rst_n;  // synchronous reset, active low (see the clocked block below)
 13 |     input signed [<w>-1:0] x_in;
 14 |     input signed [<w>-1:0] y_in;
 15 |     input signed [<w>-1:0] angle_in;   // residual angle entering this stage
 16 |     input signed [<w>-1:0] angle_adj;  // amount added to / subtracted from angle_in by this stage
 17 | 
 18 |     // outputs
 19 |     output reg signed [<w>-1:0] x_out;
 20 |     output reg signed [<w>-1:0] y_out;
 21 |     output reg signed [<w>-1:0] angle_out;
 22 | 
 23 |     // internal signal
 24 |     reg signed [<w>-1:0] new_x;      // combinational next value of x_out
 25 |     reg signed [<w>-1:0] new_y;      // combinational next value of y_out
 26 |     reg signed [<w>-1:0] new_angle;  // combinational next value of angle_out
 27 | 
 28 |     wire sign;
 29 |     wire signed [<w>-1:0] shifted_x;
 30 |     wire signed [<w>-1:0] shifted_y;
 31 | 
 32 |     assign sign = angle_in[<w>-1];  // angle sign bit
 33 |     assign shifted_x = x_in >>> SHIFT;  // arithmetic shift: x_in is signed, so the sign bit is replicated
 34 |     assign shifted_y = y_in >>> SHIFT;  // arithmetic shift: y_in is signed, so the sign bit is replicated
 35 | 
 36 |     always @(*)  // combinational update; direction is chosen by the sign of angle_in
 37 |     begin
 38 |         new_x = sign ? (x_in + shifted_y) : (x_in - shifted_y);
 39 |         new_y = sign ? (y_in - shifted_x) : (y_in + shifted_x);
 40 |         new_angle = sign ? (angle_in + angle_adj) : (angle_in - angle_adj);  // negative angle: add, otherwise subtract
 41 |     end
 42 | 
 43 |     always @(posedge clk)  // output registers
 44 |     begin
 45 |         if (rst_n == 1'b0)  // reset is sampled on the clock edge only
 46 |         begin
 47 |             x_out <= 0;
 48 |             y_out <= 0;
 49 |             angle_out <= 0;
 50 |         end
 51 |         else begin
 52 |             x_out <= new_x;
 53 |             y_out <= new_y;
 54 |             angle_out <= new_angle;        
 55 |         end
 56 |     end
 57 | 
 58 | endmodule
 59 | 
 60 | 
 61 | module cordic_<s>_<w>(clk, rst_n, angle_in, cos_out, sin_out);  // <s>-stage, <w>-bit pipelined sin/cos generator
 62 | 
 63 |     // inputs
 64 |     input clk;
 65 |     input rst_n;
 66 |     input signed [<w>-1:0] angle_in;
 67 | 
 68 |     // outputs
 69 |     output signed [<w>-1:0] cos_out;
 70 |     output signed [<w>-1:0] sin_out;
 71 | 
 72 |     // internal signals
 73 |     reg signed [<w>-1:0] x_in; 
 74 |     reg signed [<w>-1:0] y_in;
 75 |     reg signed [<w>-1:0] z_in;
 76 | 
 77 |     wire signed [<w>-1:0] xbus [0:<s>-1];  // x output of each stage
 78 |     wire signed [<w>-1:0] ybus [0:<s>-1];  // y output of each stage
 79 |     wire signed [<w>-1:0] zbus [0:<s>-1];  // residual angle output of each stage
 80 | 
 81 |     assign cos_out = xbus[<s>-1];  // x of the last stage
 82 |     assign sin_out = ybus[<s>-1];  // y of the last stage
 83 | 
 84 |     always @(*)  // combinational start-vector selection: blocking assignments, not non-blocking
 85 |     begin
 86 |         case($unsigned(angle_in[<w>-1:<w>-2]))  // dispatch on the top two bits of the angle
 87 |             2'b00:
 88 |                 begin
 89 |                     x_in = <v>;  // start on the +x axis
 90 |                     y_in = 0;
 91 |                     z_in = angle_in;  // angle passed through unchanged
 92 |                 end
 93 |             2'b11:
 94 |                 begin
 95 |                     x_in = <v>;  // start on the +x axis
 96 |                     y_in = 0;
 97 |                     z_in = angle_in;  // angle passed through unchanged
 98 |                 end
 99 |             2'b01:
100 |                 begin
101 |                     x_in = 0;
102 |                     y_in = <v>;  // start on the +y axis
103 |                     z_in = $signed({2'b00, angle_in[<w>-3:0]});  // top two bits forced to 00
104 |                 end
105 |             2'b10:
106 |                 begin
107 |                     x_in = 0;
108 |                     y_in = -<v>;  // start on the -y axis
109 |                     z_in = $signed({2'b11, angle_in[<w>-3:0]});  // top two bits forced to 11
110 |                 end
111 |         endcase
112 |     end
113 | 
114 |     // generate instances of cordic_stage
115 |     <g>
116 | 
117 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cordic/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate a pipelined CORDIC with a certain number of iteration stages
 4 | ## The script must also generate the angle table
 5 | ##
 6 | ## <w> = bit width of cordic stage
 7 | ## <s> = number of stages
 8 | ## <v> = cordic vector start magnitude, approx 0.607 of full scale (1 / CORDIC gain)
 9 | ## <g> = generated calls to cordic_stage
10 | ##
11 | 
12 | import math
13 | 
14 | def gen_cordic(stages, bits, template):
15 |     ## calculate the CORDIC gain so we can compensate this
16 |     ## by reducing the input vector length to avoid overflow.
17 |     ##
18 |     ## the Nth stage has a gain of sqrt(1.0 + 2^-2N) when counting
19 |     ## stages from 0.
20 |     ##
21 |     ## Total gain for 4 stages : 1.64248406575
22 |     ##                5 stages : 1.64568891576
23 |     ##                6 stages : 1.64649227871
24 |     ## 
25 | 
26 |     amp = 1.0
27 |     for I in range(0,stages):
28 |         amp = amp * math.sqrt(1.0 + math.pow(2.0,-2*I))
29 | 
30 |     startval = int( math.floor((2**(bits-1)-1) / amp) )
31 | 
32 |     template = template.replace("<w>", str(bits)).replace("<s>", str(stages))
33 |     template = template.replace("<v>", str(bits)+"'d"+str(startval))
34 | 
35 |     ## generate calls to cordic_stage    
36 |     
37 |     gen = "    cordic_stage_<w> #(0) stage0(clk, rst_n, x_in, y_in, z_in, <a>, xbus[0], ybus[0], zbus[0]);\n"
38 |     tanval = int( round((2**(bits)) * 0.125,0) )
39 |     gen = gen.replace("<a>", str(bits) + "'sd" + str(tanval))
40 |     gen = gen.replace("<w>", str(bits))
41 | 
42 |     s = "    cordic_stage_<w> #(<j>) stage<j>(clk, rst_n, xbus[<i>], ybus[<i>], zbus[<i>], <a>, xbus[<j>], ybus[<j>], zbus[<j>]);"
43 |     for I in range(1,stages):
44 |         tanval = int( round((2**(bits)) * math.atan(math.pow(2.0, -I))/(2.0*3.14159265359),0) )
45 |         gen_s = s.replace("<i>", str(I-1)).replace("<a>", str(bits) + "'sd" + str(tanval)) + "\n"
46 |         gen_s = gen_s.replace("<j>",str(I))
47 |         gen_s = gen_s.replace("<w>",str(bits))
48 |         gen = gen + gen_s
49 |         
50 | 
51 |     template = template.replace("<g>", gen)
52 | 
53 |     with open("cordic_%d_%d.v" % (stages, bits), "w") as f:
54 |         print(template, file=f)
55 | 
56 | with open('cordic.template','rt') as templatefile:
57 |     template = templatefile.read()
58 |     
59 |     for stages in [4,5,6,7,8,9,10]:
60 |         for bits in [8,12,16]:
61 |             gen_cordic(stages, bits, ''.join(template))
62 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/cordic/run_cordic_tb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | iverilog -o cordic_tb.vvp cordic_10_16.v cordic_4_8.v cordic_tb.v
4 | vvp cordic_tb.vvp
5 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/.gitignore:
--------------------------------------------------------------------------------
1 | *.vh
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/README.md:
--------------------------------------------------------------------------------
 1 | # DSP Filters - a selection of digital filters from @ZipCPU
 2 | 
 3 | Source: https://github.com/ZipCPU/dspfilters/tree/49b9a0235f88c34b9a997b1aa9a634ad130ea719
 4 | 
 5 | Currently three designs exist:
 6 | 
 7 | - **fastfir_fixedtaps** A 1-output per clock finite impulse response (FIR) filter,
 8 |   configured as a 12-bit 128-tap band-pass filter.
 9 | 
10 | - **slowfil_fixedtaps** A 1-output per number-of-taps clocks finite impulse response 
11 |   (FIR) filter, configured as a 12-bit 128-tap band-pass filter. This original variant 
12 |   uses a ring-buffer to store all input samples.
13 | 
14 | - **slowfil_srl_fixedtaps** A 1-output per number-of-taps clocks finite impulse
15 |   response (FIR) filter, configured as a 12-bit 128-tap band-pass filter. This is a
16 |   modified variant of the original slowfil that uses a shift-register approach to
17 |   store all input samples.
18 | 
19 | The 12-bit 128-tap band pass filter has the following performance characteristics:
20 | - 0-200Hz: -119.27dB
21 | - 300-500Hz: 0.00dB
22 | - 600-1000Hz: -119.27dB
23 | coefficients generated using http://t-filter.engineerjs.com
24 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/fastfir_dynamictaps.v:
--------------------------------------------------------------------------------
 1 | module  fastfir_dynamictaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_result);
 2 | `ifdef	FORMAL
 3 | 	parameter		NTAPS=16, IW=9, TW=IW, OW=2*IW+5;
 4 | `else
 5 | 	parameter		NTAPS=128, IW=12, TW=IW, OW=2*IW+7;
 6 | `endif
 7 | 	parameter [0:0]		FIXED_TAPS=0;
 8 | 	input	wire			i_clk, i_reset;
 9 | 	//
10 | 	input	wire			i_tap_wr;	// Ignored if FIXED_TAPS
11 | 	input	wire	[(TW-1):0]	i_tap;		// Ignored if FIXED_TAPS
12 | 	//
13 | 	input	wire			i_ce;
14 | 	input	wire	[(IW-1):0]	i_sample;
15 | 	output	wire	[(OW-1):0]	o_result;
16 | 
17 | 	fastfir #(.FIXED_TAPS(0), .NTAPS(NTAPS), .IW(IW), .TW(TW)) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_result(o_result));
18 | endmodule
19 | 
20 | `include "fastfir.vh"
21 | `include "firtap.vh"
22 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/fastfir_fixedtaps.v:
--------------------------------------------------------------------------------
 1 | module  fastfir_fixedtaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_result);
 2 | `ifdef	FORMAL
 3 | 	parameter		NTAPS=16, IW=9, TW=IW, OW=2*IW+5;
 4 | `else
 5 | 	parameter		NTAPS=128, IW=12, TW=IW, OW=2*IW+7;
 6 | `endif
 7 | 	parameter [0:0]		FIXED_TAPS=0;
 8 | 	input	wire			i_clk, i_reset;
 9 | 	//
10 | 	input	wire			i_tap_wr;	// Ignored if FIXED_TAPS
11 | 	input	wire	[(TW-1):0]	i_tap;		// Ignored if FIXED_TAPS
12 | 	//
13 | 	input	wire			i_ce;
14 | 	input	wire	[(IW-1):0]	i_sample;
15 | 	output	wire	[(OW-1):0]	o_result;
16 | 
17 | 	fastfir #(.FIXED_TAPS(1), .NTAPS(NTAPS), .IW(IW), .TW(TW)) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_result(o_result));
18 | endmodule
19 | 
20 | `include "fastfir.vh"
21 | `include "firtap.vh"
22 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import urllib.request
4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/dspfilters/75756b71d162ca621d6905224d2c836f45efa425/rtl/fastfir.v', 'fastfir.vh')
5 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/dspfilters/75756b71d162ca621d6905224d2c836f45efa425/rtl/slowfil.v', 'slowfil.vh')
6 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/dspfilters/75756b71d162ca621d6905224d2c836f45efa425/rtl/firtap.v', 'firtap.vh')
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/slowfil_fixedtaps.v:
--------------------------------------------------------------------------------
 1 | module  slowfil_fixedtaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_ce, o_result);
 2 | `ifdef	FORMAL
 3 | 	parameter		NTAPS=16, IW=9, TW=IW, OW=2*IW+5;
 4 | `else
 5 | 	parameter		NTAPS=128, IW=12, TW=IW, OW=2*IW+7;
 6 | `endif
 7 | 	parameter [0:0]		FIXED_TAPS=0;
 8 | 	input	wire			i_clk, i_reset;
 9 | 	//
10 | 	input	wire			i_tap_wr;	// Ignored if FIXED_TAPS
11 | 	input	wire	[(TW-1):0]	i_tap;		// Ignored if FIXED_TAPS
12 | 	//
13 | 	input	wire			i_ce;
14 | 	input	wire	[(IW-1):0]	i_sample;
15 | 	output	wire	          	o_ce;
16 | 	output	wire	[(OW-1):0]	o_result;
17 | 
18 | 	slowfil #(.FIXED_TAPS(1), .NTAPS(NTAPS), .IW(IW), .TW(TW), .INITIAL_COEFFS("taps.hex")) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_ce(o_ce), .o_result(o_result));
19 | endmodule
20 | 
21 | `include "slowfil.vh"
22 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/slowfil_srl.vh:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////////////////
  2 | //
  3 | // Filename: 	slowfil_srl.v
  4 | //
  5 | // Project:	DSP Filtering Example Project
  6 | //
  7 | // Purpose:	Unlike fastfir.v and genericfir.v, both of which require one
  8 | //		hardware multiply element per tap, this slowfil design requires
  9 | //	only one multiply element in total.  It is useful for those times and
 10 | //	cases when there are fewer taps than there are clock intervals between
 11 | //	incoming samples.  In all other respects, however, it remains quite
 12 | //	generic.
 13 | //
 14 | // Creator:	Dan Gisselquist, Ph.D.
 15 | //		Gisselquist Technology, LLC
 16 | //
 17 | // Note: This is a modified version of slowfil.v by Dan Gisselquist that
 18 | //       uses a shift-register based approach, over a memory-based one.
 19 | //
 20 | ////////////////////////////////////////////////////////////////////////////////
 21 | //
 22 | // Copyright (C) 2017-2019, Gisselquist Technology, LLC
 23 | //
 24 | // This file is part of the DSP filtering set of designs.
 25 | //
 26 | // The DSP filtering designs are free RTL designs: you can redistribute them
 27 | // and/or modify any of them under the terms of the GNU Lesser General Public
 28 | // License as published by the Free Software Foundation, either version 3 of
 29 | // the License, or (at your option) any later version.
 30 | //
 31 | // The DSP filtering designs are distributed in the hope that they will be
 32 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 33 | // MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
 34 | // General Public License for more details.
 35 | //
 36 | // You should have received a copy of the GNU Lesser General Public License
 37 | // along with these designs.  (It's in the $(ROOT)/doc directory.  Run make
 38 | // with no target there if the PDF file isn't present.)  If not, see
 39 | // <http://www.gnu.org/licenses/> for a copy.
 40 | //
 41 | // License:	LGPL, v3, as defined and found on www.gnu.org,
 42 | //		http://www.gnu.org/licenses/lgpl.html
 43 | //
 44 | ////////////////////////////////////////////////////////////////////////////////
 45 | //
 46 | //
 47 | `default_nettype	none
 48 | //
 49 | module	slowfil_srl(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_ce, o_result);
 50 | 	parameter	LGNTAPS = 7, IW=16, TW=16, OW = IW+TW+LGNTAPS;
 51 | 	parameter	[LGNTAPS:0]	NTAPS = 110; // (1<<LGNTAPS);
 52 | 	parameter	[0:0]		FIXED_TAPS = 1'b0;
 53 | 	parameter			INITIAL_COEFFS  = "";
 54 | 	localparam	MEMSZ = (1<<LGNTAPS);
 55 | 	//
 56 | 	// Control inputs (wires)
 57 | 	input	wire		i_clk, i_reset;
 58 | 	//
 59 | 	// Coefficient control -- allows you to update coefficients in the
 60 | 	// filter
 61 | 	input	wire			i_tap_wr;
 62 | 	input	wire	[(TW-1):0]	i_tap;
 63 | 	//
 64 | 	// New sample input(s)--a new sample comes in any time i_ce is true.
 65 | 	// There must be at least NTAPS idle's between every pair of valid
 66 | 	// i_ce's.
 67 | 	input	wire			i_ce;
 68 | 	input	wire	[(IW-1):0]	i_sample;
 69 | 	//
 70 | 	// The output--valid any time o_ce is true.  Since it only changes
 71 | 	// once per interval, you can ignore the o_ce line if you choose and
 72 | 	// just use i_ce.
 73 | 	output	reg			o_ce;
 74 | 	output	reg	[(OW-1):0]	o_result;
 75 | 	//
 76 | 	//
 77 | 
 78 | 	reg	[(TW-1):0]	tapmem	[0:(MEMSZ-1)];	// Coef memory
 79 | 	reg signed [(TW-1):0]	tap;		// Value read from coef memory
 80 | 
 81 | 	reg	[(LGNTAPS-1):0]	tidx;		// Coefficient read index
 82 | 	reg	[(MEMSZ-1):0]		dsrl	[(IW-1):0];	// Data memory
 83 | 	reg signed [(IW-1):0]	data;		// Data value read from memory
 84 | 
 85 | 	// Traveling CE values
 86 | 	reg	d_ce, p_ce, m_ce;
 87 | 	//
 88 | 	// The product and accumulator values for the filter
 89 | 	reg	signed [(IW+TW-1):0]	product;
 90 | 	reg	signed [(OW-1):0]	r_acc;
 91 | 
 92 | 	//
 93 | 	//
 94 | 	// Allow the user to set the taps
 95 | 	//
 96 | 	//
 97 | 
 98 | 	// Starting at zero on reset, increment the tap write index on any
 99 | 	// write of a new tap.  This also means that changing coefficients
100 | 	// will require a reset.
101 | 	generate if (FIXED_TAPS)
102 | 	begin : FIXED_TAP_READMEM
103 | 		initial $readmemh(INITIAL_COEFFS, tapmem);
104 | 
105 | 		// Make Verilators -Wall happy
106 | 		// Verilator lint_off UNUSED
107 | 		wire	[TW:0]	ignored_inputs;
108 | 		assign	ignored_inputs = { i_tap_wr, i_tap };
109 | 		// Verilator lint_on  UNUSED
110 | 	end else begin : SET_DYNAMIC_TAP_VALUES
111 | 		// Coef memory write index
112 | 		reg	[(LGNTAPS-1):0]	tapwidx;
113 | 
114 | 		initial	tapwidx = 0; // NTAPS[LGNTAPS-1:0]-1;
115 | 		always @(posedge i_clk)
116 | 			if(i_reset)
117 | 				tapwidx <= 0; // NTAPS[LGNTAPS-1:0]-1;
118 | 			else if (i_tap_wr)
119 | 				tapwidx <= tapwidx + 1'b1;
120 | 
121 | 		if (INITIAL_COEFFS != 0)
122 | 			initial $readmemh(INITIAL_COEFFS, tapmem);
123 | 		always @(posedge i_clk)
124 | 			if (i_tap_wr)
125 | 				tapmem[tapwidx] <= i_tap;
126 | 	end endgenerate
127 | 
128 | 
129 | 	//
130 | 	//
131 | 	// Record the incoming data into a local memory
132 | 	//
133 | 	//
134 | 
135 | 	// Notice how this data writing section is *independent* of the reset,
136 | 	// depending only upon new sample data.
137 | 	generate
138 | 		genvar i;
139 | 		for (i = 0; i < IW; i=i+1) begin
140 | 			always @(posedge i_clk)
141 | 				if (i_ce)
142 | 					dsrl[i] <= { dsrl[i][(MEMSZ-2):0], i_sample[i] };
143 | 		end
144 | 	endgenerate
145 | 
146 | 	//
147 | 	//
148 | 	// Calculate the indexes of the filter table
149 | 	//
150 | 	//
151 | 
152 | 	// Determine if the next clock (not this one) will contain the last
153 | 	// valid index, and so whether or not we need to stop.
154 | 	wire	last_tap_index;
155 | 	assign	last_tap_index = (NTAPS[LGNTAPS-1:0]-tidx <= 1);
156 | 	// The pre_acc_ce traveling CE values keep track of when the
157 | 	// results of reading memory are valid at the accumulation section
158 | 	// of this code later on.
159 | 	reg	[2:0]	pre_acc_ce;
160 | 	initial	pre_acc_ce = 3'h0;
161 | 	always @(posedge i_clk)
162 | 		if (i_reset)
163 | 			pre_acc_ce[0] <= 1'b0;
164 | 		else if (i_ce)
165 | 			pre_acc_ce[0] <= 1'b1;
166 | 		else if ((pre_acc_ce[0])&&(!last_tap_index))
167 | 			pre_acc_ce[0] <= 1'b1;
168 | 		else
169 | 			pre_acc_ce[0] <= 1'b0;
170 | 	// pre_acc_ce[0] means that the tap index is valid
171 | 	// pre_acc_ce[1] means that the tap value is valid
172 | 	// pre_acc_ce[2] means that the product is valid
173 | 
174 | 	always @(posedge i_clk)
175 | 		if (i_reset)
176 | 			pre_acc_ce[2:1] <= 2'b0;
177 | 		else
178 | 			pre_acc_ce[2:1] <= pre_acc_ce[1:0];
179 | 
180 | 	initial	tidx = 0;
181 | 	always @(posedge i_clk)
182 | 		if (i_ce)
183 | 		begin
184 | 			tidx <= 0;
185 | 		end else begin
186 | 			tidx <= tidx + 1'b1;
187 | 		end
188 | 
189 | 	// m_ce is valid when the first index is valid
190 | 	initial	m_ce = 1'b0;
191 | 	always @(posedge i_clk)
192 | 		m_ce <= (i_ce)&&(!i_reset);
193 | 
194 | 	//
195 | 	//
196 | 	// Read from memory cycle
197 | 	//
198 | 	//
199 | 	initial	tap = 0;
200 | 	always @(posedge i_clk)
201 | 		tap <= tapmem[tidx[(LGNTAPS-1):0]];
202 | 
203 | 	initial	data = 0;
204 | 	generate
205 | 		for (i=0; i < IW; i=i+1) begin
206 | 			always @(posedge i_clk)
207 | 				data[i] <= dsrl[i][tidx[(LGNTAPS-1):0]];
208 | 		end
209 | 	endgenerate
210 | 
211 | 	// d_ce is valid when the first data from memory is read/valid
212 | 	initial	d_ce = 0;
213 | 	always @(posedge i_clk)
214 | 		d_ce <= (m_ce)&&(!i_reset);
215 | 
216 | 	//
217 | 	// Apply the product to the tap and data just read
218 | 	//
219 | 	// p_ce is valid on the first valid product
220 | 	initial	p_ce = 1'b0;
221 | 	always @(posedge i_clk)
222 | 		p_ce <= (d_ce)&&(!i_reset);
223 | 
224 | 	initial	product = 0;
225 | 	always @(posedge i_clk)
226 | 		product <= tap * data;
227 | 
228 | 	initial	r_acc = 0;
229 | 	always @(posedge i_clk)
230 | 		if (p_ce)
231 | 			r_acc <={ {(OW-(IW+TW)){product[(IW+TW-1)]}}, product };
232 | 		else if (pre_acc_ce[2])
233 | 			r_acc <= r_acc + { {(OW-(IW+TW)){product[(IW+TW-1)]}},
234 | 						product };
235 | 
236 | 	//
237 | 	//
238 | 	// Copy the result to the output
239 | 	//
240 | 	//
241 | 	initial	o_result = 0;
242 | 	always @(posedge i_clk)
243 | 		if (p_ce)
244 | 			o_result <= r_acc;
245 | 
246 | 	initial	o_ce = 1'b0;
247 | 	always @(posedge i_clk)
248 | 		o_ce <= (p_ce)&&(!i_reset);
249 | endmodule
250 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/slowfil_srl_fixedtaps.v:
--------------------------------------------------------------------------------
 1 | module  slowfil_srl_fixedtaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_ce, o_result);
 2 | `ifdef	FORMAL
 3 | 	parameter		NTAPS=16, IW=9, TW=IW, OW=2*IW+5;
 4 | `else
 5 | 	parameter		NTAPS=128, IW=12, TW=IW, OW=2*IW+7;
 6 | `endif
 7 | 	parameter [0:0]		FIXED_TAPS=0;
 8 | 	input	wire			i_clk, i_reset;
 9 | 	//
10 | 	input	wire			i_tap_wr;	// Ignored if FIXED_TAPS
11 | 	input	wire	[(TW-1):0]	i_tap;		// Ignored if FIXED_TAPS
12 | 	//
13 | 	input	wire			i_ce;
14 | 	input	wire	[(IW-1):0]	i_sample;
15 | 	output	wire	          	o_ce;
16 | 	output	wire	[(OW-1):0]	o_result;
17 | 
18 | 	// This is a modified variant of the original slowfil that uses a
19 | 	// shift-register approach to store all input samples
20 | 	slowfil_srl #(.FIXED_TAPS(1), .NTAPS(NTAPS), .IW(IW), .TW(TW), .INITIAL_COEFFS("taps.hex")) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_ce(o_ce), .o_result(o_result));
21 | endmodule
22 | 
23 | `include "slowfil_srl.vh"
24 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/dspfilters/taps.hex:
--------------------------------------------------------------------------------
  1 | // 128-tap band-pass filter
  2 | //   (0-200Hz: -119.27dB; 300-500Hz: 0.00dB; 600-1000Hz: -119.27dB)
  3 | //   Coefficients from http://t-filter.engineerjs.com
  4 | 0
  5 | 0
  6 | 0
  7 | 0
  8 | 0
  9 | 0
 10 | 0
 11 | 0
 12 | 0
 13 | 0
 14 | 0
 15 | 1
 16 | 0
 17 | fff
 18 | 0
 19 | 0
 20 | fff
 21 | 0
 22 | 2
 23 | 2
 24 | fff
 25 | fff
 26 | 0
 27 | ffe
 28 | ffd
 29 | 2
 30 | 6
 31 | 2
 32 | ffd
 33 | 0
 34 | ffe
 35 | ff7
 36 | ffc
 37 | c
 38 | a
 39 | ffe
 40 | 1
 41 | 3
 42 | ff0
 43 | feb
 44 | 9
 45 | 18
 46 | 4
 47 | 2
 48 | 11
 49 | ff3
 50 | fcc
 51 | fef
 52 | 25
 53 | 13
 54 | 3
 55 | 30
 56 | 19
 57 | fa7
 58 | fa3
 59 | 1e
 60 | 2e
 61 | ff6
 62 | 6d
 63 | ca
 64 | f8e
 65 | e29
 66 | f57
 67 | 1de
 68 | 1de
 69 | f57
 70 | e29
 71 | f8e
 72 | ca
 73 | 6d
 74 | ff6
 75 | 2e
 76 | 1e
 77 | fa3
 78 | fa7
 79 | 19
 80 | 30
 81 | 3
 82 | 13
 83 | 25
 84 | fef
 85 | fcc
 86 | ff3
 87 | 11
 88 | 2
 89 | 4
 90 | 18
 91 | 9
 92 | feb
 93 | ff0
 94 | 3
 95 | 1
 96 | ffe
 97 | a
 98 | c
 99 | ffc
100 | ff7
101 | ffe
102 | 0
103 | ffd
104 | 2
105 | 6
106 | 2
107 | ffd
108 | ffe
109 | 0
110 | fff
111 | fff
112 | 2
113 | 2
114 | 0
115 | fff
116 | 0
117 | 0
118 | fff
119 | 0
120 | 1
121 | 0
122 | 0
123 | 0
124 | 0
125 | 0
126 | 0
127 | 0
128 | 0
129 | 0
130 | 0
131 | 0
132 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/ethernet/.gitignore:
--------------------------------------------------------------------------------
1 | verilog-ethernet
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/ethernet/README.md:
--------------------------------------------------------------------------------
1 | # Verilog Ethernet Components
2 | 
3 | Source: https://github.com/alexforencich/verilog-ethernet
4 | 
5 | Currently, just one design:
6 | 
7 | - **udp_64 module:** UDP block with 64 bit data width for 10G 
8 |   Ethernet. Manages UDP packet transmission and reception.
9 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/ethernet/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import os, subprocess
4 | if not os.path.isdir('verilog-ethernet'):
5 |     subprocess.run(['git', 'clone', 'https://github.com/alexforencich/verilog-ethernet'])
6 | subprocess.run(['git', 'reset', '--hard', '696c634726da5d8a80393089417362823c065492'], cwd='verilog-ethernet')
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/ethernet/udp_complete_64_top.v:
--------------------------------------------------------------------------------
  1 | module udp_complete_64_top #(
  2 |     parameter ARP_CACHE_ADDR_WIDTH = 9,
  3 |     parameter ARP_REQUEST_RETRY_COUNT = 4,
  4 |     parameter ARP_REQUEST_RETRY_INTERVAL = 125000000*2,
  5 |     parameter ARP_REQUEST_TIMEOUT = 125000000*30,
  6 |     parameter UDP_CHECKSUM_GEN_ENABLE = 1,
  7 |     parameter UDP_CHECKSUM_PAYLOAD_FIFO_ADDR_WIDTH = 11,
  8 |     parameter UDP_CHECKSUM_HEADER_FIFO_ADDR_WIDTH = 3
  9 | )
 10 | (
 11 |     input  wire        clk,
 12 |     input  wire        rst,
 13 |     
 14 |     /*
 15 |      * Ethernet frame input
 16 |      */
 17 |     input  wire        s_eth_hdr_valid,
 18 |     output wire        s_eth_hdr_ready,
 19 |     input  wire [47:0] s_eth_dest_mac,
 20 |     input  wire [47:0] s_eth_src_mac,
 21 |     input  wire [15:0] s_eth_type,
 22 |     input  wire [63:0] s_eth_payload_axis_tdata,
 23 |     input  wire [7:0]  s_eth_payload_axis_tkeep,
 24 |     input  wire        s_eth_payload_axis_tvalid,
 25 |     output wire        s_eth_payload_axis_tready,
 26 |     input  wire        s_eth_payload_axis_tlast,
 27 |     input  wire        s_eth_payload_axis_tuser,
 28 |     
 29 |     /*
 30 |      * Ethernet frame output
 31 |      */
 32 |     output wire        m_eth_hdr_valid,
 33 |     input  wire        m_eth_hdr_ready,
 34 |     output wire [47:0] m_eth_dest_mac,
 35 |     output wire [47:0] m_eth_src_mac,
 36 |     output wire [15:0] m_eth_type,
 37 |     output wire [63:0] m_eth_payload_axis_tdata,
 38 |     output wire [7:0]  m_eth_payload_axis_tkeep,
 39 |     output wire        m_eth_payload_axis_tvalid,
 40 |     input  wire        m_eth_payload_axis_tready,
 41 |     output wire        m_eth_payload_axis_tlast,
 42 |     output wire        m_eth_payload_axis_tuser,
 43 |     
 44 |     /*
 45 |      * IP input
 46 |      */
 47 |     input  wire        s_ip_hdr_valid,
 48 |     output wire        s_ip_hdr_ready,
 49 |     input  wire [5:0]  s_ip_dscp,
 50 |     input  wire [1:0]  s_ip_ecn,
 51 |     input  wire [15:0] s_ip_length,
 52 |     input  wire [7:0]  s_ip_ttl,
 53 |     input  wire [7:0]  s_ip_protocol,
 54 |     input  wire [31:0] s_ip_source_ip,
 55 |     input  wire [31:0] s_ip_dest_ip,
 56 |     input  wire [63:0] s_ip_payload_axis_tdata,
 57 |     input  wire [7:0]  s_ip_payload_axis_tkeep,
 58 |     input  wire        s_ip_payload_axis_tvalid,
 59 |     output wire        s_ip_payload_axis_tready,
 60 |     input  wire        s_ip_payload_axis_tlast,
 61 |     input  wire        s_ip_payload_axis_tuser,
 62 |     
 63 |     /*
 64 |      * IP output
 65 |      */
 66 |     output wire        m_ip_hdr_valid,
 67 |     input  wire        m_ip_hdr_ready,
 68 |     output wire [47:0] m_ip_eth_dest_mac,
 69 |     output wire [47:0] m_ip_eth_src_mac,
 70 |     output wire [15:0] m_ip_eth_type,
 71 |     output wire [3:0]  m_ip_version,
 72 |     output wire [3:0]  m_ip_ihl,
 73 |     output wire [5:0]  m_ip_dscp,
 74 |     output wire [1:0]  m_ip_ecn,
 75 |     output wire [15:0] m_ip_length,
 76 |     output wire [15:0] m_ip_identification,
 77 |     output wire [2:0]  m_ip_flags,
 78 |     output wire [12:0] m_ip_fragment_offset,
 79 |     output wire [7:0]  m_ip_ttl,
 80 |     output wire [7:0]  m_ip_protocol,
 81 |     output wire [15:0] m_ip_header_checksum,
 82 |     output wire [31:0] m_ip_source_ip,
 83 |     output wire [31:0] m_ip_dest_ip,
 84 |     output wire [63:0] m_ip_payload_axis_tdata,
 85 |     output wire [7:0]  m_ip_payload_axis_tkeep,
 86 |     output wire        m_ip_payload_axis_tvalid,
 87 |     input  wire        m_ip_payload_axis_tready,
 88 |     output wire        m_ip_payload_axis_tlast,
 89 |     output wire        m_ip_payload_axis_tuser,
 90 |     
 91 |     /*
 92 |      * UDP input
 93 |      */
 94 |     input  wire        s_udp_hdr_valid,
 95 |     output wire        s_udp_hdr_ready,
 96 |     input  wire [5:0]  s_udp_ip_dscp,
 97 |     input  wire [1:0]  s_udp_ip_ecn,
 98 |     input  wire [7:0]  s_udp_ip_ttl,
 99 |     input  wire [31:0] s_udp_ip_source_ip,
100 |     input  wire [31:0] s_udp_ip_dest_ip,
101 |     input  wire [15:0] s_udp_source_port,
102 |     input  wire [15:0] s_udp_dest_port,
103 |     input  wire [15:0] s_udp_length,
104 |     input  wire [15:0] s_udp_checksum,
105 |     input  wire [63:0] s_udp_payload_axis_tdata,
106 |     input  wire [7:0]  s_udp_payload_axis_tkeep,
107 |     input  wire        s_udp_payload_axis_tvalid,
108 |     output wire        s_udp_payload_axis_tready,
109 |     input  wire        s_udp_payload_axis_tlast,
110 |     input  wire        s_udp_payload_axis_tuser,
111 |     
112 |     /*
113 |      * UDP output
114 |      */
115 |     output wire        m_udp_hdr_valid,
116 |     input  wire        m_udp_hdr_ready,
117 |     output wire [47:0] m_udp_eth_dest_mac,
118 |     output wire [47:0] m_udp_eth_src_mac,
119 |     output wire [15:0] m_udp_eth_type,
120 |     output wire [3:0]  m_udp_ip_version,
121 |     output wire [3:0]  m_udp_ip_ihl,
122 |     output wire [5:0]  m_udp_ip_dscp,
123 |     output wire [1:0]  m_udp_ip_ecn,
124 |     output wire [15:0] m_udp_ip_length,
125 |     output wire [15:0] m_udp_ip_identification,
126 |     output wire [2:0]  m_udp_ip_flags,
127 |     output wire [12:0] m_udp_ip_fragment_offset,
128 |     output wire [7:0]  m_udp_ip_ttl,
129 |     output wire [7:0]  m_udp_ip_protocol,
130 |     output wire [15:0] m_udp_ip_header_checksum,
131 |     output wire [31:0] m_udp_ip_source_ip,
132 |     output wire [31:0] m_udp_ip_dest_ip,
133 |     output wire [15:0] m_udp_source_port,
134 |     output wire [15:0] m_udp_dest_port,
135 |     output wire [15:0] m_udp_length,
136 |     output wire [15:0] m_udp_checksum,
137 |     output wire [63:0] m_udp_payload_axis_tdata,
138 |     output wire [7:0]  m_udp_payload_axis_tkeep,
139 |     output wire        m_udp_payload_axis_tvalid,
140 |     input  wire        m_udp_payload_axis_tready,
141 |     output wire        m_udp_payload_axis_tlast,
142 |     output wire        m_udp_payload_axis_tuser,
143 | 
144 |     /*
145 |      * Status
146 |      */
147 |     output wire        ip_rx_busy,
148 |     output wire        ip_tx_busy,
149 |     output wire        udp_rx_busy,
150 |     output wire        udp_tx_busy,
151 |     output wire        ip_rx_error_header_early_termination,
152 |     output wire        ip_rx_error_payload_early_termination,
153 |     output wire        ip_rx_error_invalid_header,
154 |     output wire        ip_rx_error_invalid_checksum,
155 |     output wire        ip_tx_error_payload_early_termination,
156 |     output wire        ip_tx_error_arp_failed,
157 |     output wire        udp_rx_error_header_early_termination,
158 |     output wire        udp_rx_error_payload_early_termination,
159 |     output wire        udp_tx_error_payload_early_termination,
160 | 
161 |     /*
162 |      * Configuration
163 |      */
164 |     input  wire [47:0] local_mac,
165 |     input  wire [31:0] local_ip,
166 |     input  wire [31:0] gateway_ip,
167 |     input  wire [31:0] subnet_mask,
168 |     input  wire        clear_arp_cache
169 | );
170 | udp_complete_64 #(
171 |     .ARP_CACHE_ADDR_WIDTH(ARP_CACHE_ADDR_WIDTH),
172 |     .ARP_REQUEST_RETRY_COUNT(ARP_REQUEST_RETRY_COUNT),
173 |     .ARP_REQUEST_RETRY_INTERVAL(ARP_REQUEST_RETRY_INTERVAL),
174 |     .ARP_REQUEST_TIMEOUT(ARP_REQUEST_TIMEOUT),
175 |     .UDP_CHECKSUM_GEN_ENABLE(UDP_CHECKSUM_GEN_ENABLE),
176 |     .UDP_CHECKSUM_PAYLOAD_FIFO_ADDR_WIDTH(UDP_CHECKSUM_PAYLOAD_FIFO_ADDR_WIDTH),
177 |     .UDP_CHECKSUM_HEADER_FIFO_ADDR_WIDTH(UDP_CHECKSUM_HEADER_FIFO_ADDR_WIDTH)
178 | ) top
179 | (
180 |     .clk(clk),
181 |     .rst(rst),
182 |     
183 |     .s_eth_hdr_valid(s_eth_hdr_valid),
184 |     .s_eth_hdr_ready(s_eth_hdr_ready),
185 |     .s_eth_dest_mac(s_eth_dest_mac),
186 |     .s_eth_src_mac(s_eth_src_mac),
187 |     .s_eth_type(s_eth_type),
188 |     .s_eth_payload_axis_tdata(s_eth_payload_axis_tdata),
189 |     .s_eth_payload_axis_tkeep(s_eth_payload_axis_tkeep),
190 |     .s_eth_payload_axis_tvalid(s_eth_payload_axis_tvalid),
191 |     .s_eth_payload_axis_tready(s_eth_payload_axis_tready),
192 |     .s_eth_payload_axis_tlast(s_eth_payload_axis_tlast),
193 |     .s_eth_payload_axis_tuser(s_eth_payload_axis_tuser),
194 |     
195 |     .m_eth_hdr_valid(m_eth_hdr_valid),
196 |     .m_eth_hdr_ready(m_eth_hdr_ready),
197 |     .m_eth_dest_mac(m_eth_dest_mac),
198 |     .m_eth_src_mac(m_eth_src_mac),
199 |     .m_eth_type(m_eth_type),
200 |     .m_eth_payload_axis_tdata(m_eth_payload_axis_tdata),
201 |     .m_eth_payload_axis_tkeep(m_eth_payload_axis_tkeep),
202 |     .m_eth_payload_axis_tvalid(m_eth_payload_axis_tvalid),
203 |     .m_eth_payload_axis_tready(m_eth_payload_axis_tready),
204 |     .m_eth_payload_axis_tlast(m_eth_payload_axis_tlast),
205 |     .m_eth_payload_axis_tuser(m_eth_payload_axis_tuser),
206 |     
207 |     .s_ip_hdr_valid(s_ip_hdr_valid),
208 |     .s_ip_hdr_ready(s_ip_hdr_ready),
209 |     .s_ip_dscp(s_ip_dscp),
210 |     .s_ip_ecn(s_ip_ecn),
211 |     .s_ip_length(s_ip_length),
212 |     .s_ip_ttl(s_ip_ttl),
213 |     .s_ip_protocol(s_ip_protocol),
214 |     .s_ip_source_ip(s_ip_source_ip),
215 |     .s_ip_dest_ip(s_ip_dest_ip),
216 |     .s_ip_payload_axis_tdata(s_ip_payload_axis_tdata),
217 |     .s_ip_payload_axis_tkeep(s_ip_payload_axis_tkeep),
218 |     .s_ip_payload_axis_tvalid(s_ip_payload_axis_tvalid),
219 |     .s_ip_payload_axis_tready(s_ip_payload_axis_tready),
220 |     .s_ip_payload_axis_tlast(s_ip_payload_axis_tlast),
221 |     .s_ip_payload_axis_tuser(s_ip_payload_axis_tuser),
222 |     
223 |     .m_ip_hdr_valid(m_ip_hdr_valid),
224 |     .m_ip_hdr_ready(m_ip_hdr_ready),
225 |     .m_ip_eth_dest_mac(m_ip_eth_dest_mac),
226 |     .m_ip_eth_src_mac(m_ip_eth_src_mac),
227 |     .m_ip_eth_type(m_ip_eth_type),
228 |     .m_ip_version(m_ip_version),
229 |     .m_ip_ihl(m_ip_ihl),
230 |     .m_ip_dscp(m_ip_dscp),
231 |     .m_ip_ecn(m_ip_ecn),
232 |     .m_ip_length(m_ip_length),
233 |     .m_ip_identification(m_ip_identification),
234 |     .m_ip_flags(m_ip_flags),
235 |     .m_ip_fragment_offset(m_ip_fragment_offset),
236 |     .m_ip_ttl(m_ip_ttl),
237 |     .m_ip_protocol(m_ip_protocol),
238 |     .m_ip_header_checksum(m_ip_header_checksum),
239 |     .m_ip_source_ip(m_ip_source_ip),
240 |     .m_ip_dest_ip(m_ip_dest_ip),
241 |     .m_ip_payload_axis_tdata(m_ip_payload_axis_tdata),
242 |     .m_ip_payload_axis_tkeep(m_ip_payload_axis_tkeep),
243 |     .m_ip_payload_axis_tvalid(m_ip_payload_axis_tvalid),
244 |     .m_ip_payload_axis_tready(m_ip_payload_axis_tready),
245 |     .m_ip_payload_axis_tlast(m_ip_payload_axis_tlast),
246 |     .m_ip_payload_axis_tuser(m_ip_payload_axis_tuser),
247 |     
248 |     .s_udp_hdr_valid(s_udp_hdr_valid),
249 |     .s_udp_hdr_ready(s_udp_hdr_ready),
250 |     .s_udp_ip_dscp(s_udp_ip_dscp),
251 |     .s_udp_ip_ecn(s_udp_ip_ecn),
252 |     .s_udp_ip_ttl(s_udp_ip_ttl),
253 |     .s_udp_ip_source_ip(s_udp_ip_source_ip),
254 |     .s_udp_ip_dest_ip(s_udp_ip_dest_ip),
255 |     .s_udp_source_port(s_udp_source_port),
256 |     .s_udp_dest_port(s_udp_dest_port),
257 |     .s_udp_length(s_udp_length),
258 |     .s_udp_checksum(s_udp_checksum),
259 |     .s_udp_payload_axis_tdata(s_udp_payload_axis_tdata),
260 |     .s_udp_payload_axis_tkeep(s_udp_payload_axis_tkeep),
261 |     .s_udp_payload_axis_tvalid(s_udp_payload_axis_tvalid),
262 |     .s_udp_payload_axis_tready(s_udp_payload_axis_tready),
263 |     .s_udp_payload_axis_tlast(s_udp_payload_axis_tlast),
264 |     .s_udp_payload_axis_tuser(s_udp_payload_axis_tuser),
265 |     
266 |     .m_udp_hdr_valid(m_udp_hdr_valid),
267 |     .m_udp_hdr_ready(m_udp_hdr_ready),
268 |     .m_udp_eth_dest_mac(m_udp_eth_dest_mac),
269 |     .m_udp_eth_src_mac(m_udp_eth_src_mac),
270 |     .m_udp_eth_type(m_udp_eth_type),
271 |     .m_udp_ip_version(m_udp_ip_version),
272 |     .m_udp_ip_ihl(m_udp_ip_ihl),
273 |     .m_udp_ip_dscp(m_udp_ip_dscp),
274 |     .m_udp_ip_ecn(m_udp_ip_ecn),
275 |     .m_udp_ip_length(m_udp_ip_length),
276 |     .m_udp_ip_identification(m_udp_ip_identification),
277 |     .m_udp_ip_flags(m_udp_ip_flags),
278 |     .m_udp_ip_fragment_offset(m_udp_ip_fragment_offset),
279 |     .m_udp_ip_ttl(m_udp_ip_ttl),
280 |     .m_udp_ip_protocol(m_udp_ip_protocol),
281 |     .m_udp_ip_header_checksum(m_udp_ip_header_checksum),
282 |     .m_udp_ip_source_ip(m_udp_ip_source_ip),
283 |     .m_udp_ip_dest_ip(m_udp_ip_dest_ip),
284 |     .m_udp_source_port(m_udp_source_port),
285 |     .m_udp_dest_port(m_udp_dest_port),
286 |     .m_udp_length(m_udp_length),
287 |     .m_udp_checksum(m_udp_checksum),
288 |     .m_udp_payload_axis_tdata(m_udp_payload_axis_tdata),
289 |     .m_udp_payload_axis_tkeep(m_udp_payload_axis_tkeep),
290 |     .m_udp_payload_axis_tvalid(m_udp_payload_axis_tvalid),
291 |     .m_udp_payload_axis_tready(m_udp_payload_axis_tready),
292 |     .m_udp_payload_axis_tlast(m_udp_payload_axis_tlast),
293 |     .m_udp_payload_axis_tuser(m_udp_payload_axis_tuser),
294 | 
295 |     .ip_rx_busy(ip_rx_busy),
296 |     .ip_tx_busy(ip_tx_busy),
297 |     .udp_rx_busy(udp_rx_busy),
298 |     .udp_tx_busy(udp_tx_busy),
299 |     .ip_rx_error_header_early_termination(ip_rx_error_header_early_termination),
300 |     .ip_rx_error_payload_early_termination(ip_rx_error_payload_early_termination),
301 |     .ip_rx_error_invalid_header(ip_rx_error_invalid_header),
302 |     .ip_rx_error_invalid_checksum(ip_rx_error_invalid_checksum),
303 |     .ip_tx_error_payload_early_termination(ip_tx_error_payload_early_termination),
304 |     .ip_tx_error_arp_failed(ip_tx_error_arp_failed),
305 |     .udp_rx_error_header_early_termination(udp_rx_error_header_early_termination),
306 |     .udp_rx_error_payload_early_termination(udp_rx_error_payload_early_termination),
307 |     .udp_tx_error_payload_early_termination(udp_tx_error_payload_early_termination),
308 | 
309 |     .local_mac(local_mac),
310 |     .local_ip(local_ip),
311 |     .gateway_ip(gateway_ip),
312 |     .subnet_mask(subnet_mask),
313 |     .clear_arp_cache(clear_arp_cache)
314 | );
315 | endmodule
316 | 
317 | `include "verilog-ethernet/rtl/udp_complete_64.v"
318 | `include "verilog-ethernet/rtl/udp_64.v"
319 | `include "verilog-ethernet/rtl/ip_complete_64.v"
320 | `include "verilog-ethernet/rtl/ip_arb_mux.v"
321 | `include "verilog-ethernet/rtl/../lib/axis/rtl/arbiter.v"
322 | `include "verilog-ethernet/rtl/arp_64.v"
323 | `include "verilog-ethernet/rtl/ip_64.v"
324 | `include "verilog-ethernet/rtl/eth_arb_mux.v"
325 | `include "verilog-ethernet/rtl/udp_checksum_gen_64.v"
326 | `include "verilog-ethernet/rtl/udp_ip_tx_64.v"
327 | `include "verilog-ethernet/rtl/udp_ip_rx_64.v"
328 | `include "verilog-ethernet/lib/axis/rtl/axis_fifo.v"
329 | `include "verilog-ethernet/rtl/ip_eth_tx_64.v"
330 | `include "verilog-ethernet/rtl/ip_eth_rx_64.v"
331 | `include "verilog-ethernet/rtl/arp_cache.v"
332 | `include "verilog-ethernet/rtl/arp_eth_tx_64.v"
333 | `include "verilog-ethernet/rtl/arp_eth_rx_64.v"
334 | `include "verilog-ethernet/lib/axis/rtl/priority_encoder.v"
335 | `include "verilog-ethernet/rtl/lfsr.v"
336 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/marlann/README.md:
--------------------------------------------------------------------------------
1 | # MARLANN - Multiply-Accumulate and Rectified-Linear Accelerator for Neural Networks
2 | 
3 | Source: https://github.com/SymbioticEDA/MARLANN
4 | 
5 | Currently, `marlann_compute.v` is a modified version of the `rtl/compute.v` design
6 | found upstream but with direct instantiations removed in favour of generic RTL for
7 | inference.
8 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/marlann/marlann_compute.v:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (C) 2018  Clifford Wolf <clifford@symbioticeda.com>
  3 |  *
  4 |  *  Permission to use, copy, modify, and/or distribute this software for any
  5 |  *  purpose with or without fee is hereby granted, provided that the above
  6 |  *  copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  *
 16 |  */
 17 | 
 18 | module marlann_compute #(  // MARLANN compute pipeline: stages s1..s9 plus a write-back step, all sharing one 64-bit memory port
 19 | 	parameter integer NB = 2,  // scales coeff_mem/s4_coeff/s7_prod widths and the multiplier count; NOTE(review): fixed indices below ([127:64], s7_prod[240 +: 16], {mem_rdata, mem_rdata}) appear to assume NB == 2 - confirm
 20 | 	parameter integer CODE_SIZE = 512,  // number of 32-bit words in code_mem
 21 | 	parameter integer COEFF_SIZE = 512  // number of (64*NB)-bit words in coeff_mem
 22 | ) (
 23 | 	input         clock,
 24 | 	input         reset,  // synchronous: only sampled inside the posedge-clock blocks
 25 | 	output        busy,  // OR of stage enables, see the assign below
 26 | 	// Command input: stage 1 latches cmd_insn whenever it is not stalled (cmd_ready high); s1_en records cmd_valid.
 27 | 	input         cmd_valid,
 28 | 	output        cmd_ready,
 29 | 	input  [31:0] cmd_insn,  // [31:15] memory address, [14:6] coefficient/code address, [5:0] opcode
 30 | 	// Memory port: a single address bus is shared by both read requests and the byte-enabled write.
 31 | 	output        mem_ren,
 32 | 	output [ 7:0] mem_wen,
 33 | 	output [15:0] mem_addr,
 34 | 	output [63:0] mem_wdata,
 35 | 	input  [63:0] mem_rdata,
 36 | 	// tick_simd follows s2_tick_simd (set for opcodes 40,41,42,43,45); tick_nosimd marks any other instruction entering stage 2.
 37 | 	output        tick_simd,
 38 | 	output        tick_nosimd
 39 | );
 40 | 	integer i;
 41 | 	// Instruction store (read in stage 1, written by LoadCode) and coefficient store (read in stage 4, written by LoadCoeff0/1).
 42 | 	reg [31:0] code_mem [0:CODE_SIZE-1];
 43 | 	reg [64*NB-1:0] coeff_mem [0:COEFF_SIZE-1];
 44 | 	// Two 32-bit accumulators, updated in stage 9.
 45 | 	reg [31:0] acc0, acc1;
 46 | 	// Base pointers added to instruction address fields: VBP (stage-2 reads), LBP (stage-5 reads), SBP (write-back), CBP (coeff_mem index).
 47 | 	reg [16:0] VBP, LBP, SBP;
 48 | 	reg [ 8:0] CBP;
 49 | 	// First read request, issued from stage 2.
 50 | 	reg        mem_rd0_en;
 51 | 	reg [15:0] mem_rd0_addr;
 52 | 	// Second read request, issued from stage 5.
 53 | 	reg        mem_rd1_en;
 54 | 	reg [15:0] mem_rd1_addr;
 55 | 	// Write request, derived combinationally from pre_mem_wr_* in the write-back section.
 56 | 	reg [ 7:0] mem_wr_en;
 57 | 	reg [15:0] mem_wr_addr;
 58 | 	reg [63:0] mem_wr_wdata;
 59 | 	// mem_addr ORs the three addresses, each AND-masked by its own enable. NOTE(review): this assumes at most one requester is active per cycle - presumably guaranteed by the memlock interlock; confirm.
 60 | 	assign mem_ren = mem_rd0_en || mem_rd1_en;
 61 | 	assign mem_wen = mem_wr_en;
 62 | 	assign mem_addr = ({16{mem_rd0_en}} & mem_rd0_addr) | ({16{mem_rd1_en}} & mem_rd1_addr) | ({16{|mem_wr_en}} & mem_wr_addr);
 63 | 	assign mem_wdata = mem_wr_wdata;
 64 | 	// Field decodes of the incoming command (these three wires are not referenced elsewhere in this module).
 65 | 	wire [16:0] cmd_insn_maddr = cmd_insn[31:15];
 66 | 	wire [8:0] cmd_insn_caddr = cmd_insn[14:6];
 67 | 	wire [5:0] cmd_insn_opcode = cmd_insn[5:0];
 68 | 
 69 | 
 70 | 	/**** staging ****/
 71 | 	// Each stage N has a valid bit sN_en and an instruction word sN_insn, plus stage-specific data registers.
 72 | 	reg                 s1_en;
 73 | 	wire [        31:0] s1_insn;
 74 | 	wire                s1_stall;
 75 | 
 76 | 	reg                 s2_en;
 77 | 	reg  [        31:0] s2_insn;
 78 | 
 79 | 	reg                 s3_en;
 80 | 	reg  [        31:0] s3_insn;
 81 | 
 82 | 	reg                 s3a_en;
 83 | 	reg  [        31:0] s3a_insn;
 84 | 
 85 | 	reg                 s4_en;
 86 | 	reg  [        31:0] s4_insn;
 87 | 	reg  [   NB*64-1:0] s4_coeff;  // coefficient word read from coeff_mem
 88 | 
 89 | 	reg                 s5_en;
 90 | 	reg  [        31:0] s5_insn;
 91 | 	reg  [     8*9-1:0] s5_max;  // eight 9-bit max candidates
 92 | 
 93 | 	reg                 s6_en;
 94 | 	reg  [        31:0] s6_insn;
 95 | 	reg  [     4*9-1:0] s6_max;  // four candidates after the first pairwise reduction
 96 | 
 97 | 	reg                 s7_en;
 98 | 	reg  [        31:0] s7_insn;
 99 | 	wire [  NB*128-1:0] s7_prod;  // outputs of the marlann_compute_mul2 array
100 | 	reg  [     2*9-1:0] s7_max;  // two candidates after the second reduction
101 | 
102 | 	reg                 s8_en;
103 | 	reg  [        31:0] s8_insn;
104 | 	reg  [        19:0] s8_sum0;
105 | 	reg  [        19:0] s8_sum1;
106 | 	reg  [         8:0] s8_max;
107 | 	reg                 s8_maxen;
108 | 
109 | 	reg                 s9_en;
110 | 	reg  [        31:0] s9_insn;
111 | 
112 | 
113 | 	/**** memory and max interlock ****/
114 | 	// memlock_res is a reservation bitmap that moves one bit position towards bit 0 every clock (see the posedge block below).
115 | 	reg [9:0] memlock_res;
116 | 	reg [9:0] memlock_mask;
117 | 	reg memlock_expect;
118 | 	// memlock_mask is the slot the current stage-1 opcode wants; it is forced to 0 when stage 1 is empty or in reset.
119 | 	always @* begin
120 | 		memlock_mask = 0;
121 | 
122 | 		case (s1_insn[5:0])
123 | 			/* LoadCode, LoadCoeff0, LoadCoeff1 */
124 | 			4, 5, 6: memlock_mask = 1 << 0;
125 | 
126 | 			/* LdSet, LdSet0, LdSet1, LdAdd, LdAdd0, LdAdd1 */
127 | 			28, 29, 30, 32, 33, 34: begin
128 | 				memlock_mask = 1 << 4;
129 | 			end
130 | 
131 | 			/* MACC, MMAX, MACCZ, MMAXZ, MMAXN */
132 | 			40, 41, 42, 43, 45: memlock_mask = 1 << 0;
133 | 
134 | 			/* Store, Store0, Store1, ReLU, ReLU0, ReLU1, Save, Save0, Save1 */
135 | 			16, 17, 18, 20, 21, 22, 24, 25, 26: memlock_mask = 1 << 9;
136 | 		endcase
137 | 
138 | 		if (!s1_en || reset)
139 | 			memlock_mask = 0;
140 | 	end
141 | 	// maxlock: an accepted maxlock_a opcode (registered as maxlock_a_q) stalls a maxlock_b opcode presented on the following cycle.
142 | 	reg maxlock_a;
143 | 	reg maxlock_b;
144 | 	reg maxlock_a_q;
145 | 
146 | 	always @* begin
147 | 		maxlock_a = 0;
148 | 		maxlock_b = 0;
149 | 		// The mask clears the two low opcode bits, so this matches opcodes 28-35 and 40-47.
150 | 		case (s1_insn[5:0] & 6'b 1111_00)
151 | 			28, 32, 40, 44: maxlock_a = 1;
152 | 		endcase
153 | 
154 | 		case (s1_insn[5:0])
155 | 			41, 43, 45, 47: maxlock_b = 1;
156 | 		endcase
157 | 
158 | 		if (!s1_en || reset) begin
159 | 			maxlock_a = 0;
160 | 			maxlock_b = 0;
161 | 		end
162 | 	end
163 | 	// Stall stage 1 when its wanted slot is already reserved, or on the maxlock hazard described above.
164 | 	assign s1_stall = |(memlock_res & memlock_mask) || (maxlock_b && maxlock_a_q);
165 | 	// The 10-bit value is assigned to the 11-bit {memlock_res, memlock_expect}: memlock_res receives it shifted right by one, memlock_expect receives its bit 0.
166 | 	always @(posedge clock) begin
167 | 		{memlock_res, memlock_expect} <= memlock_res | (s1_stall ? 10'b 0 : memlock_mask);
168 | 		maxlock_a_q <= maxlock_a && !s1_stall;
169 | 
170 | 		if (reset) begin
171 | 			memlock_res <= 0;
172 | 			memlock_expect <= 0;
173 | 			maxlock_a_q <= 0;
174 | 		end
175 | 	end
176 | 
177 | 	assign cmd_ready = !s1_stall;
178 | 	// NOTE(review): s3a_en and s9_en are not part of this OR, so busy can read 0 while an instruction is in stage 3A, stage 9 or write-back - confirm intended.
179 | 	assign busy = |{s1_en, s2_en, s3_en, s4_en, s5_en, s6_en, s7_en, s8_en};
180 | 
181 | 
182 | 	/**** stage 1 ****/
183 | 	// Stage 1 latches the incoming word both directly and as a code_mem lookup indexed by cmd_insn[14:6]; opcode 3 selects the looked-up word.
184 | 	reg [31:0] s1_insn_direct;
185 | 	reg [31:0] s1_insn_codemem;
186 | 	reg s1_insn_sel;
187 | 
188 | 	assign s1_insn = s1_insn_sel ? s1_insn_codemem : s1_insn_direct;
189 | 
190 | 	wire [16:0] s1_insn_maddr = s1_insn[31:15];
191 | 	wire [8:0] s1_insn_caddr = s1_insn[14:6];
192 | 	wire [5:0] s1_insn_opcode = s1_insn[5:0];
193 | 
194 | 	always @(posedge clock) begin
195 | 		if (!s1_stall) begin  // hold the stage-1 registers while stalled
196 | 			s1_en <= cmd_valid && cmd_ready;
197 | 			s1_insn_direct <= cmd_insn;
198 | 			s1_insn_codemem <= code_mem[cmd_insn[14:6]];
199 | 			s1_insn_sel <= cmd_insn[5:0] == 3;
200 | 		end
201 | 
202 | 		if (reset) begin
203 | 			s1_en <= 0;
204 | 		end
205 | 	end
206 | 
207 | 
208 | 	/**** stage 2 ****/
209 | 	// Stage 2 issues the first memory read and updates VBP.
210 | 	reg s2_tick_simd;
211 | 
212 | 	always @(posedge clock) begin
213 | 		s2_en <= 0;
214 | 		s2_insn <= s1_insn;
215 | 		s2_tick_simd <= 0;
216 | 		// Defaults; overridden below by later non-blocking assignments in the same block.
217 | 		mem_rd0_en <= 0;
218 | 		mem_rd0_addr <= 'bx;
219 | 
220 | 		if (!reset && s1_en && !s1_stall) begin
221 | 			s2_en <= 1;
222 | 
223 | 			case (s1_insn[5:0])
224 | 				/* LoadCode, LoadCoeff0, LoadCoeff1 */
225 | 				4, 5, 6: begin
226 | 					mem_rd0_en <= 1;
227 | 					mem_rd0_addr <= s1_insn[31:15] >> 1;  // instruction address field halved to form the memory address
228 | 				end
229 | 
230 | 				/* SetVBP, AddVBP */
231 | 				8, 9: begin
232 | 					VBP <= s1_insn[31:15] + (s1_insn[0] ? VBP : 0);  // opcode bit 0 selects add (9) versus set (8)
233 | 				end
234 | 
235 | 				/* MACC, MMAX, MACCZ, MMAXZ, MMAXN */
236 | 				40, 41, 42, 43, 45: begin
237 | 					mem_rd0_en <= 1;
238 | 					mem_rd0_addr <= (s1_insn[31:15] + VBP) >> 1;
239 | 					s2_tick_simd <= 1;
240 | 				end
241 | 			endcase
242 | 		end
243 | 	end
244 | 
245 | 	assign tick_simd = s2_tick_simd;
246 | 	assign tick_nosimd = s2_en && !tick_simd;
247 | 
248 | 
249 | 	/**** stage 3 ****/
250 | 	// Delay stage: only forwards the valid bit and instruction word.
251 | 	always @(posedge clock) begin
252 | 		s3_en <= 0;
253 | 		s3_insn <= s2_insn;
254 | 
255 | 		if (!reset && s2_en) begin
256 | 			s3_en <= 1;
257 | 		end
258 | 	end
259 | 
260 | 
261 | 	/**** stage 3A ****/
262 | 	// Second delay stage: only forwards the valid bit and instruction word.
263 | 	always @(posedge clock) begin
264 | 		s3a_en <= 0;
265 | 		s3a_insn <= s3_insn;
266 | 
267 | 		if (!reset && s3_en) begin
268 | 			s3a_en <= 1;
269 | 		end
270 | 	end
271 | 
272 | 
273 | 	/**** stage 4 ****/
274 | 	// Stage 4 reads coeff_mem at (s3a_insn[14:6] + CBP) every clock, regardless of s3a_en, and updates CBP.
275 | 	always @(posedge clock) begin
276 | 		s4_en <= 0;
277 | 		s4_insn <= s3a_insn;
278 | 		s4_coeff <= coeff_mem[s3a_insn[14:6] + CBP];
279 | 
280 | 		if (!reset && s3a_en) begin
281 | 			s4_en <= 1;
282 | 
283 | 			/* SetCBP, AddCBP */
284 | 			if (s3a_insn[5:0] == 14 || s3a_insn[5:0] == 15) begin
285 | 				CBP <= s3a_insn[14:6] + (s3a_insn[0] ? CBP : 0);  // opcode bit 0 selects add (15) versus set (14)
286 | 			end
287 | 		end
288 | 	end
289 | 
290 | 
291 | 	/**** stage 5 ****/
292 | 	// Stage 5 captures the per-byte max candidates, commits LoadCode/LoadCoeff writes, updates LBP and issues the second memory read.
293 | 	always @(posedge clock) begin
294 | 		s5_en <= 0;
295 | 		s5_insn <= s4_insn;
296 | 		// Slot k = byte k of mem_rdata if byte k of s4_coeff is non-zero, else the 9'h100 sentinel. NOTE(review): 9'h100 is unsigned, which makes the whole ?: unsigned under Verilog typing rules, so the byte looks zero- rather than sign-extended despite $signed - confirm intended.
297 | 		s5_max[0*9 +: 9] <= s4_coeff[0*8 +: 8] ? $signed(mem_rdata[0*8 +: 8]) : 9'h100;
298 | 		s5_max[1*9 +: 9] <= s4_coeff[1*8 +: 8] ? $signed(mem_rdata[1*8 +: 8]) : 9'h100;
299 | 		s5_max[2*9 +: 9] <= s4_coeff[2*8 +: 8] ? $signed(mem_rdata[2*8 +: 8]) : 9'h100;
300 | 		s5_max[3*9 +: 9] <= s4_coeff[3*8 +: 8] ? $signed(mem_rdata[3*8 +: 8]) : 9'h100;
301 | 		s5_max[4*9 +: 9] <= s4_coeff[4*8 +: 8] ? $signed(mem_rdata[4*8 +: 8]) : 9'h100;
302 | 		s5_max[5*9 +: 9] <= s4_coeff[5*8 +: 8] ? $signed(mem_rdata[5*8 +: 8]) : 9'h100;
303 | 		s5_max[6*9 +: 9] <= s4_coeff[6*8 +: 8] ? $signed(mem_rdata[6*8 +: 8]) : 9'h100;
304 | 		s5_max[7*9 +: 9] <= s4_coeff[7*8 +: 8] ? $signed(mem_rdata[7*8 +: 8]) : 9'h100;
305 | 		// Defaults; overridden by the LdSet/LdAdd case below.
306 | 		mem_rd1_en <= 0;
307 | 		mem_rd1_addr <= 'bx;
308 | 
309 | 		if (!reset && s4_en) begin
310 | 			s5_en <= 1;
311 | 
312 | 			case (s4_insn[5:0])
313 | 				/* LoadCode */
314 | 				4: begin
315 | 					code_mem[s4_insn[14:6]] <= mem_rdata[31:0];  // only the low 32 bits of the memory word are stored
316 | 				end
317 | 
318 | 				/* LoadCoeff0 */
319 | 				5: begin
320 | 					coeff_mem[s4_insn[14:6]][63:0] <= mem_rdata;
321 | 				end
322 | 
323 | 				/* LoadCoeff1 */
324 | 				6: begin
325 | 					coeff_mem[s4_insn[14:6]][127:64] <= mem_rdata;  // upper half; the fixed range needs a word of at least 128 bits
326 | 				end
327 | 
328 | 				/* SetLBP, AddLBP */
329 | 				10, 11: begin
330 | 					LBP <= s4_insn[31:15] + (s4_insn[0] ? LBP : 0);  // opcode bit 0 selects add (11) versus set (10)
331 | 				end
332 | 
333 | 				/* LdSet, LdSet0, LdSet1, LdAdd, LdAdd0, LdAdd1 */
334 | 				28, 29, 30, 32, 33, 34: begin
335 | 					mem_rd1_en <= 1;
336 | 					mem_rd1_addr <= (s4_insn[31:15] + LBP) >> 1;
337 | 				end
338 | 			endcase
339 | 		end
340 | 	end
341 | 
342 | 
343 | 	/**** stage 6 ****/
344 | 	// Stage 6: pairwise signed maximum, 8 candidates -> 4.
345 | 	always @(posedge clock) begin
346 | 		s6_en <= 0;
347 | 		s6_insn <= s5_insn;
348 | 
349 | 		s6_max[0*9 +: 9] <= $signed(s5_max[0*9 +: 9]) > $signed(s5_max[1*9 +: 9]) ? s5_max[0*9 +: 9] : s5_max[1*9 +: 9];
350 | 		s6_max[1*9 +: 9] <= $signed(s5_max[2*9 +: 9]) > $signed(s5_max[3*9 +: 9]) ? s5_max[2*9 +: 9] : s5_max[3*9 +: 9];
351 | 		s6_max[2*9 +: 9] <= $signed(s5_max[4*9 +: 9]) > $signed(s5_max[5*9 +: 9]) ? s5_max[4*9 +: 9] : s5_max[5*9 +: 9];
352 | 		s6_max[3*9 +: 9] <= $signed(s5_max[6*9 +: 9]) > $signed(s5_max[7*9 +: 9]) ? s5_max[6*9 +: 9] : s5_max[7*9 +: 9];
353 | 
354 | 		if (!reset && s5_en) begin
355 | 			s6_en <= 1;
356 | 		end
357 | 	end
358 | 
359 | 
360 | 	/**** stage 7 ****/
361 | 	// Multiplier operand A: the same 64-bit mem_rdata word in both halves.
362 | 	wire [NB*64-1:0] mulA = {mem_rdata, mem_rdata};
363 | 	// Array of NB*4 multiplier cells; each takes a 16-bit slice of A and B and drives 32 bits of s7_prod. Each cell registers its result three times.
364 | 	marlann_compute_mul2 mul [NB*4-1:0] (
365 | 		.clock (clock   ),
366 | 		.A     (mulA    ),
367 | 		.B     (s4_coeff),
368 | 		.X     (s7_prod )
369 | 	);
370 | 	// Stage 7: pairwise signed maximum, 4 candidates -> 2.
371 | 	always @(posedge clock) begin
372 | 		s7_en <= 0;
373 | 		s7_insn <= s6_insn;
374 | 
375 | 		s7_max[0*9 +: 9] <= $signed(s6_max[0*9 +: 9]) > $signed(s6_max[1*9 +: 9]) ? s6_max[0*9 +: 9] : s6_max[1*9 +: 9];
376 | 		s7_max[1*9 +: 9] <= $signed(s6_max[2*9 +: 9]) > $signed(s6_max[3*9 +: 9]) ? s6_max[2*9 +: 9] : s6_max[3*9 +: 9];
377 | 
378 | 		if (!reset && s6_en) begin
379 | 			s7_en <= 1;
380 | 		end
381 | 	end
382 | 
383 | 
384 | 	/**** stage 8 ****/
385 | 	// acc0zn is the accumulator value used by the max comparison: 0 if s7_insn[1], 32'h8000_0000 if s7_insn[2] (bit 2 wins), else acc0.
386 | 	reg [31:0] acc0zn;
387 | 
388 | 	always @* begin
389 | 		acc0zn = s7_insn[1] ? 0 : acc0;
390 | 		acc0zn = s7_insn[2] ? 32'h 8000_0000 : acc0zn;
391 | 	end
392 | 	// Stage 8: add the sixteen 16-bit products into two 20-bit signed sums (eight each) and reduce the max candidates 2 -> 1.
393 | 	always @(posedge clock) begin
394 | 		s8_en <= 0;
395 | 		s8_insn <= s7_insn;
396 | 
397 | 		s8_sum0 <= $signed(s7_prod[  0 +: 16]) + $signed(s7_prod[ 16 +: 16]) + $signed(s7_prod[ 32 +: 16]) + $signed(s7_prod[ 48 +: 16]) +
398 | 		           $signed(s7_prod[ 64 +: 16]) + $signed(s7_prod[ 80 +: 16]) + $signed(s7_prod[ 96 +: 16]) + $signed(s7_prod[112 +: 16]);
399 | 
400 | 		s8_sum1 <= $signed(s7_prod[128 +: 16]) + $signed(s7_prod[144 +: 16]) + $signed(s7_prod[160 +: 16]) + $signed(s7_prod[176 +: 16]) +
401 | 		           $signed(s7_prod[192 +: 16]) + $signed(s7_prod[208 +: 16]) + $signed(s7_prod[224 +: 16]) + $signed(s7_prod[240 +: 16]);
402 | 		// s8_maxen is set when either remaining candidate is greater (signed compare) than acc0zn.
403 | 		s8_max <= $signed(s7_max[0*9 +: 9]) > $signed(s7_max[1*9 +: 9]) ? s7_max[0*9 +: 9] : s7_max[1*9 +: 9];
404 | 		s8_maxen <= ($signed(s7_max[0*9 +: 9]) > $signed(acc0zn)) || ($signed(s7_max[1*9 +: 9]) > $signed(acc0zn));
405 | 
406 | 		if (!reset && s7_en) begin
407 | 			s8_en <= 1;
408 | 		end
409 | 	end
410 | 
411 | 
412 | 	/**** stage 9 ****/
413 | 	// Stage 9 computes the next accumulator values combinationally, registers them, and produces the saturated 8-bit results.
414 | 	reg [31:0] new_acc0_add;
415 | 	reg [31:0] new_acc1_add;
416 | 
417 | 	reg [31:0] new_acc0_max;
418 | 
419 | 	reg [31:0] new_acc0;
420 | 	reg [31:0] new_acc1;
421 | 	// Accumulators arithmetically shifted right by the amount in s8_insn[14:6].
422 | 	wire [31:0] acc0_shifted = $signed(acc0) >>> s8_insn[14:6];
423 | 	wire [31:0] acc1_shifted = $signed(acc1) >>> s8_insn[14:6];
424 | 	// Shifted accumulators clamped to the signed 8-bit range (registered at the end of the stage-9 clocked block).
425 | 	reg [7:0] acc0_saturated;
426 | 	reg [7:0] acc1_saturated;
427 | 	// These two regs are declared but not referenced elsewhere in this module.
428 | 	reg new_acc0_max_cmp;
429 | 	reg new_acc0_max_cmp_q;
430 | 
431 | 	always @* begin
432 | 		new_acc0_add = s8_insn[1] ? 0 : acc0;  // instruction bit 1: start from zero instead of the current accumulator
433 | 		new_acc1_add = s8_insn[1] || s8_insn[2] ? 0 : acc1;
434 | 
435 | 		new_acc0_max = s8_insn[2] ? 32'h 8000_0000 : new_acc0_add;  // same starting value that stage 8 used as acc0zn
436 | 
437 | 		new_acc0_add = $signed(new_acc0_add) + $signed(s8_sum0);
438 | 		new_acc1_add = $signed(new_acc1_add) + $signed(s8_sum1);
439 | 		// 9'h100 is the sentinel written in stage 5 for masked-out bytes; the running max is only replaced when s8_max is a real candidate that exceeded it.
440 | 		if (s8_max != 9'h 100)
441 | 			new_acc0_max = s8_maxen ? s8_max : new_acc0_max;
442 | 
443 | 		new_acc0 = s8_insn[0] ? new_acc0_max : new_acc0_add;  // instruction bit 0 selects the max result over the sum
444 | 		new_acc1 = new_acc1_add;
445 | 	end
446 | 
447 | 	always @(posedge clock) begin
448 | 		s9_en <= 0;
449 | 		s9_insn <= s8_insn;
450 | 
451 | 		if (!reset && s8_en) begin
452 | 			s9_en <= 1;
453 | 
454 | 			/* MACC, MMAX, MACCZ, MMAXZ, MMAXN */
455 | 			if (s8_insn[5:3] == 3'b 101) begin
456 | 				acc0 <= new_acc0;
457 | 				acc1 <= new_acc1;
458 | 			end
459 | 
460 | 			/* LdSet, LdSet0 */
461 | 			if (s8_insn[5:0] == 28 || s8_insn[5:0] == 29) begin
462 | 				acc0 <= mem_rdata[31:0];
463 | 			end
464 | 
465 | 			/* LdSet, LdSet1 */
466 | 			if (s8_insn[5:0] == 28 || s8_insn[5:0] == 30) begin
467 | 				acc1 <= mem_rdata[63:32];
468 | 			end
469 | 
470 | 			/* LdAdd, LdAdd0 */
471 | 			if (s8_insn[5:0] == 32 || s8_insn[5:0] == 33) begin
472 | 				acc0 <= acc0 + mem_rdata[31:0];
473 | 			end
474 | 
475 | 			/* LdAdd, LdAdd1 */
476 | 			if (s8_insn[5:0] == 32 || s8_insn[5:0] == 34) begin
477 | 				acc1 <= acc1 + mem_rdata[63:32];
478 | 			end
479 | 		end
480 | 		// If bits [31:7] are all equal (all 0s or all 1s) the value fits in signed 8 bits; otherwise clamp to -128 or 127 according to the sign bit.
481 | 		if (&acc0_shifted[31:7] == |acc0_shifted[31:7])
482 | 			acc0_saturated <= acc0_shifted[7:0];
483 | 		else
484 | 			acc0_saturated <= acc0_shifted[31] ? -128 : 127;
485 | 
486 | 		if (&acc1_shifted[31:7] == |acc1_shifted[31:7])
487 | 			acc1_saturated <= acc1_shifted[7:0];
488 | 		else
489 | 			acc1_saturated <= acc1_shifted[31] ? -128 : 127;
490 | 	end
491 | 
492 | 
493 | 	/**** write back ****/
494 | 	// pre_mem_wr_* are registered from stage 9; the combinational block below turns them into the actual memory write.
495 | 	reg [ 7:0] pre_mem_wr_en;
496 | 	reg [16:0] pre_mem_wr_addr;
497 | 	reg [63:0] pre_mem_wr_wdata;
498 | 	// Odd pre_mem_wr_addr: byte enables move up by one position and data by 8 bits. The memory address is always pre_mem_wr_addr >> 1.
499 | 	always @* begin
500 | 		if (pre_mem_wr_addr[0]) begin
501 | 			mem_wr_en = pre_mem_wr_en << 1;
502 | 			mem_wr_addr = pre_mem_wr_addr >> 1;
503 | 			mem_wr_wdata = pre_mem_wr_wdata << 8;
504 | 		end else begin
505 | 			mem_wr_en = pre_mem_wr_en;
506 | 			mem_wr_addr = pre_mem_wr_addr >> 1;
507 | 			mem_wr_wdata = pre_mem_wr_wdata;
508 | 		end
509 | 	end
510 | 
511 | 	wire [5:0] s9_insn_opcode = s9_insn[5:0];
512 | 
513 | 	always @(posedge clock) begin
514 | 		pre_mem_wr_en <= 0;
515 | 		pre_mem_wr_addr <= s9_insn[31:15] + SBP;
516 | 		pre_mem_wr_wdata <= {  // default data: {acc1 byte, acc0 byte}; with s9_insn[2] set, a byte whose sign bit is set is replaced by zero
517 | 			{8{!s9_insn[2] || !acc1_saturated[7]}} & acc1_saturated,
518 | 			{8{!s9_insn[2] || !acc0_saturated[7]}} & acc0_saturated
519 | 		};
520 | 
521 | 		if (s9_en) begin
522 | 			/* Store, Store0, Store1, ReLU, ReLU0, ReLU1 */
523 | 			if (s9_insn[5:3] == 3'b 010) begin
524 | 				pre_mem_wr_en <= {!s9_insn[0], !s9_insn[1]};  // enable[0] (acc0 byte) unless bit 1 is set; enable[1] (acc1 byte) unless bit 0 is set
525 | 			end
526 | 
527 | 			/* Save, Save0, Save1 */
528 | 			if (s9_insn[5:2] == 4'b 0110) begin
529 | 				pre_mem_wr_en <= {{4{!s9_insn[0]}}, {4{!s9_insn[1]}}};  // four byte enables per 32-bit accumulator
530 | 				pre_mem_wr_wdata <= {acc1, acc0};  // full accumulators, no shift or saturation
531 | 			end
532 | 
533 | 			/* SetSBP, AddSBP */
534 | 			if (s9_insn[5:0] == 12 || s9_insn[5:0] == 13) begin
535 | 				SBP <= s9_insn[31:15] + (s9_insn[0] ? SBP : 0);  // opcode bit 0 selects add (13) versus set (12)
536 | 			end
537 | 		end
538 | 		// This later assignment takes precedence: no write is issued in reset or when stage 9 is empty.
539 | 		if (reset || !s9_en) begin
540 | 			pre_mem_wr_en <= 0;
541 | 		end
542 | 	end
543 | endmodule
544 | 
545 | module marlann_compute_mul2 (
546 | 	input         clock,
547 | 	input  [15:0] A, B,
548 | 	output [31:0] X
549 | );
550 | 	// Three-deep result pipeline; bits [31:16] carry the high-byte product, bits [15:0] the low-byte product.
551 | 	reg [31:0] pipe1, pipe2, pipe3;
552 | 
553 | 	always @(posedge clock) begin
554 | 		// Each target is a 16-bit part-select, so each signed 8x8 product is evaluated at 16 bits.
555 | 		pipe1[15:0]  <= $signed(A[7:0]) * $signed(B[7:0]);
556 | 		pipe1[31:16] <= $signed(A[15:8]) * $signed(B[15:8]);
557 | 		pipe2 <= pipe1;
558 | 		pipe3 <= pipe2;
559 | 	end
560 | 
561 | 	// X shows the pair of products three clock edges after A and B were sampled.
562 | 	assign X = pipe3;
563 | endmodule
564 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/mux/.gitignore:
--------------------------------------------------------------------------------
1 | mux_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/mux/README.md:
--------------------------------------------------------------------------------
1 | # Large multiplexers
2 | 
3 | The Python script generates a selection of multiplexer descriptions, using a
4 | variable index (e.g. `assign a = b [c]`) as well as `case` and `if`-`else` (balanced
5 | and unbalanced) styles, across a variety of power-of-2 and non-power-of-2 values
6 | for the number of inputs, as well as a range of input widths.
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/mux/common.py:
--------------------------------------------------------------------------------
1 | ../../benchmarks_small/mux/common.py


--------------------------------------------------------------------------------
/verilog/benchmarks_large/mux/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from common import *
 4 | 
 5 | if __name__ == "__main__":
 6 |     for N in [63,64,65] + [127,128,129] + [255,256,257]:
 7 |         for W in [8,16,32]:
 8 |             gen_mux_index(N,W)
 9 |             gen_mux_case(N,W)
10 |             gen_mux_if_bal(N,W)
11 |             gen_mux_if_unbal(N,W)
12 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/opensparc/README.md:
--------------------------------------------------------------------------------
 1 | # OpenSPARC Benchmarks
 2 | 
 3 | ## T2 Core
 4 | 
 5 | Downloaded from http://download.oracle.com/technetwork/systems/opensparc/OpenSPARCT2.1.3.tar.bz2
 6 | 
 7 | Based on FPGA configuration, with main memory size reduced and `mem_harness` rewritten to map better
 8 | to block RAM
 9 | 
10 | Copyright:
11 | ```
12 | Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved
13 | 4150 Network Circle, Santa Clara, California 95054, U.S.A.
14 | 
15 | This program is free software; you can redistribute it and/or modify
16 | it under the terms of the GNU General Public License as published by
17 | the Free Software Foundation; version 2 of the License.
18 | 
19 | This program is distributed in the hope that it will be useful,
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 | GNU General Public License for more details.
23 | 
24 | You should have received a copy of the GNU General Public License
25 | along with this program; if not, write to the Free Software
26 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27 | 
28 | For the avoidance of doubt, and except that if any non-GPL license 
29 | choice is available it will apply instead, Sun elects to use only 
30 | the General Public License version 2 (GPLv2) at this time for any 
31 | software where a choice of GPL license versions is made 
32 | available with the language indicating that GPLv2 or any later version 
33 | may be used, or where a choice of which version of the GPL is applied is 
34 | otherwise unspecified. 
35 | ```


--------------------------------------------------------------------------------
/verilog/benchmarks_large/opensparc/t2.v.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/opensparc/t2.v.gz


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/.gitignore:
--------------------------------------------------------------------------------
1 | picorv32
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/README.md:
--------------------------------------------------------------------------------
 1 | # PICORV32 - a size-optimized RISC-V core
 2 | 
 3 | Source: https://github.com/cliffordwolf/picorv32/tree/v1.0
 4 | 
 5 | There are three cores (small, regular and large) and a simple SoC built around `picorv32`:
 6 | 
 7 | - **PicoRV32 (small):** The `picorv32` module without counter instructions,
 8 |   without two-stage shifts, with externally latched `mem_rdata`, and without
 9 |   catching of misaligned memory accesses and illegal instructions.
10 | 
11 | - **PicoRV32 (regular):** The `picorv32` module in its default configuration.
12 | 
13 | - **PicoRV32 (large):** The `picorv32` module with enabled PCPI, IRQ, MUL,
14 |   DIV, BARREL_SHIFTER, and COMPRESSED_ISA features.
15 | 
16 | - **PicoSoC:** The `picosoc` module with `picorv32`, flash, UART, and SRAM
17 |   IP on a simple System-on-Chip.
18 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import os, subprocess
4 | if not os.path.isdir('picorv32'):
5 |     subprocess.run(['git', 'clone', 'https://github.com/cliffordwolf/picorv32'])
6 | subprocess.run(['git', 'reset', '--hard', 'v1.0'], cwd='picorv32')
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/picorv32.vh:
--------------------------------------------------------------------------------
1 | picorv32/picorv32.v


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/picorv32_large.v:
--------------------------------------------------------------------------------
 1 | module picorv32_large (    // benchmark top level: wraps one top_large instance
 2 |     input clk, resetn,
 3 | 
 4 |     output        mem_valid,
 5 |     output        mem_instr,
 6 |     input         mem_ready,
 7 | 
 8 |     output [31:0] mem_addr,
 9 |     output [31:0] mem_wdata,
10 |     output [ 3:0] mem_wstrb,
11 |     input  [31:0] mem_rdata
12 | );
13 |     top_large picorv32(    // instance name is "picorv32"; top_large is presumably declared in synth_area_top.vh -- confirm
14 |         .clk      (clk      ),
15 |         .resetn   (resetn   ),
16 |         .mem_valid(mem_valid),
17 |         .mem_instr(mem_instr),
18 |         .mem_ready(mem_ready),
19 |         .mem_addr (mem_addr ),
20 |         .mem_wdata(mem_wdata),
21 |         .mem_wstrb(mem_wstrb),
22 |         .mem_rdata(mem_rdata)
23 |     );    // every wrapper port is connected 1:1 to the same-named instance port
24 | endmodule
25 | 
26 | `include "synth_area_top.vh"
27 | `include "picorv32.vh"
28 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/picorv32_regular.v:
--------------------------------------------------------------------------------
 1 | module picorv32_regular (    // benchmark top level: wraps one top_regular instance
 2 |     input clk, resetn,
 3 | 
 4 |     output        mem_valid,
 5 |     output        mem_instr,
 6 |     input         mem_ready,
 7 | 
 8 |     output [31:0] mem_addr,
 9 |     output [31:0] mem_wdata,
10 |     output [ 3:0] mem_wstrb,
11 |     input  [31:0] mem_rdata
12 | );
13 |     top_regular picorv32(    // instance name is "picorv32"; top_regular is presumably declared in synth_area_top.vh -- confirm
14 |         .clk      (clk      ),
15 |         .resetn   (resetn   ),
16 |         .mem_valid(mem_valid),
17 |         .mem_instr(mem_instr),
18 |         .mem_ready(mem_ready),
19 |         .mem_addr (mem_addr ),
20 |         .mem_wdata(mem_wdata),
21 |         .mem_wstrb(mem_wstrb),
22 |         .mem_rdata(mem_rdata)
23 |     );    // every wrapper port is connected 1:1 to the same-named instance port
24 | endmodule
25 | 
26 | `include "synth_area_top.vh"
27 | `include "picorv32.vh"
28 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/picorv32_small.v:
--------------------------------------------------------------------------------
 1 | module picorv32_small (    // benchmark top level: wraps one top_small instance
 2 |     input clk, resetn,
 3 | 
 4 |     output        mem_valid,
 5 |     output        mem_instr,
 6 |     input         mem_ready,
 7 | 
 8 |     output [31:0] mem_addr,
 9 |     output [31:0] mem_wdata,
10 |     output [ 3:0] mem_wstrb,
11 |     input  [31:0] mem_rdata
12 | );
13 |     top_small picorv32(    // instance name is "picorv32"; top_small is presumably declared in synth_area_top.vh -- confirm
14 |         .clk      (clk      ),
15 |         .resetn   (resetn   ),
16 |         .mem_valid(mem_valid),
17 |         .mem_instr(mem_instr),
18 |         .mem_ready(mem_ready),
19 |         .mem_addr (mem_addr ),
20 |         .mem_wdata(mem_wdata),
21 |         .mem_wstrb(mem_wstrb),
22 |         .mem_rdata(mem_rdata)
23 |     );    // every wrapper port is connected 1:1 to the same-named instance port
24 | endmodule
25 | 
26 | `include "synth_area_top.vh"
27 | `include "picorv32.vh"
28 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/picosoc.vh:
--------------------------------------------------------------------------------
1 | picorv32/picosoc/picosoc.v


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/picosoc_top.v:
--------------------------------------------------------------------------------
 1 | module picosoc_top (	// benchmark top level: wraps one picosoc instance
 2 | 	input clk,
 3 | 	input resetn,
 4 | 
 5 | 	output        iomem_valid,
 6 | 	input         iomem_ready,
 7 | 	output [ 3:0] iomem_wstrb,
 8 | 	output [31:0] iomem_addr,
 9 | 	output [31:0] iomem_wdata,
10 | 	input  [31:0] iomem_rdata,
11 | 
12 | 	input  irq_5,
13 | 	input  irq_6,
14 | 	input  irq_7,
15 | 
16 | 	output ser_tx,
17 | 	input  ser_rx,
18 | 
19 | 	output flash_csb,
20 | 	output flash_clk,
21 | 
22 | 	output flash_io0_oe,
23 | 	output flash_io1_oe,
24 | 	output flash_io2_oe,
25 | 	output flash_io3_oe,
26 | 
27 | 	output flash_io0_do,
28 | 	output flash_io1_do,
29 | 	output flash_io2_do,
30 | 	output flash_io3_do,
31 | 
32 | 	input  flash_io0_di,
33 | 	input  flash_io1_di,
34 | 	input  flash_io2_di,
35 | 	input  flash_io3_di
36 | );
37 | 
38 | picosoc top (	// instance name is "top"; picosoc is presumably declared in picosoc.vh (included after endmodule) -- confirm
39 | 	.clk(clk),
40 | 	.resetn(resetn),
41 | 
42 | 	.iomem_valid(iomem_valid),
43 | 	.iomem_ready(iomem_ready),
44 | 	.iomem_wstrb(iomem_wstrb),
45 | 	.iomem_addr(iomem_addr),
46 | 	.iomem_wdata(iomem_wdata),
47 | 	.iomem_rdata(iomem_rdata),
48 | 
49 | 	.irq_5(irq_5),
50 | 	.irq_6(irq_6),
51 | 	.irq_7(irq_7),
52 | 
53 | 	.ser_tx(ser_tx),
54 | 	.ser_rx(ser_rx),
55 | 
56 | 	.flash_csb(flash_csb),
57 | 	.flash_clk(flash_clk),
58 | 
59 | 	.flash_io0_oe(flash_io0_oe),
60 | 	.flash_io1_oe(flash_io1_oe),
61 | 	.flash_io2_oe(flash_io2_oe),
62 | 	.flash_io3_oe(flash_io3_oe),
63 | 
64 | 	.flash_io0_do(flash_io0_do),
65 | 	.flash_io1_do(flash_io1_do),
66 | 	.flash_io2_do(flash_io2_do),
67 | 	.flash_io3_do(flash_io3_do),
68 | 
69 | 	.flash_io0_di(flash_io0_di),
70 | 	.flash_io1_di(flash_io1_di),
71 | 	.flash_io2_di(flash_io2_di),
72 | 	.flash_io3_di(flash_io3_di)
73 | );	// every wrapper port is connected 1:1 to the same-named instance port
74 | 
75 | endmodule
76 | 
77 | `include "picosoc.vh"
78 | `include "simpleuart.vh"
79 | `include "spimemio.vh"
80 | `include "picorv32.vh"
81 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/simpleuart.vh:
--------------------------------------------------------------------------------
1 | picorv32/picosoc/simpleuart.v


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/spimemio.vh:
--------------------------------------------------------------------------------
1 | picorv32/picosoc/spimemio.v


--------------------------------------------------------------------------------
/verilog/benchmarks_large/picosoc/synth_area_top.vh:
--------------------------------------------------------------------------------
1 | picorv32/scripts/vivado/synth_area_top.v


--------------------------------------------------------------------------------
/verilog/benchmarks_large/riscv-bitmanip/README.md:
--------------------------------------------------------------------------------
 1 | # RISC-V Bitmanip (Bit Manipulation) Extension
 2 | 
 3 | Source: https://github.com/riscv/riscv-bitmanip
 4 | 
 5 | The bitmanip instructions extend the RISC-V instruction set to enable
 6 | efficient bit manipulation.
 7 | 
 8 | Currently, just two reference designs:
 9 | 
10 | - **shifter64**
11 | 
12 | - **smartbextdep**
13 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/riscv-bitmanip/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import urllib.request
4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/riscv/riscv-bitmanip/dadfdcbae935815db429a1db0bfed6973548bbf5/verilog/shifter64.v', 'shifter64.v')
5 | urllib.request.urlretrieve('https://raw.githubusercontent.com/riscv/riscv-bitmanip/dadfdcbae935815db429a1db0bfed6973548bbf5/verilog/smartbextdep.v', 'smartbextdep.v')
6 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/README.md:
--------------------------------------------------------------------------------
1 | # A second-order sigma-delta DAC with single-bit quantizer
2 | 
3 | ![./architecture.png](./architecture.png)
4 | 
5 | ## Functional verification
6 | This benchmark has a functional verification testbench. It generates a sine wave of approximately 1kHz (amplitude = 10000/32767 or approximately -10.3 dB) and feeds this into the SDDAC. The output waveform is analysed by a Python 3 script and shows the output spectrum of the SDDAC (FFT with a Blackman window).
7 | 
 8 | To run the functional verification testbench, execute `./run_sddac_tb.sh`. Prerequisites are Icarus Verilog (`iverilog`, `vvp`), Python 3, NumPy and Matplotlib.
9 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/sddac/architecture.png


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "files": 
3 |     [
4 |         "sddac.v"
5 |     ]
6 | }
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/genspectrumplot.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | 
 3 | # Generate spectrum of sigma-delta output bit sequence
 4 | # Author: Niels A. Moseley - Moseley Instruments / Symbiotic EDA
 5 | # 
 6 | # requires matplotlib and numpy
 7 | #
 8 | 
 9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | 
12 | sdout = np.genfromtxt('sddac_out.txt', dtype=np.float)
13 | sdout = sdout*2 - 1
14 | 
15 | # skip first 1000 samples to avoid FFTing startup transients
16 | sdout = sdout[1001:]
17 | 
18 | N = sdout.size
19 | print(N)
20 | sdout_fft = np.abs(np.fft.fft(np.multiply(sdout, np.blackman(N))))
21 | 
22 | plt.figure(1)
23 | plt.title("SDDAC output spectrum")
24 | plt.xlabel("Frequency")
25 | plt.ylabel("Signal Amplitude (dB)")
26 | plt.grid()
27 | freqaxis = np.linspace(0,N//2-1,N//2)/N
28 | plt.semilogx(freqaxis, 20.0*np.log10(sdout_fft[:N // 2]/(N/4)))
29 | 
30 | plt.show()
31 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/run_sddac_tb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | iverilog -m va_math -o sddac_tb.vvp sddac_tb.v sddac.v
4 | #iverilog -m va_math -o sddac_tb.vvp sddac_tb.v sddac.v_netlist.v ../../../celllibs/supergate/supergate.v
5 | 
6 | vvp sddac_tb.vvp
7 | python3 genspectrumplot.py
8 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/sddac.v:
--------------------------------------------------------------------------------
 1 | // Second order sigma-delta dac
 2 | //
 3 | // For benchmarking purposes only -- don't use this for an actual design.
 4 | // There are far more performant architectures. 
 5 | // 
 6 | // Author: Niels A. Moseley, n.a.moseley@moseleyinstruments.com
 7 | //
 8 | 
 9 | `ifdef DEBUG_SDDAC
10 | `include "constants.vams"
11 | `endif
12 | 
13 | module sddac(clk, rst_n, sig_in, sd_out);
14 | 
15 |     // inputs
16 |     input clk;                          // clock
17 |     input rst_n;                        // synchronous reset, active low
18 |     input signed [15:0] sig_in;         // 16 bits in Q(1,15) format
19 | 
20 |     // outputs
21 |     output reg sd_out = 0;              // registered single-bit output stream
22 | 
23 |     // internal signals
24 |     reg signed [17:0] state1 = 0;       // Q(1,17)
25 |     reg signed [19:0] state2 = 0;       // Q(1,19)
26 |     reg signed [16:0] state1_in;        // Q(0,17)
27 |     reg signed [18:0] state2_in;        // Q(0,19)
28 |     reg signed [20:0] quant_in;         // Q(2,19)
29 |     reg signed [16:0] qq;               // debug only: assigned only when DEBUG_SDDAC is defined
30 |     reg        [7:0]  lfsr_reg = 0;     // shift register; reset loads 8'hff
31 |     reg               quantizer;        // sign bit of quant_in (1 = negative)
32 |     wire lfsr_fb;                       // XOR of lfsr_reg bits 4 and 2
33 | 
34 |     // linear feedback shift register feedback
35 |     assign lfsr_fb = (lfsr_reg[4] ^ lfsr_reg[2]);
36 | 
37 |     // combination process
38 |     always @(*)
39 |     begin
40 |         `ifdef DEBUG_SDDAC
41 |         qq = $signed(quantizer ? -17'h8000 : 17'h8000);
42 |         `endif
43 |         quant_in  = state2 + $signed(lfsr_fb ? -21'h4000 : 21'h4000);          // state2 plus/minus 0x4000, sign chosen by the LFSR feedback bit
44 |         quantizer = quant_in[20];                                              // 1 when quant_in is negative
45 |         state1_in = sig_in - $signed(quantizer ? -17'h8000 : 17'h8000);        // Q(-1,17) - Q(0,17) -> Q(0,17)
46 |         state2_in = state1 - $signed(quantizer ? -19'h10000 : 19'h10000);      // Q(-1,19) - Q(0,19) -> Q(0,19)
47 |     end
48 | 
49 |     // clocked process
50 |     always @(posedge clk)
51 |     begin
52 |         if (rst_n == 1'b0)              // NOTE(review): sd_out is not reset here; it only has its declaration initialiser
53 |         begin
54 |             state1 <= 0;
55 |             state2 <= 0;
56 |             lfsr_reg <= 8'hff;
57 |         end
58 |         else begin
59 |             `ifdef DEBUG_SDDAC
60 |             $display("feedback : %f", qq*$pow(2.0,-15));
61 |             $display("state1_in: %f", state1_in*$pow(2.0,-17));
62 |             $display("state2_in: %f", state2_in*$pow(2.0,-19));
63 |             $display("");
64 |             `endif
65 |             state1 <= state1 + $signed({ state1_in[16], state1_in});   // accumulate state1_in, sign-extended by one bit
66 |             state2 <= state2 + $signed({ state2_in[18], state2_in});   // accumulate state2_in, sign-extended by one bit
67 |             sd_out <= !quantizer;                                      // output 1 when quant_in is non-negative
68 |             lfsr_reg <= {lfsr_reg[6:0], lfsr_fb};                      // shift left; feedback bit enters at bit 0
69 |         end
70 |     end
71 | 
72 | endmodule
73 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/sddac/sddac_tb.v:
--------------------------------------------------------------------------------
 1 | // Testbench for sddac.v
 2 | // Author: Niels A. Moseley
 3 | 
 4 | `include "constants.vams"
 5 | 
 6 | module tb;
 7 | 
 8 | reg  clk   = 0;
 9 | reg  rst_n = 0;                 // reset is asserted (low) from time 0
10 | reg signed [15:0] sig = 0;      // stimulus sample fed to the DUT
11 | wire dac_out;
12 | 
13 | real    phase = 0.0;            // sine phase in periods; advanced by 0.001 per clock
14 | integer fhandle;                // handle of the output file sddac_out.txt
15 | 
16 | // clock generation
17 | always #1 clk=~clk;
18 | 
19 | // devices under test
20 | sddac dut(clk, rst_n, sig, dac_out);
21 | 
22 | initial
23 | begin
24 |     $dumpfile("sddac_tb.vcd");
25 |     $dumpvars;
26 | 
27 |     fhandle = $fopen("sddac_out.txt","w");
28 | 
29 |     #4 rst_n = 1'b1;            // release reset after 4 time units
30 | 
31 |     #526288 $finish;    // 2^18 + 1000 startup samples
32 | end
33 | 
34 | always @(posedge clk)
35 | begin
36 |     if (rst_n == 1'b1)
37 |     begin
38 |         $fwrite(fhandle, "%d\n", dac_out);          // one output bit per line; read back by genspectrumplot.py
39 |         sig <= $sin(`M_TWO_PI*phase)*10000.0;       // sine with amplitude 10000
40 |         phase <= phase + 0.001;                     // one full sine period every 1000 clock cycles
41 |     end
42 | end
43 | 
44 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_large/vexriscv/README.md:
--------------------------------------------------------------------------------
1 | # VEXRISCV - an FPGA-friendly 32-bit RISC-V CPU implementation
2 | 
3 | Source: https://github.com/SpinalHDL/VexRiscv/tree/64e8919
4 | 
5 | There is one core currently.
6 | 
7 | - **VexRiscV (Full):** RTL generated using `sbt "runMain vexriscv.demo.GenFull"`
8 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/wb2axip/README.md:
--------------------------------------------------------------------------------
 1 | # WB2AXIP: A Pipelined Wishbone B4 to AXI4 bridge
 2 | 
 3 | Source: https://github.com/ZipCPU/wb2axip
 4 | 
 5 | Currently, just one design:
 6 | 
 7 | - **AXILXBAR** is a fully functional, formally verified,
 8 |     N master to M slave AXI-lite crossbar interconnect.
 9 |     As such, it permits min(N,M) active channel connections
10 |     between masters and slaves all at once.
11 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_large/wb2axip/generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import urllib.request
4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/wb2axip/c6d7fb0390dd0a355963b2882cde1e98f1208087/rtl/axilxbar.v', 'axilxbar.v')
5 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/addertree/.gitignore:
--------------------------------------------------------------------------------
1 | addertree_*_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/addertree/README.md:
--------------------------------------------------------------------------------
1 | # addertree - adding multiple inputs
2 | 
3 | The Python script generates modules which sum between three and eight inputs, using various input widths.
4 | No overflow checking is done.
5 | 
6 | The goal of this benchmark is to see which structure the synthesis tool generates.
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/addertree/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate a+b+c+d .. with inputs all the same width
 4 | 
 5 | import math
 6 | 
 7 | ## extend the port given by the string 'word' by 'bits' zero bits
 8 | def extend(word, bits):
 9 |     return "{<b>'d0, <word>}".replace("<b>",str(bits)).replace("<word>", word);
10 | 
11 | def gen_adder(nInputs, nWidth):
12 |     bits = int(math.ceil(math.log2(nInputs)))
13 |     with open("addertree_%d_%d.v" % (nInputs, nWidth), "w") as f:
14 |         body = extend("din[" + str(nWidth-1) + ":0]",bits)
15 |         for I in range(2, nInputs+1):
16 |             body = body + " + " + extend("din[" + str(nWidth*I-1) + ":" + str(nWidth*(I-1)) + "]", bits)
17 |         body = body + ";"
18 |         print("""
19 | module addertree_<n>_<w> (input [<t>-1:0] din, output [<w>-1:0] dout);
20 |   assign dout = <body>
21 | endmodule
22 | """.replace("<body>", body).replace("<n>", str(nInputs)).replace("<w>", str(nWidth)).replace("<t>", str(nWidth*nInputs)), file=f)
23 | 
24 | for nInputs in [3, 4, 5, 6, 7, 8]:
25 |     for nWidth in [4, 5, 6, 7, 8]:
26 |         gen_adder(nInputs, nWidth)
27 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/arith_ops/.gitignore:
--------------------------------------------------------------------------------
 1 | add_*_*.v
 2 | sub_*_*.v
 3 | mul_*_*.v
 4 | adds_*_*.v
 5 | subs_*_*.v
 6 | muls_*_*.v
 7 | shl_*_*.v
 8 | shr_*_*.v
 9 | sshr_*_*.v
10 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/arith_ops/README.md:
--------------------------------------------------------------------------------
1 | # arith_ops - various basic arithmetic operations
2 | 
3 | The python script generates a plethora of modules consisting of simple arithmetic operations,
4 | such as addition, subtraction, multiplication, left-shift, logical right-shift, arithmetic right-shift.
5 | 
6 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/arith_ops/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Add, Sub, Mul
 4 | 
 5 | ops = [
 6 |     ("add", "+", lambda i, j: max(i, j)+1),
 7 |     ("sub", "-", lambda i, j: max(i, j)+1),
 8 |     ("mul", "*", lambda i, j: i + j),
 9 | ]
10 | 
11 | for opname, opstr, opsize in ops:
12 |     for i in range(1, 16):
13 |         for j in range(i, 16):
14 |             for signed in [False, True]:
15 |                 name = "%s%s_%d_%d" % (opname, "s" if signed else "", i, j)
16 |                 signed_str = " signed" if signed else ""
17 |                 with open("%s.v" % name, "w") as f:
18 |                     print("module %s (input%s [%d:0] A, input%s [%d:0] B, output [%d:0] Y);" %
19 |                             (name, signed_str, i-1, signed_str, j-1, opsize(i, j)-1), file=f)
20 |                     print("  assign Y = A %s B;" % opstr, file=f)
21 |                     print("endmodule", file=f)
22 | 
23 | ## Shift Ops
24 | 
25 | ops = [
26 |     ("shl", "<<", ""),
27 |     ("shr", ">>", ""),
28 |     ("sshr", ">>>", " signed"),
29 | ]
30 | 
31 | for opname, opstr, signed_str in ops:
32 |     for i in range(1, 32):
33 |         for j in range(1, 6):
34 |             name = "%s_%d_%d" % (opname, i, j)
35 |             with open("%s.v" % name, "w") as f:
36 |                 print("module %s (input%s [%d:0] A, input [%d:0] B, output [%d:0] Y);" %
37 |                         (name, signed_str, i-1, j-1, i-1), file=f)
38 |                 print("  assign Y = A %s B;" % opstr, file=f)
39 |                 print("endmodule", file=f)
40 | 
41 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/cic/.gitginore:
--------------------------------------------------------------------------------
1 | *.vcd
2 | *.vvp
3 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/cic/README.md:
--------------------------------------------------------------------------------
 1 | # CIC5 - Cascaded Integrator-Comb DSP structure
 2 | 
 3 | This module decimates the incoming data stream by a factor of 5,
 4 | using a cascaded integrators and comb filters. It is a well-known
 5 | efficient DSP structure primarily found in high-speed A/D conversion
 6 | applications, such as Software Defined Radios (SDR).
 7 | 
 8 | The data widths are: 16-bit signed input, 28 bit signed output.
 9 | 
10 | Reference: https://en.wikipedia.org/wiki/Cascaded_integrator%E2%80%93comb_filter
11 | 
12 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/cic/cic5.v:
--------------------------------------------------------------------------------
 1 | // 5th order CIC filter with decimation factor of 5
 2 | // Author: Niels A. Moseley
 3 | //         Symbiotic EDA / Moseley Instruments
 4 | // 12-11-2018
 5 | 
 6 | module cic5(
 7 |     input  clk,
 8 |     input  rst_n,                       // synchronous reset, active low
 9 |     input  signed [15:0] d_in,
10 |     output reg signed [27:0] d_out,
11 |     output reg d_out_valid              // high for one clk cycle whenever d_out is updated
12 |     );
13 | 
14 | reg signed [27:0] int_s  [1:5]; // integrator states
15 | reg signed [27:0] comb_s [1:5]; // comb filter states
16 | reg signed [27:0] tmp    [1:5]; // comb-stage intermediates (blocking assignments only)
17 | reg [2:0]  decimation_count;    // counts input samples 0..4
18 | 
19 | integer i;
20 | 
21 |     always @(posedge clk)
22 |     begin
23 |         if (rst_n == 1'b0)
24 |         begin
25 |             for (i=1; i<=5; i=i+1) begin
26 |                 int_s[i]  <= 28'd0;     // literal width matches the 28-bit state
27 |                 comb_s[i] <= 28'd0;
28 |             end
29 |             decimation_count <= 0;
30 |             d_out_valid <= 0;
31 |             d_out <= 0;
32 |         end
33 |         else
34 |         begin
35 |             // default updates
36 |             d_out_valid <= 1'b0;
37 |             decimation_count <= decimation_count + 1;
38 | 
39 |             // update the integrator filter states
40 |             int_s[1] <= int_s[1] + d_in;
41 |             for (i=2; i<=5; i=i+1) begin
42 |                 int_s[i] <= int_s[i] + int_s[i-1];
43 |             end
44 | 
45 |             // check if we can output new data
46 |             // at the decimated rate
47 | 
48 |             if (decimation_count == 3'd4)
49 |             begin
50 |                 // update the comb filter states
51 |                 tmp[1] = int_s[5] - comb_s[1];
52 |                 comb_s[1] <= int_s[5];
53 |                 for (i=2; i<=5; i=i+1) begin
54 |                     tmp[i] = tmp[i-1] - comb_s[i];
55 |                     comb_s[i] <= tmp[i-1];
56 |                 end
57 | 
58 |                 decimation_count <= 0;  // overrides the default increment above
59 |                 d_out_valid <= 1'b1;    // overrides the default clear above
60 |                 d_out <= tmp[5];
61 |             end                         // no ';' here: a null statement in a begin/end block is not legal Verilog-2005
62 |         end
63 |     end
64 | 
65 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/cic/cic5_tb.v:
--------------------------------------------------------------------------------
 1 | // Testbench for 5th order CIC filter with decimation factor of 5
 2 | // Author: Niels A. Moseley
 3 | //         Symbiotic EDA / Moseley Instruments
 4 | //
 5 | // 12-11-2018
 6 | 
 7 | 
 8 | module tb;
 9 | 
10 | reg  clk   = 0;
11 | reg  rst_n = 0;                 // reset is asserted (low) from time 0
12 | reg  signed [15:0] d_in = 0;
13 | wire signed [27:0] d_out;       // driven only by the DUT; an "= 0" here would add a second driver and yield X
14 | wire d_out_valid;
15 | 
16 | // clock generation
17 | always #1 clk=~clk;
18 | 
19 | // devices under test
20 | cic5 dut(clk, rst_n, d_in, d_out, d_out_valid);
21 | 
22 | initial
23 | begin
24 |     $dumpfile("cic5_tb.vcd");
25 |     $dumpvars;
26 |     d_in     <= 16'h7fff;       // start with a constant positive input
27 |     #4 rst_n = 1'b1;            // release reset after 4 time units
28 |     #60 d_in <= -16'h7fff;      // step to the negated value
29 |     #60 $finish;
30 | end
31 | 
32 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/cic/run_testbench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | iverilog -o cic5_tb.vvp cic5_tb.v cic5.v
4 | vvp cic5_tb.vvp
5 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/decoder/.gitignore:
--------------------------------------------------------------------------------
1 | decode_*.v
2 | set_*.v
3 | clr_*.v
4 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/decoder/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Simple decoders
 4 | 
 5 | def gen_decode(n, t):
 6 |     if t == 0: body = "assign dout = din[sel];"
 7 |     if t == 1: body = "assign dout = din >> sel;"
 8 |     if t == 2: body = "wire [<n>-1:0] p = din << sel; assign dout = p[<n>-1];"
 9 |     with open("decode_%d_%d.v" % (n, t), "w") as f:
10 |         s = 1
11 |         while 2**s < n:
12 |             s += 1
13 |         print("""
14 | module decode_<n>_<t> (input [<s>-1:0] sel, input [<n>-1:0] din, output dout);
15 |   <body>
16 | endmodule
17 | """.replace("<body>", body).replace("<n>", str(n)).replace("<t>", str(t)).replace("<s>", str(s)), file=f)
18 | 
19 | for n in [1, 2, 3, 4, 5, 6, 7, 8, 10, 15, 24, 32, 55, 64]:
20 |     for t in range(3):
21 |         gen_decode(n, t)
22 | 
23 | 
24 | ## Set/Clear bit
25 | 
26 | def gen_setclr(n, t, v):
27 |     if t == 0: body = "dout[sel] = <v>;"
28 |     if t == 1 and v == 0: body = "dout = dout & ~(1 << sel);"
29 |     if t == 1 and v == 1: body = "dout = dout |  (1 << sel);"
30 |     with open("%s_%d_%d.v" % ("set" if v else "clr", n, t), "w") as f:
31 |         s = 1
32 |         while 2**s < n:
33 |             s += 1
34 |         print("""
35 | module <setclr>_<n>_<t> (input [<s>-1:0] sel, input [<n>-1:0] din, output reg [<n>-1:0] dout);
36 |   always @* begin dout = din; <body> end
37 | endmodule
38 | """.replace("<body>", body).replace("<setclr>", "set" if v else "clr").replace("<n>", str(n)) \
39 |    .replace("<t>", str(t)).replace("<s>", str(s)).replace("<v>", str(v)), file=f)
40 | 
41 | for n in [1, 2, 3, 4, 5, 6, 7, 8, 10, 15, 24, 32, 55, 64]:
42 |     for t in range(2):
43 |         for v in range(2):
44 |             gen_setclr(n, t, v)
45 | 
46 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/dspmac/.gitignore:
--------------------------------------------------------------------------------
1 | dspmac_*.v
2 | *.vvp


--------------------------------------------------------------------------------
/verilog/benchmarks_small/dspmac/README.md:
--------------------------------------------------------------------------------
 1 | # DSPMAC - A multiply-accumulate DSP structure
 2 | 
 3 | This module takes two fixed-point operands (A and B) and can perform the following operations:
 4 | 
 5 | * CLEAR - the accumulator is loaded with 0.
 6 | * MUL   - the accumulator is set to A*B.
 7 | * MAC   - the result of A*B is added to the accumulator.
 8 | * NOP   - the accumulator is left untouched.
 9 | 
10 | Several versions of the module are generated by the Python script,
11 | differing in the number of input and output bits.
12 | 
13 | This module forms the computational heart of FIR/IIR filter engines
14 | and generic DSP processors.
15 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/dspmac/dspmac.template:
--------------------------------------------------------------------------------
 1 | // DSP multiply-and-accumulate block without saturation
 2 | // Author: Niels A. Moseley
 3 | 
 4 | module dspmac_<w>_<s>(          // <w> and <s> are placeholders substituted by generate.py
 5 |     input clk, 
 6 |     input rst_n,                // asynchronous reset, active low
 7 |     input [1:0] opcode,         // one of the OP_* codes below
 8 |     input signed [<w>-1:0] a_in, 
 9 |     input signed [<w>-1:0] b_in, 
10 |     output signed [<s>-1:0] accu_out
11 |     );
12 | 
13 | reg signed [<s>-1:0] accu;      // accumulator register, mirrored on accu_out
14 | 
15 | parameter [1:0] OP_CLR = 2'b00,
16 |                 OP_MUL = 2'b01,
17 |                 OP_MAC = 2'b10,
18 |                 OP_NOP = 2'b11;    
19 | 
20 | always @(posedge clk or negedge rst_n)
21 | begin
22 |     if (rst_n == 1'b0)
23 |         accu <= <s>'d0;  // set accumulator to zero
24 |     else
25 |     begin
26 |         case(opcode)
27 |         OP_CLR:
28 |             accu <= 0;
29 |         OP_MUL:
30 |             accu <= a_in*b_in;          // load the product
31 |         OP_MAC:
32 |             accu <= accu+a_in*b_in;     // add the product to the accumulator
33 |         OP_NOP:
34 |             accu <= accu;               // hold
35 |         default:
36 |             accu <= accu;               // all four 2-bit codes are listed above; only x/z opcodes reach this
37 |         endcase
38 |     end
39 | end
40 | 
41 | assign accu_out = accu;
42 | 
43 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/dspmac/dspmac_16_40_tb.v:
--------------------------------------------------------------------------------
 1 | // Testbench for dspmac_16_40.v
 2 | // Author: Niels A. Moseley
 3 | 
 4 | `include "constants.vams"
 5 | 
 6 | module tb;
 7 | 
 8 | reg  clk   = 0;
 9 | reg  rst_n = 0;                 // reset is asserted (low) from time 0
10 | reg  [1:0] opcode = 2'b11;      // nop
11 | reg  signed [15:0] a_bus = 0;
12 | reg  signed [15:0] b_bus = 0;
13 | wire signed [39:0] result;      // DUT accumulator output
14 | 
15 | // clock generation
16 | always #1 clk=~clk;
17 | 
18 | // devices under test
19 | dspmac_16_40 dut(clk, rst_n, opcode, a_bus, b_bus, result);
20 | 
21 | initial
22 | begin
23 |     $dumpfile("dspmac_16_40_tb.vcd");
24 |     $dumpvars;
25 | 
26 |     opcode = 2'b00;     //CLR
27 | 
28 |     #4 rst_n = 1'b1;            // release reset after 4 time units
29 |     a_bus    = 16'd32767;       // both operands at the largest positive 16-bit signed value
30 |     b_bus    = 16'd32767;
31 |     opcode   = 2'b01;       // MUL
32 |     #2 opcode   = 2'b10;    // MAC
33 |     #2 opcode   = 2'b11;    // NOP
34 |     #2 opcode   = 2'b11;    // NOP
35 |     #2 $finish;             // each opcode is held for one full clock period (2 time units)
36 |     
37 | end
38 | 
39 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/dspmac/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate DSP multiply-accumulate modules of several operand widths from dspmac.template
 4 | 
 5 | import math
 6 | 
 7 | def gen_dspmac(opBits, accuBits, template):
 8 |     with open("dspmac_%d_%d.v" % (opBits, accuBits), "w") as f:
 9 |         print(template.replace("<w>", str(opBits)).replace("<s>", str(accuBits)), file=f)
10 | 
# Read the template once, then emit one module per operand width.
with open('dspmac.template', 'rt') as templatefile:
    template = templatefile.read()

# The accumulator is twice the operand width plus 8 extra bits.
for opBits in (8, 12, 16, 20, 24):
    gen_dspmac(opBits, 2 * opBits + 8, template)
16 | 
17 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/dspmac/run_testbench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | iverilog -m va_math -o dspmac_16_40_tb.vvp dspmac_16_40_tb.v dspmac_16_40.v
4 | vvp dspmac_16_40_tb.vvp


--------------------------------------------------------------------------------
/verilog/benchmarks_small/lfsr/.gitignore:
--------------------------------------------------------------------------------
1 | lfsr_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/lfsr/README.md:
--------------------------------------------------------------------------------
1 | # LFSR - Linear Feedback Shift Register
2 | 
3 | Linear feedback shift registers can be thought of as pseudo-random number generators.
4 | Possible uses in digital design include efficient counters, noise/stimulus generators, etc.
5 | 
6 | The python script generates maximal-length XNOR-based LFSRs from 3 to 168 bits with no more than
7 | 6 taps, based on the coefficients in https://www.xilinx.com/support/documentation/application_notes/xapp210.pdf
8 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/lfsr/generate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | #  From: https://www.xilinx.com/support/documentation/application_notes/xapp210.pdf
  4 | taps = [
  5 |         (3,2),
  6 |         (4,3),
  7 |         (5,3),
  8 |         (6,5),
  9 |         (7,6),
 10 |         (8,6,5,4),
 11 |         (9,5),
 12 |         (10,7),
 13 |         (11,9),
 14 |         (12,6,4,1),
 15 |         (13,4,3,1),
 16 |         (14,5,3,1),
 17 |         (15,14),
 18 |         (16,15,13,4),
 19 |         (17,14),
 20 |         (18,11),
 21 |         (19,6,2,1),
 22 |         (20,17),
 23 |         (21,19),
 24 |         (22,21),
 25 |         (23,18),
 26 |         (24,23,22,17),
 27 |         (25,22),
 28 |         (26,6,2,1),
 29 |         (27,5,2,1),
 30 |         (28,25),
 31 |         (29,27),
 32 |         (30,6,4,1),
 33 |         (31,28),
 34 |         (32,22,2,1),
 35 |         (33,20),
 36 |         (34,27,2,1),
 37 |         (35,33),
 38 |         (36,25),
 39 |         (37,5,4,3,2,1),
 40 |         (38,6,5,1),
 41 |         (39,35),
 42 |         (40,38,21,19),
 43 |         (40,38,21,19),
 44 |         (41,38),
 45 |         (42,41,20,19),
 46 |         (43,42,38,37),
 47 |         (44,43,18,17),
 48 |         (45,44,42,41),
 49 |         (46,45,26,25),
 50 |         (47,42),
 51 |         (48,47,21,20),
 52 |         (49,40),
 53 |         (50,49,24,23),
 54 |         (51,50,36,35),
 55 |         (52,49),
 56 |         (53,52,38,37),
 57 |         (54,53,18,17),
 58 |         (55,31),
 59 |         (56,55,35,34),
 60 |         (57,50),
 61 |         (58,39),
 62 |         (59,58,38,37),
 63 |         (60,59),
 64 |         (61,60,46,45),
 65 |         (62,61,6,5),
 66 |         (63,62),
 67 |         (64,63,61,60),
 68 |         (65,47),
 69 |         (66,65,57,56),
 70 |         (67,66,58,57),
 71 |         (68,59),
 72 |         (69,67,42,40),
 73 |         (70,69,55,54),
 74 |         (71,65),
 75 |         (72,66,25,19),
 76 |         (73,48),
 77 |         (74,73,59,58),
 78 |         (75,74,65,64),
 79 |         (76,75,41,40),
 80 |         (77,76,47,46),
 81 |         (78,77,59,58),
 82 |         (79,70),
 83 |         (80,79,43,42),
 84 |         (81,77),
 85 |         (82,79,47,44),
 86 |         (83,82,38,37),
 87 |         (84,71),
 88 |         (85,84,58,57),
 89 |         (86,85,74,73),
 90 |         (87,74),
 91 |         (88,87,17,16),
 92 |         (89,51),
 93 |         (90,89,72,71),
 94 |         (91,90,8,7),
 95 |         (92,91,80,79),
 96 |         (93,91),
 97 |         (94,73),
 98 |         (95,84),
 99 |         (96,94,49,47),
100 |         (97,91),
101 |         (98,87),
102 |         (99,97,54,52),
103 |         (100,63),
104 |         (101,100,95,94),
105 |         (102,101,36,35),
106 |         (103,94),
107 |         (104,103,94,93),
108 |         (105,89),
109 |         (106,91),
110 |         (107,105,44,42),
111 |         (108,77),
112 |         (109,108,103,102),
113 |         (110,109,98,97),
114 |         (111,101),
115 |         (112,110,69,67),
116 |         (113,104),
117 |         (114,113,33,32),
118 |         (115,114,101,100),
119 |         (116,115,46,45),
120 |         (117,115,99,97),
121 |         (118,85),
122 |         (119,111),
123 |         (120,113,9,2),
124 |         (121,103),
125 |         (122,121,63,62),
126 |         (123,121),
127 |         (124,87),
128 |         (125,124,18,17),
129 |         (126,125,90,89),
130 |         (127,126),
131 |         (128,126,101,99),
132 |         (129,124),
133 |         (130,127),
134 |         (131,130,84,83),
135 |         (132,103),
136 |         (133,132,82,81),
137 |         (134,77),
138 |         (135,124),
139 |         (136,135,11,10),
140 |         (137,116),
141 |         (138,137,131,130),
142 |         (139,136,134,131),
143 |         (140,111),
144 |         (141,140,110,109),
145 |         (142,121),
146 |         (143,142,123,122),
147 |         (144,143,75,74),
148 |         (145,93),
149 |         (146,145,87,86),
150 |         (147,146,110,109),
151 |         (148,121),
152 |         (149,148,40,39),
153 |         (150,97),
154 |         (151,148),
155 |         (152,151,87,86),
156 |         (153,152),
157 |         (154,152,27,25),
158 |         (155,154,124,123),
159 |         (156,155,41,40),
160 |         (157,156,131,130),
161 |         (158,157,132,131),
162 |         (159,128),
163 |         (160,159,142,141),
164 |         (161,143),
165 |         (162,161,75,74),
166 |         (163,162,104,103),
167 |         (164,163,151,150),
168 |         (165,164,135,134),
169 |         (166,165,128,127),
170 |         (167,161),
171 |         (168,166,153,151),
172 | ]
173 | 
def gen_lfsr(taps):
    """Write ``lfsr_<N>.v``, an XNOR-feedback shift register of N = taps[0] bits.

    taps -- tuple of 1-based state bit positions; the first entry is also the
            register length. All listed bits are chained with ``~^`` and the
            result is shifted into state[1] on every rising clock edge.

    The generated module starts from the all-zero state and drives ``dout``
    from the top state bit.
    """
    width = taps[0]
    with open("lfsr_%d.v" % (width), "w") as out:
        feedback = ' ~^ '.join("state[%d]" % tap for tap in taps)
        verilog = """
(* top *)
module lfsr_{0} (input clk, output dout);
  reg [{0}:1] state = {0}'b0;
  always @(posedge clk)
    state <= {{ state[{0}-1:1], {1} }};
  assign dout = state[{0}];
endmodule
""".format(width, feedback)
        print(verilog, file=out)
186 | 
# Emit one lfsr_<N>.v file per entry of the taps table (an entry that is
# listed twice simply rewrites the same file).
for t in taps:
    gen_lfsr(t)
189 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/macc/.gitignore:
--------------------------------------------------------------------------------
1 | macc_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/macc/common.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def rtl_macc(name, AW, BW, AREG, BREG, MREG, Asigned, Bsigned, CEA, CEB, CEM, CEP):
 4 |     return """(* top *)
 5 | module {0} #(parameter AW={1}, BW={2}, AREG={3}, BREG={4}, MREG={5}) (input clk, CEA, CEB, CEM, CEP, input {6}[AW-1:0] A, input {7}[BW-1:0] B, output reg {8}[{9}-1:0] P);
 6 | reg {6}[AW-1:0] Ar;
 7 | reg {7}[BW-1:0] Br;
 8 | reg {8}[AW+BW-1:0] Mr;
 9 | generate
10 |     if (AREG) begin
11 |         always @(posedge clk) if ({10}) Ar <= A;
12 |     end
13 |     else
14 |         always @* Ar <= A;
15 |     if (BREG) begin
16 |         always @(posedge clk) if ({11}) Br <= B;
17 |     end
18 |     else
19 |         always @* Br <= B;
20 |     if (MREG) begin
21 |         always @(posedge clk) if ({12}) Mr <= Ar * Br;
22 |     end
23 |     else
24 |         always @* Mr <= Ar * Br;
25 |     always @(posedge clk) if ({13}) P <= P + Mr;
26 | endgenerate
27 | endmodule""".format(name, AW, BW, 
28 |                     '1' if AREG else '0', '1' if BREG else '0', '1' if MREG else '0',
29 |                     'signed ' if Asigned else '', 'signed ' if Bsigned else '', 'signed ' if Asigned and Bsigned else '',
30 |                     int(AW)+int(BW)+5,
31 |                     'CEA' if CEA else '1', 'CEB' if CEB else '1', 'CEM' if CEM else '1', 'CEP' if CEP else '1')
32 | 
33 | # https://stackoverflow.com/a/1482316
34 | from itertools import chain, combinations
def powerset(iterable):
    """Lazily iterate over every subset of ``iterable``, smallest subsets first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    pool = list(iterable)
    by_size = (combinations(pool, size) for size in range(len(pool) + 1))
    return chain.from_iterable(by_size)
39 | 
def gen_macc(aRange, bRange, reg="AB"):
    """Write one ``macc_<A>_<B>_<R>_<E>.v`` file per operand/register/enable combination.

    aRange, bRange -- iterables of width strings such as '16' or '16s'; an 's'
                      in the string selects a signed operand
    reg            -- letters of the registers to sweep. 'P' is removed from
                      the register sweep (rtl_macc always registers P).

    <R> runs over every subset of the remaining letters. <E> runs over every
    subset of ``R + 'P'`` when 'P' is in ``reg`` and is only the empty set
    otherwise.
    NOTE(review): the conditional expression covers the whole ``R + 'P'``, so
    with the default ``reg`` no clock-enable variants are produced -- confirm
    this is intended.

    For each pair with A != B the mirrored module (B as first operand) is
    written too. Its register and enable flags are mirrored as well, so the
    letters in <R>/<E> of the mirrored file name refer to the un-mirrored
    operands.
    """
    def subsets(letters):
        return [''.join(combo) for combo in powerset(letters)]

    def emit(first, second, first_key, second_key, R, E):
        # first/second are the operand strings in port order; first_key and
        # second_key are the letters looked up in R and E for those ports.
        with open("macc_%s_%s_%s_%s.v" % (first, second, R, E), "w") as f:
            source = rtl_macc('macc_%s_%s_%s_%s' % (first, second, R, E),
                              first.rstrip('s'), second.rstrip('s'),
                              first_key in R, second_key in R, 'M' in R,
                              's' in first, 's' in second,
                              first_key in E, second_key in E, 'M' in E, 'P' in E)
            print(source, file=f)

    for A in aRange:
        for B in bRange:
            for R in subsets(reg.replace('P', '')):
                enable_pool = (R + 'P') if 'P' in reg else ''
                for E in subsets(enable_pool):
                    emit(A, B, 'A', 'B', R, E)
                    if A != B:
                        emit(B, A, 'B', 'A', R, E)
60 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/macc/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from common import gen_macc
 4 | 
# Operand widths to sweep; an 's' suffix requests a signed operand.
ARange = ['16','16s','24','24s','32','32s']
BRange = ['2','2s','4','4s','8','8s','16','16s']

if __name__ == "__main__":
    # Writes the macc_*.v benchmark files into the current directory.
    gen_macc(ARange, BRange)
10 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mul/.gitignore:
--------------------------------------------------------------------------------
1 | mul_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mul/README.md:
--------------------------------------------------------------------------------
1 | # Small multipliers
2 | 
3 | The python script generates a selection of multipliers of varying sizes.
4 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mul/common.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def rtl_mul(name, AW, BW, AREG, BREG, MREG, PREG, Asigned, Bsigned, CEA, CEB, CEM, CEP):
 4 |     return """(* top *)
 5 | module {0} #(parameter AW={1}, BW={2}, AREG={3}, BREG={4}, MREG={5}, PREG={6}) (input clk, CEA, CEB, CEM, CEP, input {7}[AW-1:0] A, input {8}[BW-1:0] B, output reg {9}[AW+BW-1:0] P);
 6 | reg {7}[AW-1:0] Ar;
 7 | reg {8}[BW-1:0] Br;
 8 | reg {9}[AW+BW-1:0] Mr;
 9 | generate
10 |     if (AREG) begin
11 |         always @(posedge clk) if ({10}) Ar <= A;
12 |     end
13 |     else
14 |         always @* Ar <= A;
15 |     if (BREG) begin
16 |         always @(posedge clk) if ({11}) Br <= B;
17 |     end
18 |     else
19 |         always @* Br <= B;
20 |     if (MREG) begin
21 |         always @(posedge clk) if ({12}) Mr <= Ar * Br;
22 |     end
23 |     else
24 |         always @* Mr <= Ar * Br;
25 |     if (PREG) begin
26 |         always @(posedge clk) if ({13}) P <= Mr;
27 |     end
28 |     else
29 |         always @* P <= Mr;
30 | endgenerate
31 | endmodule""".format(name, AW, BW, 
32 |                     '1' if AREG else '0', '1' if BREG else '0', '1' if MREG else '0', '1' if PREG else '0',
33 |                     'signed ' if Asigned else '', 'signed ' if Bsigned else '', 'signed ' if Asigned and Bsigned else '',
34 |                     'CEA' if CEA else '1', 'CEB' if CEB else '1', 'CEM' if CEM else '1', 'CEP' if CEP else '1')
35 | 
36 | # https://stackoverflow.com/a/1482316
37 | from itertools import chain, combinations
def powerset(iterable):
    """Return a lazy iterator over all subsets of ``iterable`` as tuples, ordered by size.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = list(iterable)
    sizes = range(len(items) + 1)
    return chain.from_iterable(combinations(items, k) for k in sizes)
42 | 
def gen_mul(aRange, bRange, reg="ABP"):
    """Write one ``mul_<A>_<B>_<R>_<E>.v`` file per operand/register/enable combination.

    aRange, bRange -- iterables of width strings such as '16' or '16s'; an 's'
                      in the string selects a signed operand
    reg            -- letters of the registers to sweep

    <R> runs over every subset of ``reg`` and <E> over every subset of <R>,
    so only existing registers can get a clock enable. For each pair with
    A != B the mirrored module (B as first operand) is written too. Its
    register and enable flags are mirrored as well, so the letters in <R>/<E>
    of the mirrored file name refer to the un-mirrored operands.
    """
    def subsets(letters):
        return [''.join(combo) for combo in powerset(letters)]

    def emit(first, second, first_key, second_key, R, E):
        # first/second are the operand strings in port order; first_key and
        # second_key are the letters looked up in R and E for those ports.
        with open("mul_%s_%s_%s_%s.v" % (first, second, R, E), "w") as f:
            source = rtl_mul('mul_%s_%s_%s_%s' % (first, second, R, E),
                             first.rstrip('s'), second.rstrip('s'),
                             first_key in R, second_key in R, 'M' in R, 'P' in R,
                             's' in first, 's' in second,
                             first_key in E, second_key in E, 'M' in E, 'P' in E)
            print(source, file=f)

    for A in aRange:
        for B in bRange:
            for R in subsets(reg):
                for E in subsets(R):
                    emit(A, B, 'A', 'B', R, E)
                    if A != B:
                        emit(B, A, 'B', 'A', R, E)
63 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mul/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from common import gen_mul
 4 | 
# Operand widths to sweep; an 's' suffix requests a signed operand.
ARange = ['16','16s','24','24s','32','32s']
BRange = ['2','2s','4','4s','8','8s','16','16s']

if __name__ == "__main__":
    # Writes the mul_*.v benchmark files into the current directory.
    gen_mul(ARange, BRange)
10 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/muladd/.gitignore:
--------------------------------------------------------------------------------
1 | muladd_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/muladd/common.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def rtl_muladd(name, AW, BW, CW, AREG, BREG, CREG, MREG, PREG, Asigned, Bsigned, Csigned, CEA, CEB, CEC, CEM, CEP):
 4 |     return """(* top *)
 5 | module {name} #(parameter AW={AW}, BW={BW}, CW={CW}, AREG={AREG}, BREG={BREG}, CREG={CREG}, MREG={MREG}, PREG={PREG}) (input clk, CEA, CEB, CEC, CEM, CEP, input {Asigned}[AW-1:0] A, input {Bsigned}[BW-1:0] B, input {Csigned}[CW-1:0] C, output reg {Msigned}[CW-1:0] P);
 6 | reg {Asigned}[AW-1:0] Ar;
 7 | reg {Bsigned}[BW-1:0] Br;
 8 | reg {Csigned}[CW-1:0] Cr;
 9 | reg {Msigned}[CW-1:0] Mr;
10 | generate
11 |     if (AREG) begin
12 |         always @(posedge clk) if ({CEA}) Ar <= A;
13 |     end
14 |     else
15 |         always @* Ar <= A;
16 |     if (BREG) begin
17 |         always @(posedge clk) if ({CEB}) Br <= B;
18 |     end
19 |     else
20 |         always @* Br <= B;
21 |     if (CREG) begin
22 |         always @(posedge clk) if ({CEC}) Cr <= C;
23 |     end
24 |     else
25 |         always @* Cr <= C;
26 |     if (MREG) begin
27 |         always @(posedge clk) if ({CEM}) Mr <= Ar * Br;
28 |     end
29 |     else
30 |         always @* Mr <= Ar * Br;
31 |     if (PREG) begin
32 |         always @(posedge clk) if ({CEP}) P <= Cr + Mr;
33 |     end
34 |     else
35 |         always @* P <= Cr + Mr;
36 | endgenerate
37 | endmodule""".format(name=name, AW=AW, BW=BW, CW=CW,
38 |                     AREG='1' if AREG else '0', BREG='1' if BREG else '0', CREG='1' if CREG else '0',  MREG='1' if MREG else '0', PREG='1' if PREG else '0',
39 |                     Asigned='signed ' if Asigned else '', Bsigned='signed ' if Bsigned else '', Csigned='signed ' if Csigned else '', Msigned='signed ' if Asigned and Bsigned and Csigned else '',
40 |                     CEA='CEA' if CEA else '1', CEB='CEB' if CEB else '1', CEC='CEC' if CEC else '1', CEM='CEM' if CEM else '1', CEP='CEP' if CEP else '1')
41 | 
42 | # https://stackoverflow.com/a/1482316
43 | from itertools import chain, combinations
def powerset(iterable):
    """Chain together the combinations of every length, yielding all subsets as tuples.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    elements = list(iterable)
    groups = (combinations(elements, count) for count in range(len(elements) + 1))
    return chain.from_iterable(groups)
48 | 
def gen_muladd(aRange, bRange, cRange, reg="ABCP"):
    """Write one ``muladd_<A>_<B>_<C>_<R>_<E>.v`` file per operand/register/enable combination.

    aRange, bRange, cRange -- iterables of width strings such as '16' or
                              '16s'; an 's' in the string selects a signed
                              operand
    reg                    -- letters of the registers to sweep

    <R> runs over every subset of ``reg`` and <E> over every subset of <R>,
    so only existing registers can get a clock enable.
    """
    def subsets(letters):
        return [''.join(combo) for combo in powerset(letters)]

    for A in aRange:
        for B in bRange:
            for C in cRange:
                for R in subsets(reg):
                    for E in subsets(R):
                        variant = (A, B, C, R, E)
                        with open("muladd_%s_%s_%s_%s_%s.v" % variant, "w") as f:
                            source = rtl_muladd(
                                'muladd_%s_%s_%s_%s_%s' % variant,
                                A.rstrip('s'), B.rstrip('s'), C.rstrip('s'),
                                'A' in R, 'B' in R, 'C' in R, 'M' in R, 'P' in R,
                                's' in A, 's' in B, 's' in C,
                                'A' in E, 'B' in E, 'C' in E, 'M' in E, 'P' in E)
                            print(source, file=f)
62 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/muladd/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from common import gen_muladd
 4 | 
# Operand widths to sweep; an 's' suffix requests a signed operand.
ARange = ['16','32s']
BRange = ['8','16s']
CRange = ['32','40s']

if __name__ == "__main__":
    # Writes the muladd_*.v benchmark files into the current directory.
    gen_muladd(ARange, BRange, CRange)
11 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mux/.gitignore:
--------------------------------------------------------------------------------
1 | mux_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mux/README.md:
--------------------------------------------------------------------------------
1 | # Small multiplexers
2 | 
3 | The python script generates a selection of multiplexer descriptions, using variable 
4 | length index (e.g. `assign a = b [c]`) as well as `case` and `if`-`else` (balanced
5 | and unbalanced) styles, across a variety of power-of-2 and non-power-of-2 values
6 | for a number of inputs, as well as input width.
7 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mux/common.py:
--------------------------------------------------------------------------------
 1 | from math import log2, ceil
 2 | 
 3 | def gen_mux_index(N,W):
 4 |     with open("mux_index_%d_%d.v" % (N,W), "w") as f:
 5 |         print("""
 6 | (* top *)
 7 | module mux_index_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output [W-1:0] o);
 8 | assign o = i[s*W+:W];
 9 | endmodule
10 | """.format(N,W), file=f)
11 | 
def gen_mux_case(N,W):
    """Write ``mux_case_<N>_<W>.v``: an N-input, W-bit mux described with a ``case`` statement.

    One case item is emitted per input; any other select value drives the
    output to all-x.
    """
    header = """
(* top *)
module mux_case_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output reg [W-1:0] o);
always @*
    case (s)""".format(N,W)
    footer = """        default: o <= {W{1'bx}};
    endcase
endmodule
"""
    with open("mux_case_%d_%d.v" % (N,W), "w") as out:
        out.write(header + "\n")
        for sel in range(N):
            out.write("        {0}: o <= i[{0}*W+:W];".format(sel) + "\n")
        out.write(footer + "\n")
25 | 
def gen_mux_if_unbal(N,W):
    """Write ``mux_if_unbal_<N>_<W>.v``: an N-input, W-bit mux as one linear if / else-if chain.

    The chain compares the select against 0, 1, ... N-1 in turn; any other
    select value drives the output to all-x.
    """
    with open("mux_if_unbal_%d_%d.v" % (N,W), "w") as out:
        chunks = ["""
(* top *)
module mux_if_unbal_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output reg [W-1:0] o);
always @*""".format(N,W)]
        chunks.append("    if (s == 0) o <= i[0*W+:W];")
        chunks.extend("    else if (s == {0}) o <= i[{0}*W+:W];".format(sel)
                      for sel in range(1, N))
        chunks.append("    else o <= {W{1'bx}};")
        chunks.append("""
endmodule
""")
        # every chunk is followed by exactly one newline
        out.write("\n".join(chunks) + "\n")
39 | 
def _gen_mux_if_bal_rec(f, N, depth):
    """Emit the balanced if/else tree that selects among the inputs listed in ``N``.

    f     -- open text file to print into
    N     -- list of input indices whose length is a power of two; ``None``
             entries are padding and only occur at the tail of the list
    depth -- nesting level, used for indentation only

    The list is split into a lower and an upper half, which differ in the
    most significant select bit not yet decided. That bit is the one tested,
    so ``s`` selects input ``s`` exactly as in the other mux styles. A half
    that holds only padding gets no ``else`` branch, and the default
    assignment made by the caller stays in effect for it.
    """
    indent = ' ' * depth
    if len(N) == 1:
        print("    {0}o <= i[{1}*W+:W];".format(indent, N[0]), file=f)
        return
    bit = ceil(log2(len(N))) - 1        # select bit that separates the two halves
    half = 2 ** bit
    low, high = N[:half], N[half:]
    print("    {0}if (s[{1}] == 1'b0)".format(indent, bit), file=f)
    _gen_mux_if_bal_rec(f, low, depth+1)
    if any(index is not None for index in high):
        print("    {0}else".format(indent), file=f)
        _gen_mux_if_bal_rec(f, high, depth+1)

def gen_mux_if_bal(N,W):
    """Write ``mux_if_bal_<N>_<W>.v``: an N-input, W-bit mux as a balanced if/else tree.

    The output defaults to all-x and is then overridden by a binary decision
    tree over the select bits. N is padded up to the next power of two, and
    the padded select values keep the all-x default.
    """
    with open("mux_if_bal_%d_%d.v" % (N,W), "w") as f:
        print("""
(* top *)
module mux_if_bal_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output reg [W-1:0] o);
always @* begin""".format(N,W), file=f)
        pad = (2 ** int(ceil(log2(N)))) - N
        # This string is not passed through str.format, so the braces are written singly.
        print("    o <= {W{1'bx}};", file=f)
        _gen_mux_if_bal_rec(f, list(range(N)) + [None]*pad, 0)
        print("""end
endmodule
""", file=f)
64 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/mux/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from common import *
 4 | 
if __name__ == "__main__":
    # Input counts: each power of two from 4 to 32 together with its direct
    # neighbours, plus the 2-input case.
    for N in [2,3,4,5] + [7,8,9] + [15,16,17] + [31,32,33]:
        # Data widths per input.
        for W in [1,2,3,4,5,8]:
            # One file per description style.
            gen_mux_index(N,W)
            gen_mux_case(N,W)
            gen_mux_if_bal(N,W)
            gen_mux_if_unbal(N,W)
12 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/onehot/.gitignore:
--------------------------------------------------------------------------------
1 | onehot2bin_*.v
2 | bin2onehot_*.v
3 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/onehot/README.md:
--------------------------------------------------------------------------------
 1 | # Onehot - binary to one-hot and one-hot to binary encoder/decoders
 2 | 
 3 | An N-bit one-hot decoder has 2^N output signals.
 4 | Only one of the output signals can be '1' at any time.
 5 | The index of the output that is set high, is equal to
 6 | the (unsigned) binary value at the input of the decoder.
 7 | 
 8 | The python script generates one-hot encoders and decoders of varying widths.
 9 | 
10 | Binary to one-hot decoders are frequently used in D/A converters and RAM/ROM
11 | row and column selection circuits.
12 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/onehot/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate one-hot to binary and binary to one-hot decoders
 4 | 
 5 | import math
 6 | 
 7 | def gen_bin2onehot(nBits):
 8 |     obits = int(math.pow(2,nBits))    # calculate output bits
 9 |     with open("bin2onehot_%d.v" % (nBits), "w") as f:
10 |         print("""
11 | module bin2onehot_<n> (input [<n>-1:0] din, output reg [<b>-1:0] dout);
12 |   always @(din) begin
13 |     dout = 0;
14 |     dout[din] = 1'b1;
15 |   end
16 | endmodule
17 | """.replace("<n>", str(nBits)).replace("<b>", str(obits)), file=f)
18 | 
19 | 
def gen_onehot2bin(nBits):
    """Write ``onehot2bin_<nBits>.v``: a one-hot to binary encoder.

    The generated module maps an nBits-wide one-hot input to the index of the
    bit that is set. Any input that is not one-hot leaves the output at 0.

    The output is ceil(log2(nBits)) bits wide but never narrower than one
    bit. Without that lower bound the nBits == 1 case would declare a
    ``[-1:0]`` port and emit zero-width constants, which is not legal Verilog.
    """
    # (nBits - 1).bit_length() == ceil(log2(nBits)) for nBits >= 1, computed
    # without going through floating point.
    obits = max(1, (nBits - 1).bit_length())
    with open("onehot2bin_%d.v" % (nBits), "w") as f:
        print("""
module onehot2bin_<n> (input [<n>-1:0] din, output reg [<b>-1:0] dout);
  always @(din) begin
    dout = 0;
    case(din)
""".replace("<n>", str(nBits)).replace("<b>", str(obits)), file=f)
        ## emit switch case data..
        for index in range(nBits):
            print("      %d'd%d : dout = %d'd%d;" % (nBits, 1 << index, obits, index), file=f)
        print("""
      default: ;
    endcase
  end
endmodule""", file=f)
37 |         
38 | 
# Binary to one-hot decoders: the output has 2**nBits bits, so only small
# input widths are generated.
for nBits in [1,2,3,4,5,6,7,8]:
    gen_bin2onehot(nBits)

# One-hot to binary encoders: nBits is the width of the one-hot input here.
for nBits in [1,2,3,4,5,6,7,8,16,32,64]:
    gen_onehot2bin(nBits)
44 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/popcount/.gitignore:
--------------------------------------------------------------------------------
1 | popcount_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/popcount/README.md:
--------------------------------------------------------------------------------
1 | # popcount
2 | 
3 | This module counts the number of '1' bits at its input.
4 | It is known as the 'population count'.
5 | 
6 | The python script generates various popcount modules varying in input width.
7 | 
8 | Population counting is used in error correction coding, parity checking, machine learning and encryption applications.
9 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/popcount/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate population count of a specified width
 4 | 
 5 | import math
 6 | 
 7 | def gen_popcount(nBits):
 8 |     obits = int(math.ceil(math.log2(nBits)))    # calculate output bits
 9 |     with open("popcount_%d.v" % (nBits), "w") as f:
10 |         body = "din[0]"
11 |         for I in range(1, nBits):
12 |             body = body + " + din[" + str(I) + "]"
13 |         body = body + ";"
14 |         print("""
15 | module popcount_<n> (input [<n>-1:0] din, output [<b>-1:0] dout);
16 |   assign dout = <body>
17 | endmodule
18 | """.replace("<body>", body).replace("<n>", str(nBits)).replace("<b>", str(obits)), file=f)
19 | 
# Emit one popcount_<nBits>.v file per input width.
for nBits in [2,3,4,5,6,7,8,16,32,64]:
    gen_popcount(nBits)
22 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/priodecode/.gitignore:
--------------------------------------------------------------------------------
1 | priodecoder_*.v
2 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/priodecode/README.md:
--------------------------------------------------------------------------------
 1 | # priodecode - Priority decoders
 2 | 
 3 | The priority decoder takes a bit-vector of request signals and
 4 | lets the request with the most weight through. I.e. only one of the
 5 | output signals is high, or none of the output signals are high.
 6 | 
 7 | The Python script generates priority decoders of various widths.
 8 | 
 9 | Priority decoders are used in interrupt processing, where the
10 | interrupt with the highest priority should be serviced first.
11 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/priodecode/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Priority decoders
 4 | 
 5 | import math
 6 | 
 7 | def gen_priodecoder(nBits):
 8 |     with open("priodecoder_%d.v" % (nBits), "w") as f:
 9 |         print("""
10 | module priodecoder_<n> (input [<n>-1:0] din, output [<n>-1:0] dout);
11 |   assign dout = din & (~din-1);
12 | endmodule
13 | """.replace("<n>", str(nBits)), file=f)
14 | 
# Emit priodecoder_2.v through priodecoder_16.v.
for nBits in range(2,16+1):
    gen_priodecoder(nBits)
17 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/ram/.gitignore:
--------------------------------------------------------------------------------
1 | syncram_*.v
2 | dualport_syncram_*.v
3 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/ram/dualport_syncram.template:
--------------------------------------------------------------------------------
 1 | // Dual-port Synchronous RAM template file
 2 | // Author: Niels A. Moseley
 3 | //
 4 | // Modelled after the ICE40 SBRAM blocks but with only one clock domain
 5 | // Also, it is assumed that the read enable is always asserted.
 6 | // I don't know what happens to the read data when the address is
 7 | // simultaneously written.
 8 | //
 9 | //
10 | 
// One write port and one read port sharing a single clock.
// <w> is the address width in bits, <s> the data width in bits.
module dualport_syncram_<w>_<s> (
    clk,        // common clock
    cs,         // active-high chip select
    we,         // active-high write enable
    waddr,      // write address
    wdata,      // write data input
    raddr,      // read address
    rdata       // read data output
);

input clk,cs,we;
input [<w>-1:0] waddr;
input [<w>-1:0] raddr;
input [<s>-1:0] wdata;
output reg [<s>-1:0] rdata;

// The addresses are <w> bits wide, so the array holds 2**<w> words.
reg [<s>-1:0] mem [0:(2**<w>)-1];

always @(posedge clk)
begin
    if (cs == 1'b1) begin
        if (we) begin
            mem[waddr] <= wdata;
        end
        // the read is registered and happens on every selected clock edge
        rdata <= mem[raddr];
    end // chip select
end

endmodule
40 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/ram/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate various synchronous RAM types with 2^<w> addresses and <s> bit data path.
 4 | 
 5 | import math
 6 | 
 7 | # generic synchronous RAM
 8 | def gen_syncram(addrWidth, ioWidth, template):
 9 |     with open("syncram_%d_%d.v" % (addrWidth, ioWidth), "w") as f:
10 |         print(template.replace("<w>", str(addrWidth)).replace("<s>", str(ioWidth)), file=f)
11 | 
12 | # generic synchronous RAM with transparent write-through
13 | def gen_syncram_tw(addrWidth, ioWidth, template):
14 |     with open("syncram_tw_%d_%d.v" % (addrWidth, ioWidth), "w") as f:
15 |         print(template.replace("<w>", str(addrWidth)).replace("<s>", str(ioWidth)), file=f)
16 | 
17 | # generic single-write/single-read synchronous RAM
18 | def gen_dualportsyncram(addrWidth, ioWidth, template):
19 |     with open("dualport_syncram_%d_%d.v" % (addrWidth, ioWidth), "w") as f:
20 |         print(template.replace("<w>", str(addrWidth)).replace("<s>", str(ioWidth)), file=f)
21 | 
22 | 
23 | 
24 | with open('syncram.template','rt') as templatefile:
25 |     template = templatefile.read()
26 |     
27 |     for ioWidth in [4,8,12,16]:
28 |         for addrBits in [4,8,9,10,11,12]:
29 |             gen_syncram(addrBits, ioWidth, ''.join(template))
30 | 
31 | with open('syncram_tw.template','rt') as templatefile:
32 |     template = templatefile.read()
33 |     
34 |     for ioWidth in [4,8,12,16]:
35 |         for addrBits in [4,8,9,10,11,12]:
36 |             gen_syncram_tw(addrBits, ioWidth, ''.join(template))
37 | 
38 | with open('dualport_syncram.template','rt') as templatefile:
39 |     template = templatefile.read()
40 |     
41 |     for ioWidth in [4,7,8,12,16]:
42 |         for addrBits in [4,8,9,10,11,12]:
43 |             gen_dualportsyncram(addrBits, ioWidth, ''.join(template))
44 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/ram/syncram.template:
--------------------------------------------------------------------------------
 1 | // Synchronous RAM template file
 2 | // Author: Niels A. Moseley
 3 | //
 4 | // Data out holds its previous value when data is written
 5 | //
 6 | // Single port: addr_in selects the word for both reads and writes.
 7 | // The memory holds one word per address, i.e. 2^(address bits) words.
 8 | //
 9 | 
10 | module syncram_<w>_<s> (
11 |     clk,
12 |     cs,         // active-high chip select
13 |     we,         // active-high write enable
14 |     addr_in,
15 |     data_in,
16 |     data_out
17 | );
18 | 
19 | input clk,cs,we;
20 | input [<w>-1:0] addr_in;
21 | input [<s>-1:0] data_in;
22 | output reg [<s>-1:0] data_out;
23 | 
24 | // One word per address: the depth is 2^(address bits), not the bit count.
25 | reg [<s>-1:0] mem [0:(1 << <w>)-1];
26 | 
27 | always @(posedge clk)
28 | begin
29 |     if (cs == 1'b1) begin
30 |         if (we) begin
31 |             mem[addr_in] <= data_in;    // write cycle: data_out is not updated
32 |         end else begin
33 |             data_out <= mem[addr_in];   // read cycle
34 |         end // write enable
35 |     end // chip select
36 | end
37 | 
38 | endmodule
39 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/ram/syncram_tw.template:
--------------------------------------------------------------------------------
 1 | // Synchronous RAM template file
 2 | // Author: Niels A. Moseley
 3 | //
 4 | // Featuring pass-through-on-write logic
 5 | //
 6 | // Single port: addr_in selects the word for both reads and writes.
 7 | // The memory holds one word per address, i.e. 2^(address bits) words.
 8 | //
 9 | 
10 | module syncram_tw_<w>_<s> (
11 |     clk,
12 |     cs,         // active-high chip select
13 |     we,         // active-high write enable
14 |     addr_in,
15 |     data_in,
16 |     data_out
17 | );
18 | 
19 | input clk,cs,we;
20 | input [<w>-1:0] addr_in;
21 | input [<s>-1:0] data_in;
22 | output reg [<s>-1:0] data_out;
23 | 
24 | // One word per address: the depth is 2^(address bits), not the bit count.
25 | reg [<s>-1:0] mem [0:(1 << <w>)-1];
26 | 
27 | always @(posedge clk)
28 | begin
29 |     if (cs == 1'b1) begin
30 |         if (we) begin
31 |             data_out <= data_in;    // pass-through data at input to output during write operations
32 |             mem[addr_in] <= data_in;
33 |         end else begin
34 |             data_out <= mem[addr_in];   // read cycle
35 |         end // write enable
36 |     end // chip select
37 | end
38 | 
39 | endmodule
40 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/.gitignore:
--------------------------------------------------------------------------------
1 | *.vcd
2 | *.vvp
3 | *_netlist.v
4 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/README.md:
--------------------------------------------------------------------------------
 1 | # Various - various designs
 2 | 
 3 | The following designs can be found here:
 4 | 
 5 | ## latch8
 6 | 
 7 | A simple 8-bit level-sensitive latch (module `latch` in `latch.v`), like grandmother used to make.
 8 | 
 9 | ## crc32
10 | 
11 | A 32-bit CRC based on https://msdn.microsoft.com/en-us/library/dd905031.aspx
12 | It has an 8-bit input and 32-bit output.
13 | The CRC is updated on every clock.
14 | 
15 | ## pwm256
16 | 
17 | An 8-bit counter and comparator can generate a pulse-width modulated single-bit output.
18 | This PWM module can be used to control the brightness of an LED, or be used (after analogue filtering) as a D/A converter featuring impressive intermodulation distortion.
19 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/crc32.v:
--------------------------------------------------------------------------------
  1 | // CRC32 based on https://msdn.microsoft.com/en-us/library/dd905031.aspx
  2 | //
  3 | // Byte-wide, table-driven CRC: every clock edge folds one byte from din in.
  4 | // rst_n is a synchronous, active-low reset that loads crc with 32'hFFFFFFFF.
  5 | // dout is the crc register itself; no final inversion is applied here.
  6 | //
  7 | 
  8 | module crc32 (input [7:0] din, input clk, input rst_n, output [31:0] dout);
  9 | 
 10 |   reg [31:0] crctbl [255:0];    // 256-entry lookup table, one 32-bit word per index
 11 |   reg [31:0] crc;               // running CRC value
 12 | 
 13 |   initial                       // table contents, assigned once at start-up
 14 |     begin
 15 |         crctbl[0] <= 32'h00000000;
 16 |         crctbl[1] <= 32'h77073096;
 17 |         crctbl[2] <= 32'hEE0E612C;
 18 |         crctbl[3] <= 32'h990951BA;
 19 |         crctbl[4] <= 32'h076DC419;
 20 |         crctbl[5] <= 32'h706AF48F;
 21 |         crctbl[6] <= 32'hE963A535;
 22 |         crctbl[7] <= 32'h9E6495A3;
 23 |         crctbl[8] <= 32'h0EDB8832;
 24 |         crctbl[9] <= 32'h79DCB8A4;
 25 |         crctbl[10] <= 32'hE0D5E91E;
 26 |         crctbl[11] <= 32'h97D2D988;
 27 |         crctbl[12] <= 32'h09B64C2B;
 28 |         crctbl[13] <= 32'h7EB17CBD;
 29 |         crctbl[14] <= 32'hE7B82D07;
 30 |         crctbl[15] <= 32'h90BF1D91;
 31 |         crctbl[16] <= 32'h1DB71064;
 32 |         crctbl[17] <= 32'h6AB020F2;
 33 |         crctbl[18] <= 32'hF3B97148;
 34 |         crctbl[19] <= 32'h84BE41DE;
 35 |         crctbl[20] <= 32'h1ADAD47D;
 36 |         crctbl[21] <= 32'h6DDDE4EB;
 37 |         crctbl[22] <= 32'hF4D4B551;
 38 |         crctbl[23] <= 32'h83D385C7;
 39 |         crctbl[24] <= 32'h136C9856;
 40 |         crctbl[25] <= 32'h646BA8C0;
 41 |         crctbl[26] <= 32'hFD62F97A;
 42 |         crctbl[27] <= 32'h8A65C9EC;
 43 |         crctbl[28] <= 32'h14015C4F;
 44 |         crctbl[29] <= 32'h63066CD9;
 45 |         crctbl[30] <= 32'hFA0F3D63;
 46 |         crctbl[31] <= 32'h8D080DF5;
 47 |         crctbl[32] <= 32'h3B6E20C8;
 48 |         crctbl[33] <= 32'h4C69105E;
 49 |         crctbl[34] <= 32'hD56041E4;
 50 |         crctbl[35] <= 32'hA2677172;
 51 |         crctbl[36] <= 32'h3C03E4D1;
 52 |         crctbl[37] <= 32'h4B04D447;
 53 |         crctbl[38] <= 32'hD20D85FD;
 54 |         crctbl[39] <= 32'hA50AB56B;
 55 |         crctbl[40] <= 32'h35B5A8FA;
 56 |         crctbl[41] <= 32'h42B2986C;
 57 |         crctbl[42] <= 32'hDBBBC9D6;
 58 |         crctbl[43] <= 32'hACBCF940;
 59 |         crctbl[44] <= 32'h32D86CE3;
 60 |         crctbl[45] <= 32'h45DF5C75;
 61 |         crctbl[46] <= 32'hDCD60DCF;
 62 |         crctbl[47] <= 32'hABD13D59;
 63 |         crctbl[48] <= 32'h26D930AC;
 64 |         crctbl[49] <= 32'h51DE003A;
 65 |         crctbl[50] <= 32'hC8D75180;
 66 |         crctbl[51] <= 32'hBFD06116;
 67 |         crctbl[52] <= 32'h21B4F4B5;
 68 |         crctbl[53] <= 32'h56B3C423;
 69 |         crctbl[54] <= 32'hCFBA9599;
 70 |         crctbl[55] <= 32'hB8BDA50F;
 71 |         crctbl[56] <= 32'h2802B89E;
 72 |         crctbl[57] <= 32'h5F058808;
 73 |         crctbl[58] <= 32'hC60CD9B2;
 74 |         crctbl[59] <= 32'hB10BE924;
 75 |         crctbl[60] <= 32'h2F6F7C87;
 76 |         crctbl[61] <= 32'h58684C11;
 77 |         crctbl[62] <= 32'hC1611DAB;
 78 |         crctbl[63] <= 32'hB6662D3D;
 79 |         crctbl[64] <= 32'h76DC4190;
 80 |         crctbl[65] <= 32'h01DB7106;
 81 |         crctbl[66] <= 32'h98D220BC;
 82 |         crctbl[67] <= 32'hEFD5102A;
 83 |         crctbl[68] <= 32'h71B18589;
 84 |         crctbl[69] <= 32'h06B6B51F;
 85 |         crctbl[70] <= 32'h9FBFE4A5;
 86 |         crctbl[71] <= 32'hE8B8D433;
 87 |         crctbl[72] <= 32'h7807C9A2;
 88 |         crctbl[73] <= 32'h0F00F934;
 89 |         crctbl[74] <= 32'h9609A88E;
 90 |         crctbl[75] <= 32'hE10E9818;
 91 |         crctbl[76] <= 32'h7F6A0DBB;
 92 |         crctbl[77] <= 32'h086D3D2D;
 93 |         crctbl[78] <= 32'h91646C97;
 94 |         crctbl[79] <= 32'hE6635C01;
 95 |         crctbl[80] <= 32'h6B6B51F4;
 96 |         crctbl[81] <= 32'h1C6C6162;
 97 |         crctbl[82] <= 32'h856530D8;
 98 |         crctbl[83] <= 32'hF262004E;
 99 |         crctbl[84] <= 32'h6C0695ED;
100 |         crctbl[85] <= 32'h1B01A57B;
101 |         crctbl[86] <= 32'h8208F4C1;
102 |         crctbl[87] <= 32'hF50FC457;
103 |         crctbl[88] <= 32'h65B0D9C6;
104 |         crctbl[89] <= 32'h12B7E950;
105 |         crctbl[90] <= 32'h8BBEB8EA;
106 |         crctbl[91] <= 32'hFCB9887C;
107 |         crctbl[92] <= 32'h62DD1DDF;
108 |         crctbl[93] <= 32'h15DA2D49;
109 |         crctbl[94] <= 32'h8CD37CF3;
110 |         crctbl[95] <= 32'hFBD44C65;
111 |         crctbl[96] <= 32'h4DB26158;
112 |         crctbl[97] <= 32'h3AB551CE;
113 |         crctbl[98] <= 32'hA3BC0074;
114 |         crctbl[99] <= 32'hD4BB30E2;
115 |         crctbl[100] <= 32'h4ADFA541;
116 |         crctbl[101] <= 32'h3DD895D7;
117 |         crctbl[102] <= 32'hA4D1C46D;
118 |         crctbl[103] <= 32'hD3D6F4FB;
119 |         crctbl[104] <= 32'h4369E96A;
120 |         crctbl[105] <= 32'h346ED9FC;
121 |         crctbl[106] <= 32'hAD678846;
122 |         crctbl[107] <= 32'hDA60B8D0;
123 |         crctbl[108] <= 32'h44042D73;
124 |         crctbl[109] <= 32'h33031DE5;
125 |         crctbl[110] <= 32'hAA0A4C5F;
126 |         crctbl[111] <= 32'hDD0D7CC9;
127 |         crctbl[112] <= 32'h5005713C;
128 |         crctbl[113] <= 32'h270241AA;
129 |         crctbl[114] <= 32'hBE0B1010;
130 |         crctbl[115] <= 32'hC90C2086;
131 |         crctbl[116] <= 32'h5768B525;
132 |         crctbl[117] <= 32'h206F85B3;
133 |         crctbl[118] <= 32'hB966D409;
134 |         crctbl[119] <= 32'hCE61E49F;
135 |         crctbl[120] <= 32'h5EDEF90E;
136 |         crctbl[121] <= 32'h29D9C998;
137 |         crctbl[122] <= 32'hB0D09822;
138 |         crctbl[123] <= 32'hC7D7A8B4;
139 |         crctbl[124] <= 32'h59B33D17;
140 |         crctbl[125] <= 32'h2EB40D81;
141 |         crctbl[126] <= 32'hB7BD5C3B;
142 |         crctbl[127] <= 32'hC0BA6CAD;
143 |         crctbl[128] <= 32'hEDB88320;
144 |         crctbl[129] <= 32'h9ABFB3B6;
145 |         crctbl[130] <= 32'h03B6E20C;
146 |         crctbl[131] <= 32'h74B1D29A;
147 |         crctbl[132] <= 32'hEAD54739;
148 |         crctbl[133] <= 32'h9DD277AF;
149 |         crctbl[134] <= 32'h04DB2615;
150 |         crctbl[135] <= 32'h73DC1683;
151 |         crctbl[136] <= 32'hE3630B12;
152 |         crctbl[137] <= 32'h94643B84;
153 |         crctbl[138] <= 32'h0D6D6A3E;
154 |         crctbl[139] <= 32'h7A6A5AA8;
155 |         crctbl[140] <= 32'hE40ECF0B;
156 |         crctbl[141] <= 32'h9309FF9D;
157 |         crctbl[142] <= 32'h0A00AE27;
158 |         crctbl[143] <= 32'h7D079EB1;
159 |         crctbl[144] <= 32'hF00F9344;
160 |         crctbl[145] <= 32'h8708A3D2;
161 |         crctbl[146] <= 32'h1E01F268;
162 |         crctbl[147] <= 32'h6906C2FE;
163 |         crctbl[148] <= 32'hF762575D;
164 |         crctbl[149] <= 32'h806567CB;
165 |         crctbl[150] <= 32'h196C3671;
166 |         crctbl[151] <= 32'h6E6B06E7;
167 |         crctbl[152] <= 32'hFED41B76;
168 |         crctbl[153] <= 32'h89D32BE0;
169 |         crctbl[154] <= 32'h10DA7A5A;
170 |         crctbl[155] <= 32'h67DD4ACC;
171 |         crctbl[156] <= 32'hF9B9DF6F;
172 |         crctbl[157] <= 32'h8EBEEFF9;
173 |         crctbl[158] <= 32'h17B7BE43;
174 |         crctbl[159] <= 32'h60B08ED5;
175 |         crctbl[160] <= 32'hD6D6A3E8;
176 |         crctbl[161] <= 32'hA1D1937E;
177 |         crctbl[162] <= 32'h38D8C2C4;
178 |         crctbl[163] <= 32'h4FDFF252;
179 |         crctbl[164] <= 32'hD1BB67F1;
180 |         crctbl[165] <= 32'hA6BC5767;
181 |         crctbl[166] <= 32'h3FB506DD;
182 |         crctbl[167] <= 32'h48B2364B;
183 |         crctbl[168] <= 32'hD80D2BDA;
184 |         crctbl[169] <= 32'hAF0A1B4C;
185 |         crctbl[170] <= 32'h36034AF6;
186 |         crctbl[171] <= 32'h41047A60;
187 |         crctbl[172] <= 32'hDF60EFC3;
188 |         crctbl[173] <= 32'hA867DF55;
189 |         crctbl[174] <= 32'h316E8EEF;
190 |         crctbl[175] <= 32'h4669BE79;
191 |         crctbl[176] <= 32'hCB61B38C;
192 |         crctbl[177] <= 32'hBC66831A;
193 |         crctbl[178] <= 32'h256FD2A0;
194 |         crctbl[179] <= 32'h5268E236;
195 |         crctbl[180] <= 32'hCC0C7795;
196 |         crctbl[181] <= 32'hBB0B4703;
197 |         crctbl[182] <= 32'h220216B9;
198 |         crctbl[183] <= 32'h5505262F;
199 |         crctbl[184] <= 32'hC5BA3BBE;
200 |         crctbl[185] <= 32'hB2BD0B28;
201 |         crctbl[186] <= 32'h2BB45A92;
202 |         crctbl[187] <= 32'h5CB36A04;
203 |         crctbl[188] <= 32'hC2D7FFA7;
204 |         crctbl[189] <= 32'hB5D0CF31;
205 |         crctbl[190] <= 32'h2CD99E8B;
206 |         crctbl[191] <= 32'h5BDEAE1D;
207 |         crctbl[192] <= 32'h9B64C2B0;
208 |         crctbl[193] <= 32'hEC63F226;
209 |         crctbl[194] <= 32'h756AA39C;
210 |         crctbl[195] <= 32'h026D930A;
211 |         crctbl[196] <= 32'h9C0906A9;
212 |         crctbl[197] <= 32'hEB0E363F;
213 |         crctbl[198] <= 32'h72076785;
214 |         crctbl[199] <= 32'h05005713;
215 |         crctbl[200] <= 32'h95BF4A82;
216 |         crctbl[201] <= 32'hE2B87A14;
217 |         crctbl[202] <= 32'h7BB12BAE;
218 |         crctbl[203] <= 32'h0CB61B38;
219 |         crctbl[204] <= 32'h92D28E9B;
220 |         crctbl[205] <= 32'hE5D5BE0D;
221 |         crctbl[206] <= 32'h7CDCEFB7;
222 |         crctbl[207] <= 32'h0BDBDF21;
223 |         crctbl[208] <= 32'h86D3D2D4;
224 |         crctbl[209] <= 32'hF1D4E242;
225 |         crctbl[210] <= 32'h68DDB3F8;
226 |         crctbl[211] <= 32'h1FDA836E;
227 |         crctbl[212] <= 32'h81BE16CD;
228 |         crctbl[213] <= 32'hF6B9265B;
229 |         crctbl[214] <= 32'h6FB077E1;
230 |         crctbl[215] <= 32'h18B74777;
231 |         crctbl[216] <= 32'h88085AE6;
232 |         crctbl[217] <= 32'hFF0F6A70;
233 |         crctbl[218] <= 32'h66063BCA;
234 |         crctbl[219] <= 32'h11010B5C;
235 |         crctbl[220] <= 32'h8F659EFF;
236 |         crctbl[221] <= 32'hF862AE69;
237 |         crctbl[222] <= 32'h616BFFD3;
238 |         crctbl[223] <= 32'h166CCF45;
239 |         crctbl[224] <= 32'hA00AE278;
240 |         crctbl[225] <= 32'hD70DD2EE;
241 |         crctbl[226] <= 32'h4E048354;
242 |         crctbl[227] <= 32'h3903B3C2;
243 |         crctbl[228] <= 32'hA7672661;
244 |         crctbl[229] <= 32'hD06016F7;
245 |         crctbl[230] <= 32'h4969474D;
246 |         crctbl[231] <= 32'h3E6E77DB;
247 |         crctbl[232] <= 32'hAED16A4A;
248 |         crctbl[233] <= 32'hD9D65ADC;
249 |         crctbl[234] <= 32'h40DF0B66;
250 |         crctbl[235] <= 32'h37D83BF0;
251 |         crctbl[236] <= 32'hA9BCAE53;
252 |         crctbl[237] <= 32'hDEBB9EC5;
253 |         crctbl[238] <= 32'h47B2CF7F;
254 |         crctbl[239] <= 32'h30B5FFE9;
255 |         crctbl[240] <= 32'hBDBDF21C;
256 |         crctbl[241] <= 32'hCABAC28A;
257 |         crctbl[242] <= 32'h53B39330;
258 |         crctbl[243] <= 32'h24B4A3A6;
259 |         crctbl[244] <= 32'hBAD03605;
260 |         crctbl[245] <= 32'hCDD70693;
261 |         crctbl[246] <= 32'h54DE5729;
262 |         crctbl[247] <= 32'h23D967BF;
263 |         crctbl[248] <= 32'hB3667A2E;
264 |         crctbl[249] <= 32'hC4614AB8;
265 |         crctbl[250] <= 32'h5D681B02;
266 |         crctbl[251] <= 32'h2A6F2B94;
267 |         crctbl[252] <= 32'hB40BBE37;
268 |         crctbl[253] <= 32'hC30C8EA1;
269 |         crctbl[254] <= 32'h5A05DF1B;
270 |         crctbl[255] <= 32'h2D02EF8D;
271 |     end
272 | 
273 |   always @(posedge clk)
274 |   begin
275 |     if (rst_n == 1'b0)              // synchronous reset, active low
276 |       crc <= 32'hFFFFFFFF;
277 |     else
278 |       crc <= (crc >> 8) ^ crctbl[(crc[7:0] ^ din)];    // drop the low byte, XOR in the table word indexed by (low byte ^ din)
279 |   end
280 | 
281 |   assign dout = crc;                // the register value is output unmodified
282 | 
283 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/latch.v:
--------------------------------------------------------------------------------
 1 | // Generate a simple 8-bit latch
 2 | // Author: Niels A. Moseley
 3 | //         Moseley Instruments / Symbiotic EDA
 4 | //         02-11-2018
 5 | // 
 6 | 
 7 | module latch(input [7:0] din, input gate, output reg [7:0] dout);
 8 | 
 9 |   // Transparent latch: dout follows din while gate is high and keeps its
10 |   // last value while gate is low (the if has no else branch).
11 |   always @(gate or din)
12 |   begin
13 |     if (gate == 1'b1)
14 |     begin
15 |       dout <= din;
16 |     end
17 |   end
18 | 
19 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/pwm256.v:
--------------------------------------------------------------------------------
 1 | // 256-level PWM generator
 2 | // Author: Niels A. Moseley
 3 | //         Symbiotic EDA / Moseley Instruments
 4 | //         10-11-2018
 5 | 
 6 | module pwm256(
 7 |     input clk,                // clock for the counter and the output register
 8 |     input rst_n,              // asynchronous reset, active low
 9 |     input [7:0] d_in,         // compare threshold
10 |     output reg pwm_out        // registered output: 1 while counter >= d_in
11 | );
12 | // NOTE: counter is declared signed, but d_in is unsigned, so the >= below is evaluated as an unsigned comparison.
13 | reg signed [7:0] counter;
14 | 
15 | always @(posedge clk or negedge rst_n)
16 | begin
17 |     if (rst_n == 1'b0)
18 |     begin
19 |         counter <= 8'd0;
20 |         pwm_out <= 1'b0;
21 |     end
22 |     else
23 |     begin
24 |         counter <= counter + 8'd1;    // free-running 8-bit count, wraps after 255
25 |         if (counter >= d_in)          // compares the count from before this clock edge
26 |             pwm_out <= 1'b1;
27 |         else
28 |             pwm_out <= 1'b0;
29 |     end
30 | end
31 | 
32 | endmodule
33 | 


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/pwm256_tb.v:
--------------------------------------------------------------------------------
 1 | // Testbench for 256-level PWM generator
 2 | // Author: Niels A. Moseley
 3 | //         Symbiotic EDA / Moseley Instruments
 4 | //         10-11-2018
 5 | 
 6 | module tb;
 7 | 
 8 | // Stimulus registers and the observed PWM output.
 9 | reg        clk   = 0;
10 | reg        rst_n = 0;            // simulation starts in reset
11 | reg  [7:0] d_in  = 8'd128;
12 | wire       pwm;
13 | 
14 | // Free-running clock: toggles every time unit.
15 | initial forever #1 clk = ~clk;
16 | 
17 | // Device under test, connected by port name.
18 | pwm256 dut (
19 |     .clk     (clk),
20 |     .rst_n   (rst_n),
21 |     .d_in    (d_in),
22 |     .pwm_out (pwm)
23 | );
24 | 
25 | // Waveform dump of the whole hierarchy.
26 | initial
27 | begin
28 |     $dumpfile("pwm256_tb.vcd");
29 |     $dumpvars;
30 | end
31 | 
32 | // Stimulus; each delay is relative to the previous statement.
33 | initial
34 | begin
35 |     #4    rst_n = 1'b1;      // t = 4    : release reset
36 |     #516  d_in  = 8'd10;     // t = 520  : second threshold
37 |     #1028 d_in  = 8'd246;    // t = 1548 : third threshold
38 |     #1540 $finish;           // t = 3088 : end of simulation
39 | end
40 | 
41 | endmodule


--------------------------------------------------------------------------------
/verilog/benchmarks_small/various/run_testbench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Build and run the pwm256 testbench with Icarus Verilog.
3 | 
4 | # Stop at the first failure so a failed compile never runs a stale .vvp.
5 | set -e
6 | 
7 | iverilog -o pwm256_tb.vvp pwm256_tb.v pwm256.v
8 | vvp pwm256_tb.vvp
9 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_large/cordic/.gitignore:
--------------------------------------------------------------------------------
1 | cordic_*_*.vhdl
2 | *.ghw
3 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_large/cordic/cordic.template:
--------------------------------------------------------------------------------
  1 | -- pipelined CORDIC algorithm to calculate sin/cos pair from a given angle (0..1)
  2 | -- Author: Niels A. Moseley
  3 | --
  4 | 
  5 | library ieee;
  6 | use ieee.std_logic_1164.all;
  7 | use ieee.numeric_std.all;
  8 | 
  9 | --  Define the cordic stage: one registered rotation step of the (x, y) vector
 10 | entity cordic_stage_<w> is
 11 |     generic (shiftN : integer);     -- right-shift amount applied to x_in and y_in
 12 |     port
 13 |     (
 14 |         clk         : in std_logic;
 15 |         rst_n       : in std_logic;     -- synchronous reset, active low
 16 |         x_in        : in signed(<w>-1 downto 0);
 17 |         y_in        : in signed(<w>-1 downto 0);
 18 |         angle_in    : in signed(<w>-1 downto 0);    -- angle entering this stage; its sign picks the direction
 19 |         angle_adj   : in signed(<w>-1 downto 0);    -- amount added to or subtracted from angle_in
 20 |         x_out       : out signed(<w>-1 downto 0);
 21 |         y_out       : out signed(<w>-1 downto 0);
 22 |         angle_out   : out signed(<w>-1 downto 0)
 23 |     );
 24 | end cordic_stage_<w>;
 25 | 
 26 | architecture rtl of cordic_stage_<w> is
 27 |     signal new_x : signed(<w>-1 downto 0);
 28 |     signal new_y : signed(<w>-1 downto 0);
 29 |     signal new_angle : signed(<w>-1 downto 0);
 30 | begin
 31 | 
 32 |     -- combination process: computes the next x, y and angle values
 33 |     proc_comb: process(x_in, y_in, angle_in, angle_adj)
 34 |         alias    sign      : std_logic is angle_in(<w>-1);     -- MSB of angle_in, '1' when negative
 35 |         variable shifted_x : signed(<w>-1 downto 0);
 36 |         variable shifted_y : signed(<w>-1 downto 0);
 37 |     begin
 38 | 
 39 |         shifted_x := shift_right(x_in, shiftN);
 40 |         shifted_y := shift_right(y_in, shiftN);
 41 | 
 42 |         if (sign = '1') then        -- negative angle: angle moves up by angle_adj
 43 |             new_x <= x_in + shifted_y;
 44 |             new_y <= y_in - shifted_x;
 45 |             new_angle <= angle_in + angle_adj;
 46 |         else                        -- non-negative angle: opposite direction
 47 |             new_x <= x_in - shifted_y;
 48 |             new_y <= y_in + shifted_x;
 49 |             new_angle <= angle_in - angle_adj;
 50 |         end if;
 51 |     end process proc_comb;
 52 | 
 53 |     -- clocked process: registers the combinational results
 54 |     proc_clk: process(clk)
 55 |     begin
 56 |         if (rising_edge(clk)) then
 57 |             if (rst_n = '0') then
 58 |                 -- in reset
 59 |                 x_out <= (others => '0');
 60 |                 y_out <= (others => '0');
 61 |                 angle_out <= (others => '0');
 62 |             else
 63 |                 x_out <= new_x;
 64 |                 y_out <= new_y;
 65 |                 angle_out <= new_angle;
 66 |             end if;
 67 |         end if;
 68 |     end process proc_clk;
 69 | 
 70 | end rtl;
 71 | 
 72 | 
 73 | 
 74 | library ieee;
 75 | use ieee.std_logic_1164.all;
 76 | use ieee.numeric_std.all;
 77 | 
 78 | --  Define the main CORDIC entity: start-vector selection plus the stage pipeline
 79 | entity cordic_<s>_<w> is
 80 |     port
 81 |     (
 82 |         clk         : in std_logic;
 83 |         rst_n       : in std_logic;
 84 |         angle_in    : in signed(<w>-1 downto 0);
 85 |         sin_out     : out signed(<w>-1 downto 0);   -- y output of the last stage
 86 |         cos_out     : out signed(<w>-1 downto 0)    -- x output of the last stage
 87 |     );
 88 | end cordic_<s>_<w>;
 89 | 
 90 | architecture rtl of cordic_<s>_<w> is
 91 |     signal x_in : signed (<w>-1 downto 0);
 92 |     signal y_in : signed (<w>-1 downto 0);
 93 |     signal z_in : signed (<w>-1 downto 0);
 94 | 
 95 |     type bus_t IS ARRAY (0 to <s>-1) of signed(<w>-1 downto 0);     -- one element per stage
 96 | 
 97 |     signal xbus : bus_t;
 98 |     signal ybus : bus_t;
 99 |     signal zbus : bus_t;
100 | begin
101 |   
102 |     -- combination process: picks the start vector from the top two angle bits
103 |     proc_comb: process(angle_in)    
104 |     begin
105 |         case angle_in(<w>-1 downto <w>-2) is
106 |         when "00" =>                -- start on the +x axis, angle passed through
107 |             x_in <= <v>;
108 |             y_in <= (others=>'0');
109 |             z_in <= angle_in;
110 |         when "11" =>                -- start on the +x axis, angle passed through
111 |             x_in <= <v>;
112 |             y_in <= (others=>'0');
113 |             z_in <= angle_in;
114 |         when "01" =>                -- start on the +y axis, top two angle bits forced to "00"
115 |             x_in <= (others=>'0');
116 |             y_in <= <v>;
117 |             z_in <= "00" & angle_in(<w>-3 downto 0);
118 |         when "10" =>                -- start on the -y axis, top two angle bits forced to "11"
119 |             x_in <= (others=>'0');
120 |             y_in <= -<v>;
121 |             z_in <= "11" & angle_in(<w>-3 downto 0);
122 |         when others =>              -- any remaining bit values: drive zeros
123 |             x_in <= (others=>'0');
124 |             y_in <= (others=>'0');
125 |             z_in <= (others=>'0');
126 |         end case;        
127 |     end process proc_comb;
128 | 
129 |     -- generate instances of cordic_stage here..
130 |     <g>
131 | 
132 |     -- permanently assign outputs from the last element of the x and y buses
133 |     cos_out <= xbus(<s>-1);
134 |     sin_out <= ybus(<s>-1);
135 | 
136 | end rtl;


--------------------------------------------------------------------------------
/vhdl/benchmarks_large/cordic/cordic_tb.vhdl:
--------------------------------------------------------------------------------
 1 | -- testbench for cordic_10_16.vhdl
 2 | -- Author: Niels A. Moseley
 3 | --
 4 | -- Holds reset for 10 ns, then advances angle_in by 123 on every rising
 5 | -- clock edge; the clock is stopped 2000 ns after reset is released.
 6 | 
 7 | library ieee;
 8 | use ieee.std_logic_1164.all;
 9 | use ieee.numeric_std.all;
10 | 
11 | entity cordic_tb is
12 | end cordic_tb;
13 | 
14 | architecture tb of cordic_tb is
15 |     signal clk      : std_logic := '0';
16 |     signal rst_n    : std_logic := '0';
17 |     signal angle_in : signed(15 downto 0) := (others => '0');
18 |     signal sin_out  : signed(15 downto 0) := (others => '0');
19 |     signal cos_out  : signed(15 downto 0) := (others => '0');
20 | 
21 |     signal run_sim  : std_logic := '1';    -- '0' stops the clock generator
22 | begin
23 | 
24 |     -- device under test
25 |     -- Connected by name: the entity declares sin_out before cos_out, so a
26 |     -- positional list in the order (cos_out, sin_out) would swap the two.
27 |     dut: entity work.cordic_10_16
28 |         port map (
29 |             clk      => clk,
30 |             rst_n    => rst_n,
31 |             angle_in => angle_in,
32 |             sin_out  => sin_out,
33 |             cos_out  => cos_out
34 |         );
35 | 
36 |     -- clock generator: toggles every 1 ns until run_sim is cleared
37 |     proc_clk: process
38 |     begin
39 |         if (run_sim = '1') then
40 |             wait for 1 ns;
41 |             clk <= not clk;
42 |         else
43 |             wait;
44 |         end if;
45 |     end process proc_clk;        
46 | 
47 |     -- reset release and end of simulation
48 |     proc_stim: process
49 |     begin
50 |         wait for 10 ns;
51 |         rst_n <= '1';
52 | 
53 |         wait for 2000 ns;
54 | 
55 |         run_sim <= '0';
56 | 
57 |         wait;
58 |     end process proc_stim;
59 | 
60 |     -- angle sweep: the 16-bit signed value wraps around on overflow
61 |     proc_angle: process(clk)
62 |     begin
63 |         if (rising_edge(clk) and (rst_n = '1')) then
64 |             angle_in <= angle_in + to_signed(123,16);
65 |         end if;
66 |     end process proc_angle;
67 | 
68 | end tb;


--------------------------------------------------------------------------------
/vhdl/benchmarks_large/cordic/generate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | ## Generate a pipelined CORDIC with a certain number of iteration stages
 4 | ## The script must also generate the angle table
 5 | ##
 6 | ## <w> = bit width of cordic stage
 7 | ## <s> = number of stages
 8 | ## <v> = cordic vector start magnitude, approx 0.607 of full scale (1 / CORDIC gain)
 9 | ## <g> = generated calls to cordic_stage
10 | ##
11 | 
12 | 
13 | import math
14 | 
15 | def gen_cordic(stages, bits, template):
16 |     ## calculate the CORDIC gain so we can compensate this
17 |     ## by reducing the input vector length to avoid overflow.
18 |     ##
19 |     ## the Nth stage has a gain of sqrt(1.0 + 2^-2N) when counting
20 |     ## stages from 0.
21 |     ##
22 |     ## Total gain for 4 stages : 1.64248406575
23 |     ##                5 stages : 1.64568891576
24 |     ##                6 stages : 1.64649227871
25 |     ## 
26 | 
27 |     amp = 1.0
28 |     for I in range(0,stages):
29 |         amp = amp * math.sqrt(1.0 + math.pow(2.0,-2*I))
30 | 
31 |     startval = int( math.floor((2**(bits-1)-1) / amp) )
32 | 
33 |     template = template.replace("<v>", "to_signed(" + str(startval) + ",<w>)")
34 |     template = template.replace("<w>", str(bits)).replace("<s>", str(stages))
35 |     
36 |     ## generate calls to cordic_stage    
37 |     
38 |     gen = "\n"
39 |     gen = gen + " "*4 + "stage_0: entity work.cordic_stage_<w>(rtl)\n"
40 |     gen = gen + " "*4 + "    generic map (shiftN => 0)\n"
41 |     gen = gen + " "*4 + "    port map (clk, rst_n, x_in, y_in, z_in, <a>, xbus(0), ybus(0), zbus(0));\n\n"
42 |     #gen = gen + "        clk    => clk,\n"
43 |     #gen = gen + "        rst_n  => rst_n,\n"
44 |     #gen = gen + "        x_in   => x_in,\n"
45 |     #gen = gen + "        y_in   => y_in,\n"
46 |     #gen = gen + "        y_in   => y_in,\n"
47 | 
48 |     #gen = "    cordic_stage_<w> #(0) stage0(clk, rst_n, x_in, y_in, z_in, <a>, xbus[0], ybus[0], zbus[0]);\n"
49 |     tanval = int( round((2**(bits)) * 0.125,0) )
50 |     gen = gen.replace("<a>", "to_signed(" + str(tanval) + ",<w>)")
51 |     gen = gen.replace("<w>", str(bits))
52 | 
53 |     s = ""
54 |     s = s + " "*4 + "stage_<j>: entity work.cordic_stage_<w>(rtl)\n"
55 |     s = s + " "*4 + "    generic map (shiftN => <j>)\n"
56 |     s = s + " "*4 + "    port map (clk, rst_n, xbus(<i>), ybus(<i>), zbus(<i>), <a>, xbus(<j>), ybus(<j>), zbus(<j>));\n"
57 | 
58 |     for I in range(1,stages):
59 |         tanval = int( round((2**(bits)) * math.atan(math.pow(2.0, -I))/(2.0*3.14159265359),0) )
60 |         gen_s = s.replace("<i>", str(I-1)).replace("<a>", "to_signed(" + str(tanval) + ",<w>)") + "\n"
61 |         gen_s = gen_s.replace("<j>",str(I))
62 |         gen_s = gen_s.replace("<w>",str(bits))
63 |         gen = gen + gen_s
64 |         
65 |     template = template.replace("<g>", gen)
66 | 
67 |     with open("cordic_%d_%d.vhdl" % (stages, bits), "w") as f:
68 |         print(template, file=f)
69 | 
70 | with open('cordic.template','rt') as templatefile:
71 |     template = templatefile.read()
72 |     
73 |     for stages in [4,5,6,7,8,9,10]:
74 |         for bits in [8,12,16]:
75 |             gen_cordic(stages, bits, ''.join(template))
76 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_large/cordic/run_cordic_tb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Analyse, elaborate and run the CORDIC testbench with GHDL.
3 | # Requires cordic_10_16.vhdl, which generate.py produces.
4 | set -e    # stop at the first failing step
5 | ghdl -a cordic_10_16.vhdl
6 | ghdl -a cordic_tb.vhdl
7 | ghdl -e cordic_tb
8 | ghdl -r cordic_tb --wave=cordic_tb.ghw
9 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/cic/.gitignore:
--------------------------------------------------------------------------------
1 | *.cf
2 | *.ghw
3 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/cic/README.md:
--------------------------------------------------------------------------------
 1 | # CIC5 - Cascaded Integrator-Comb DSP structure
 2 | 
 3 | This module decimates the incoming data stream by a factor of 5,
 4 | using five cascaded integrators followed by five comb filters. It is a
 5 | well-known efficient DSP structure primarily found in high-speed A/D
 6 | conversion applications, such as Software Defined Radios (SDR).
 7 | 
 8 | The data widths are: 16-bit signed input, 28 bit signed output.
 9 | 
10 | Reference: https://en.wikipedia.org/wiki/Cascaded_integrator%E2%80%93comb_filter
11 | 
12 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/cic/cic5.m.vhdl:
--------------------------------------------------------------------------------
 1 | -- cic5 - a 5th order CIC decimation filter
 2 | --        with 5x decimation factor
 3 | --
 4 | -- Author: Niels Moseley
 5 | --         Symbiotic EDA / Moseley Instruments
 6 | -- 12-11-2018
 7 | -- The five integrators update on every clock, the five combs on every 5th clock.
 8 | 
 9 | library ieee;
10 | use ieee.std_logic_1164.all;
11 | use ieee.numeric_std.all;
12 | 
13 | entity cic5 is
14 |     port
15 |     (
16 |         clk         : in std_logic;
17 |         rst_n       : in std_logic;                 -- synchronous reset, active low
18 |         d_in        : in signed(15 downto 0);       -- input sample, accumulated on every clock
19 |         d_out       : out signed(27 downto 0);      -- output sample, updated on every 5th clock
20 |         d_out_valid : out std_logic     -- high for one clock cycle, together with each new d_out
21 |     );
22 | end cic5;
23 | 
24 | architecture rtl of cic5 is
25 |     signal decimation_cnt : unsigned(2 downto 0);   -- counts 0 to 4, then restarts at 0
26 | 
27 |     type state5_t is array(1 to 5) of signed(27 downto 0);
28 | 
29 |     signal int_s   : state5_t;    -- integrator states
30 |     signal comb_s  : state5_t;    -- comb states: the previous input of each comb
31 | begin
32 | 
33 |     proc_clk: process(clk)
34 |         variable tmp : state5_t;  -- comb outputs computed within one update
35 |     begin
36 |         if (rising_edge(clk)) then
37 |             if (rst_n = '0') then
38 |                 -- reset all integrator states
39 |                 for I in 1 to 5 loop
40 |                     int_s(I)  <= (others => '0');
41 |                     comb_s(I) <= (others => '0');
42 |                 end loop;
43 | 
44 |                 decimation_cnt <= (others => '0');
45 |                 d_out          <= (others => '0');
46 |                 d_out_valid    <= '0';
47 |             else
48 |                 -- default updates when clocked
49 |                 decimation_cnt <= decimation_cnt + 1;
50 |                 d_out_valid    <= '0';
51 | 
52 |                 -- calculate new integrator states
53 |                 int_s(1) <= int_s(1) + resize(d_in, int_s(1)'length);   -- d_in sign-extended to 28 bits
54 |                 for I in 2 to 5 loop
55 |                     int_s(I) <= int_s(I) + int_s(I-1);  -- int_s(I-1) is read before this edge's update
56 |                 end loop;
57 | 
58 |                 -- check if we can output new data at the
59 |                 -- reduced rate
60 |                 if (decimation_cnt = to_unsigned(4,decimation_cnt'length)) then
61 |                     decimation_cnt <= to_unsigned(0, decimation_cnt'length);    -- overrides the default increment
62 |                     
63 |                     -- calculate the CIC comb filters at the lower rate
64 |                     -- and update their filter states
65 |                     tmp(1)    := int_s(5) - comb_s(1);  -- calculate comb #1 output
66 |                     comb_s(1) <= int_s(5);              -- update comb #1 filter state
67 |                     for I in 2 to 5 loop
68 |                         tmp(I)    := tmp(I-1) - comb_s(I);  -- comb #I output
69 |                         comb_s(I) <= tmp(I-1);              -- comb #I state becomes its current input
70 |                     end loop;
71 | 
72 |                     -- output a signal!
73 |                     d_out <= tmp(5);
74 |                     d_out_valid <= '1';
75 |                 end if;
76 |             end if;
77 |         end if;
78 |     end process proc_clk;
79 | 
80 | end rtl;
81 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/cic/cic5_tb.m.vhdl:
--------------------------------------------------------------------------------
 1 | -- Testbench for cic5 - a 5th order CIC filter decimating 5x.
 2 | -- Author: Niels Moseley
 3 | --         Symbiotic EDA / Moseley Instruments
 4 | -- 12-11-2018
 5 | --
 6 | 
 7 | library ieee;
 8 | use ieee.std_logic_1164.all;
 9 | use ieee.numeric_std.all;
10 | use work.all;
11 | 
12 | entity cic5_tb is
13 | end entity cic5_tb;
14 | 
15 | architecture tb of cic5_tb is
16 |     signal clk          : std_logic := '0';
17 |     signal rst_n        : std_logic := '1';
18 |     signal d_in         : signed(15 downto 0) := (others => '0');
19 |     signal d_out        : signed(27 downto 0) := (others => '0');
20 |     signal d_out_valid  : std_logic := '0';
21 | 
22 |     signal do_sim       : std_logic := '1';  -- cleared to stop the clock
23 | begin
24 | 
25 |     -- device under test
26 |     u_dut: entity work.cic5
27 |         port map (
28 |             clk         => clk,
29 |             rst_n       => rst_n,
30 |             d_in        => d_in,
31 |             d_out       => d_out,
32 |             d_out_valid => d_out_valid
33 |         );
34 | 
35 |     -- stimulus: constant full-scale input, one check, then stop the clock
36 |     proc_sim: process
37 |     begin
38 |         d_in  <= X"7FFF";
39 |         rst_n <= '0';
40 |         wait for 4 ns;
41 |         rst_n <= '1';
42 |         -- 5*6 = 30 clock periods of 2 ns each: with 7FFF as input the
43 |         -- CIC filter's output must be stable by then.
44 |         wait for 60 ns;
45 |         -- the gain is 3125x, so the output should be 102396875
46 |         assert d_out = to_signed(102396875, d_out'length)
47 |             report "CIC5 filter output not correct" severity error;
48 |         do_sim <= '0';
49 |         wait;
50 |     end process proc_sim;
51 | 
52 |     -- clock: toggles every 1 ns, starting at time 0, while do_sim is '1'
53 |     proc_clk: process
54 |     begin
55 |         while do_sim = '1' loop
56 |             clk <= not clk;
57 |             wait for 1 ns;
58 |         end loop;
59 |         wait;
60 |     end process proc_clk;
61 | end architecture tb;


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/cic/run_testbench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Analyse, elaborate and run the cic5 testbench with GHDL.
3 | 
4 | # Abort on the first failing step instead of carrying on regardless.
5 | set -e
6 | ghdl -a cic5.m.vhdl
7 | ghdl -a cic5_tb.m.vhdl
8 | ghdl -e cic5_tb
9 | ghdl -r cic5_tb --wave=cic5_tb.ghw


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/various/.gitignore:
--------------------------------------------------------------------------------
1 | *.cf
2 | *.ghw
3 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/various/pwm256.m.vhdl:
--------------------------------------------------------------------------------
 1 | -- PWM256 - a 256 level PWM generator
 2 | -- Author: Niels Moseley
 3 | --         Symbiotic EDA / Moseley Instruments
 4 | -- 10-11-2018
 5 | --
 6 | 
 7 | library ieee;
 8 | use ieee.std_logic_1164.all;
 9 | use ieee.numeric_std.all;
10 | 
11 | -- 256 level PWM generator: interface declaration.
12 | entity pwm256 is
13 |     port (
14 |         clk     : in  std_logic;             -- clock of the internal counter
15 |         rst_n   : in  std_logic;             -- synchronous reset, active low
16 |         d_in    : in  unsigned(7 downto 0);  -- level compared with the counter
17 |         pwm_out : out std_logic              -- '1' while counter <= d_in
18 |     );
19 | end entity pwm256;
20 | 
21 | architecture rtl of pwm256 is
22 |     -- free-running 8-bit counter; wraps from 255 back to 0
23 |     signal counter : unsigned(7 downto 0);
24 | begin
25 | 
26 |     -- Counter register: cleared by the synchronous, active-low reset,
27 |     -- otherwise incremented on every rising clock edge.
28 |     proc_clk: process(clk)
29 |     begin
30 |         if (rising_edge(clk)) then
31 |             if (rst_n = '0') then
32 |                 counter <= (others => '0');
33 |             else
34 |                 counter <= counter + 1;
35 |             end if;
36 |         end if;
37 |     end process proc_clk;
38 | 
39 |     -- Comparator: combinational, so it is a concurrent assignment that
40 |     -- follows counter and d_in directly, not only events on clk.
41 |     pwm_out <= '1' when (counter <= d_in) else '0';
42 | end rtl;
43 | 


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/various/pwm256_tb.m.vhdl:
--------------------------------------------------------------------------------
 1 | -- Testbench for PWM256 - a 256 level PWM generator
 2 | -- Author: Niels Moseley
 3 | --         Symbiotic EDA / Moseley Instruments
 4 | -- 10-11-2018
 5 | --
 6 | 
 7 | library ieee;
 8 | use ieee.std_logic_1164.all;
 9 | use ieee.numeric_std.all;
10 | use work.all;
11 | 
12 | entity pwm256_tb is
13 | end entity pwm256_tb;
14 | 
15 | architecture tb of pwm256_tb is
16 |     signal clk    : std_logic := '0';
17 |     signal rst_n  : std_logic := '1';
18 |     signal d_in   : unsigned(7 downto 0) := (others => '0');
19 |     signal pwm    : std_logic;
20 | 
21 |     signal do_sim : std_logic := '1';  -- cleared to stop the clock
22 | begin
23 | 
24 |     -- device under test
25 |     u_dut: entity work.pwm256
26 |         port map (
27 |             clk     => clk,
28 |             rst_n   => rst_n,
29 |             d_in    => d_in,
30 |             pwm_out => pwm
31 |         );
32 | 
33 |     -- Stimulus: hold reset for 4 ns, then apply three levels (128, 10,
34 |     -- 246) for 512 ns each, i.e. 256 clock periods of 2 ns per level.
35 |     -- No assertions are made on the pwm output.
36 |     proc_sim: process
37 |     begin
38 |         d_in  <= to_unsigned(128, d_in'length);
39 |         rst_n <= '0';
40 |         wait for 4 ns;
41 |         rst_n <= '1';
42 |         wait for 512 ns;
43 |         d_in  <= to_unsigned(10, d_in'length);
44 |         wait for 512 ns;
45 |         d_in  <= to_unsigned(246, d_in'length);
46 |         wait for 512 ns;
47 |         do_sim <= '0';
48 |         wait;
49 |     end process proc_sim;
50 | 
51 |     -- clock: toggles every 1 ns, starting at time 0, while do_sim is '1'
52 |     proc_clk: process
53 |     begin
54 |         while do_sim = '1' loop
55 |             clk <= not clk;
56 |             wait for 1 ns;
57 |         end loop;
58 |         wait;
59 |     end process proc_clk;
60 | end architecture tb;


--------------------------------------------------------------------------------
/vhdl/benchmarks_small/various/run_testbench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Analyse, elaborate and run the pwm256 testbench with GHDL.
3 | 
4 | # Abort on the first failing step instead of carrying on regardless.
5 | set -e
6 | ghdl -a pwm256.m.vhdl
7 | ghdl -a pwm256_tb.m.vhdl
8 | ghdl -e pwm256_tb
9 | ghdl -r pwm256_tb --wave=pwm256_tb.ghw


--------------------------------------------------------------------------------