├── .cirrus.yml ├── .cirrus ├── Dockerfile.ubuntu16.04 └── upload.sh ├── .gitignore ├── LICENSE.txt ├── README.md ├── celllibs ├── README ├── simple │ ├── README │ └── simple.lib └── supergate │ ├── README │ ├── supergate.lib │ └── supergate.v ├── scripts ├── database_html.sh ├── database_make.py ├── yosys-ice40-flopcount.sh ├── yosys-ice40-lutcount.sh ├── yosys-sanity.sh ├── yosys-simplelib.sh └── yosys-supergatelib.sh ├── verilog ├── benchmarks_large │ ├── .gitignore │ ├── boom │ │ ├── MediumBoom.v.gz │ │ ├── MediumOctoBoom.v.gz │ │ ├── MegaOctoBoom.v.gz │ │ ├── README.md │ │ ├── SmallBoom.v.gz │ │ └── SmallQuadBoom.v.gz │ ├── cam │ │ ├── README.md │ │ ├── cam_bram_top.v │ │ ├── cam_srl_top.v │ │ └── generate.py │ ├── cordic │ │ ├── .gitignore │ │ ├── README.md │ │ ├── cordic.template │ │ ├── generate.py │ │ └── run_cordic_tb.sh │ ├── dspfilters │ │ ├── .gitignore │ │ ├── README.md │ │ ├── fastfir_dynamictaps.v │ │ ├── fastfir_fixedtaps.v │ │ ├── generate.py │ │ ├── slowfil_fixedtaps.v │ │ ├── slowfil_srl.vh │ │ ├── slowfil_srl_fixedtaps.v │ │ └── taps.hex │ ├── ethernet │ │ ├── .gitignore │ │ ├── README.md │ │ ├── generate.py │ │ └── udp_complete_64_top.v │ ├── marlann │ │ ├── README.md │ │ └── marlann_compute.v │ ├── mux │ │ ├── .gitignore │ │ ├── README.md │ │ ├── common.py │ │ └── generate.py │ ├── opensparc │ │ ├── README.md │ │ └── t2.v.gz │ ├── picosoc │ │ ├── .gitignore │ │ ├── README.md │ │ ├── generate.py │ │ ├── picorv32.vh │ │ ├── picorv32_large.v │ │ ├── picorv32_regular.v │ │ ├── picorv32_small.v │ │ ├── picosoc.vh │ │ ├── picosoc_top.v │ │ ├── simpleuart.vh │ │ ├── spimemio.vh │ │ └── synth_area_top.vh │ ├── riscv-bitmanip │ │ ├── README.md │ │ └── generate.py │ ├── sddac │ │ ├── README.md │ │ ├── architecture.png │ │ ├── config.json │ │ ├── genspectrumplot.py │ │ ├── run_sddac_tb.sh │ │ ├── sddac.v │ │ └── sddac_tb.v │ ├── vexriscv │ │ ├── README.md │ │ └── vexriscv.demo.GenFull.v │ └── wb2axip │ │ ├── README.md │ │ └── generate.py └── 
benchmarks_small │ ├── addertree │ ├── .gitignore │ ├── README.md │ └── generate.py │ ├── arith_ops │ ├── .gitignore │ ├── README.md │ └── generate.py │ ├── cic │ ├── .gitginore │ ├── README.md │ ├── cic5.v │ ├── cic5_tb.v │ └── run_testbench.sh │ ├── decoder │ ├── .gitignore │ └── generate.py │ ├── dspmac │ ├── .gitignore │ ├── README.md │ ├── dspmac.template │ ├── dspmac_16_40_tb.v │ ├── generate.py │ └── run_testbench.sh │ ├── lfsr │ ├── .gitignore │ ├── README.md │ └── generate.py │ ├── macc │ ├── .gitignore │ ├── common.py │ └── generate.py │ ├── mul │ ├── .gitignore │ ├── README.md │ ├── common.py │ └── generate.py │ ├── muladd │ ├── .gitignore │ ├── common.py │ └── generate.py │ ├── mux │ ├── .gitignore │ ├── README.md │ ├── common.py │ └── generate.py │ ├── onehot │ ├── .gitignore │ ├── README.md │ └── generate.py │ ├── popcount │ ├── .gitignore │ ├── README.md │ └── generate.py │ ├── priodecode │ ├── .gitignore │ ├── README.md │ └── generate.py │ ├── ram │ ├── .gitignore │ ├── dualport_syncram.template │ ├── generate.py │ ├── syncram.template │ └── syncram_tw.template │ └── various │ ├── .gitignore │ ├── README.md │ ├── crc32.v │ ├── latch.v │ ├── pwm256.v │ ├── pwm256_tb.v │ └── run_testbench.sh └── vhdl ├── benchmarks_large └── cordic │ ├── .gitignore │ ├── cordic.template │ ├── cordic_tb.vhdl │ ├── generate.py │ └── run_cordic_tb.sh └── benchmarks_small ├── cic ├── .gitignore ├── README.md ├── cic5.m.vhdl ├── cic5_tb.m.vhdl └── run_testbench.sh └── various ├── .gitignore ├── pwm256.m.vhdl ├── pwm256_tb.m.vhdl └── run_testbench.sh /.cirrus.yml: -------------------------------------------------------------------------------- 1 | task: 2 | name: sanity-test-ubuntu1604 3 | container: 4 | cpu: 1 5 | memory: 16 6 | dockerfile: .cirrus/Dockerfile.ubuntu16.04 7 | env: 8 | GITHUB_TOKEN: ENCRYPTED[c86a89228a6bd63329c6e28f2228d48e79a47a9fa0481d3e15f0f30b0e74fa26b5748afd1e44523e90450d53911e67f4] 9 | permission_script: mkdir reports && chmod +x 
./scripts/database_make.py && chmod +x ./scripts/database_html.sh && chmod +x .cirrus/upload.sh 10 | sanity_script: ./scripts/database_make.py yosys-sanity ./verilog/ && ./scripts/database_html.sh && cp ./database/index.html ./reports/benchmarks_sanity.html 11 | push_script: .cirrus/upload.sh 12 | 13 | task: 14 | name: small-test-ubuntu1604 15 | container: 16 | cpu: 1 17 | memory: 16 18 | dockerfile: .cirrus/Dockerfile.ubuntu16.04 19 | env: 20 | GITHUB_TOKEN: ENCRYPTED[c86a89228a6bd63329c6e28f2228d48e79a47a9fa0481d3e15f0f30b0e74fa26b5748afd1e44523e90450d53911e67f4] 21 | permission_script: mkdir reports && chmod +x ./scripts/database_make.py && chmod +x ./scripts/database_html.sh && chmod +x .cirrus/upload.sh 22 | small_script: ./scripts/database_make.py yosys-ice40-lutcount ./verilog/benchmarks_small/ && ./scripts/database_html.sh && cp ./database/index.html ./reports/benchmarks_small.html 23 | push_script: .cirrus/upload.sh 24 | 25 | task: 26 | name: large-test-ubuntu1604 27 | container: 28 | cpu: 1 29 | memory: 16 30 | dockerfile: .cirrus/Dockerfile.ubuntu16.04 31 | env: 32 | GITHUB_TOKEN: ENCRYPTED[c86a89228a6bd63329c6e28f2228d48e79a47a9fa0481d3e15f0f30b0e74fa26b5748afd1e44523e90450d53911e67f4] 33 | EXTRA_FLAGS: -noflatten 34 | permission_script: mkdir reports && chmod +x ./scripts/database_make.py && chmod +x ./scripts/database_html.sh && chmod +x .cirrus/upload.sh 35 | large_script: ./scripts/database_make.py yosys-ice40-lutcount ./verilog/benchmarks_large/ && ./scripts/database_html.sh && cp ./database/index.html ./reports/benchmarks_large.html 36 | push_script: .cirrus/upload.sh 37 | 38 | -------------------------------------------------------------------------------- /.cirrus/Dockerfile.ubuntu16.04: -------------------------------------------------------------------------------- 1 | FROM ubuntu:xenial-20181113 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | #RUN set -e -x ;\ 6 | # apt-get -y update ;\ 7 | # apt-get -y upgrade ;\ 8 | # apt-get -y install 
\ 9 | # build-essential autoconf cmake clang bison wget flex gperf \ 10 | # libreadline-dev gawk tcl-dev libffi-dev graphviz xdot python3-dev \ 11 | # libboost-all-dev qt5-default git libftdi-dev pkg-config 12 | 13 | RUN set -e -x ;\ 14 | apt-get -y update ;\ 15 | apt-get -y upgrade ;\ 16 | apt-get -y install \ 17 | build-essential autoconf cmake clang bison wget flex gperf \ 18 | libreadline-dev gawk tcl-dev libffi-dev graphviz xdot python3-dev \ 19 | libboost-all-dev git libftdi-dev pkg-config 20 | 21 | # get most recent release version of Yosys 22 | RUN set -e -x ;\ 23 | mkdir -p /usr/local/src ;\ 24 | cd /usr/local/src ;\ 25 | git clone --recursive https://github.com/YosysHQ/yosys.git ;\ 26 | cd yosys ;\ 27 | latestTag=$(git describe --tags `git rev-list --tags --max-count=1`) ;\ 28 | git checkout $latestTag ;\ 29 | make -j $(nproc) ;\ 30 | make install ;\ 31 | rm -rf /usr/local/src/yosys 32 | 33 | -------------------------------------------------------------------------------- /.cirrus/upload.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ "$CIRRUS_RELEASE" == "" ]]; then 4 | echo "Not a release. No need to deploy!" 5 | exit 0 6 | fi 7 | 8 | if [[ "$GITHUB_TOKEN" == "" ]]; then 9 | echo "Please provide GitHub access token via GITHUB_TOKEN environment variable!" 10 | exit 1 11 | fi 12 | 13 | file_content_type="application/octet-stream" 14 | files_to_upload=( 15 | ./reports/benchmarks_small.html 16 | ./reports/benchmarks_large.html 17 | ) 18 | 19 | # Every CI task generates only one of these reports. Iterate over all array 20 | # elements (a bare $files_to_upload expands to the first element only) and 21 | # skip any report that is missing in the current task. 22 | for fpath in "${files_to_upload[@]}" 23 | do 24 | if [[ ! -f "$fpath" ]]; then 25 | echo "Skipping $fpath (not generated by this task)." 26 | continue 27 | fi 28 | echo "Uploading $fpath..." 
29 | name=$(basename "$fpath") 30 | url_to_upload="https://uploads.github.com/repos/$CIRRUS_REPO_FULL_NAME/releases/$CIRRUS_RELEASE/assets?name=$name" 31 | curl -X POST \ 32 | --data-binary "@$fpath" \ 33 | --header "Authorization: token $GITHUB_TOKEN" \ 34 | --header "Content-Type: $file_content_type" \ 35 | "$url_to_upload" 36 | done 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /database 2 | __pycache__ 3 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2018, Symbiotic EDA GmbH. 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yosys-bench 2 | 3 | This is a collection of Verilog designs of different type and size, used as benchmarks in Yosys development. 4 | 5 | Create a PR if you think you have an interesting benchmark. 
6 | 7 | ### benchmarks_small 8 | 9 | This directory contains small (mostly synthetic) benchmarks that can be used 10 | to analyse and compare the performance of the tools in specific situations. 11 | 12 | 13 | ### benchmarks_large 14 | 15 | This directory contains larger "real-world" designs. They can be used for 16 | estimating the overall performance of the tools. 17 | 18 | # Running the benchmarks 19 | 20 | Benchmarks are processed by the ```./scripts/database_make.py``` Python3 script. The script performs the following steps: 21 | 22 | * It traverses the given directories and executes the `generate.py` Python script, if there is one. These scripts generate Verilog or VHDL files for some testbenches. 23 | * It checks for a `config.json` file. If there is one, it loads the configuration and reads which HDL files it should use for the testbench. 24 | * If there wasn't a `config.json` file, it simply uses all the `.v` and `.vhdl` files it can find for the testbench. 25 | 26 | example: 27 | ```./scripts/database_make.py yosys-ice40-lutcount ``` 28 | 29 | Each benchmark produces an entry in the `./database` directory. Running `./scripts/database_html.sh` will generate a .html file with the results in the `./database` directory. 30 | 31 | # Adding benchmarks 32 | To add a benchmark, simply create a directory in the `benchmarks_small` or `benchmarks_large` directory, optionally supply a `generate.py` and/or `config.json` and add your HDL files. 33 | 34 | Please also add a `README.md` file to your benchmark so others know what it is you are benchmarking. 35 | 36 | # The `config.json` file 37 | The `config.json` file lists the HDL files that you want to benchmark. Each file will be benchmarked separately. 
38 | 39 | Example: 40 | 41 | ``` 42 | { 43 | "files": 44 | [ 45 | "sddac.v", "sddac2.v" 46 | ] 47 | } 48 | ``` 49 | -------------------------------------------------------------------------------- /celllibs/README: -------------------------------------------------------------------------------- 1 | Put your proprietary cell libs in subdirs here... -------------------------------------------------------------------------------- /celllibs/simple/README: -------------------------------------------------------------------------------- 1 | ======================================== 2 | Simple Cell Lib version 1.0 3 | ======================================== 4 | 5 | This is a minimal cell library for benchmarking purposes. 6 | It contains process-agnostic cells with just one drive strength. 7 | Cell sizes are given in units of the inverter size. 8 | This library is solely meant to be used for benchmarking and is unfit for any other purpose. 9 | 10 | The following cells are present in the library: 11 | 12 | * inverter. 13 | * tri-state inverter. 14 | * 2-input nand. 15 | * 2-input nor. 16 | * 2-input xor. 17 | * 2-input inverting mux. 18 | * D-type flip-flop with reset and preset. 19 | -------------------------------------------------------------------------------- /celllibs/simple/simple.lib: -------------------------------------------------------------------------------- 1 | /********************************************/ 2 | /* */ 3 | /* Simple cell library for Bench marking */ 4 | /* */ 5 | /* Symbiotic EDA GmbH / Moseley Instruments */ 6 | /* Niels A. 
Moseley */ 7 | /* */ 8 | /* Process: none */ 9 | /* */ 10 | /* Date : 12-9-2018 */ 11 | /* Version: 1.1 */ 12 | /* */ 13 | /* Changelog: */ 14 | /* 1.0 NAM Initial version */ 15 | /* 1.1 NAM Added latch */ 16 | /* */ 17 | /********************************************/ 18 | 19 | library(simple) 20 | { 21 | technology (cmos); 22 | revision : 1.0; 23 | 24 | time_unit : "1ps"; 25 | pulling_resistance_unit : "1kohm"; 26 | voltage_unit : "1V"; 27 | current_unit : "1uA"; 28 | 29 | capacitive_load_unit(1,ff); 30 | 31 | default_inout_pin_cap : 7.0; 32 | default_input_pin_cap : 7.0; 33 | default_output_pin_cap : 0.0; 34 | default_fanout_load : 1.0; 35 | 36 | default_wire_load_capacitance : 0.1; 37 | default_wire_load_resistance : 1.0e-3; 38 | default_wire_load_area : 0.0; 39 | 40 | nom_process : 1.0; 41 | nom_temperature : 25.0; 42 | nom_voltage : 1.2; 43 | 44 | delay_model : generic_cmos; 45 | 46 | /* Inverter */ 47 | cell (inv) 48 | { 49 | area : 1; 50 | pin(A) 51 | { 52 | direction : input; 53 | } 54 | pin(Y) 55 | { 56 | direction : output; 57 | function : "A'"; 58 | } 59 | } 60 | 61 | /* tri-state inverter */ 62 | cell (tri_inv) 63 | { 64 | area : 4; 65 | pin(A) 66 | { 67 | direction : input; 68 | } 69 | pin(S) 70 | { 71 | direction : input; 72 | } 73 | pin(Z) 74 | { 75 | direction : output; 76 | function : "A'"; 77 | three_State : "S'"; 78 | } 79 | } 80 | 81 | cell (buf) 82 | { 83 | area : 5; 84 | pin(A) 85 | { 86 | direction : input; 87 | } 88 | pin(Y) 89 | { 90 | direction : output; 91 | function : "A"; 92 | } 93 | } 94 | 95 | /* 2-input NAND gate */ 96 | cell (nand2) 97 | { 98 | area : 3; 99 | pin(A) 100 | { 101 | direction : input; 102 | } 103 | pin(B) 104 | { 105 | direction : input; 106 | } 107 | pin(Y) 108 | { 109 | direction: output; 110 | function : "(A * B)'"; 111 | } 112 | } 113 | 114 | /* 2-input NOR gate */ 115 | cell (nor2) 116 | { 117 | area : 3; 118 | pin(A) 119 | { 120 | direction : input; 121 | } 122 | pin(B) 123 | { 124 | direction : input; 125 | } 
126 | pin(Y) 127 | { 128 | direction: output; 129 | function : "(A + B)'"; 130 | } 131 | } 132 | 133 | /* 2-input XOR */ 134 | cell (xor2) 135 | { 136 | area : 6; 137 | pin(A) 138 | { 139 | direction : input; 140 | } 141 | pin(B) 142 | { 143 | direction : input; 144 | } 145 | pin(Y) 146 | { 147 | direction: output; 148 | function : "(A *B') + (A' * B)"; 149 | } 150 | } 151 | 152 | /* 2-input inverting MUX */ 153 | cell (imux2) 154 | { 155 | area : 5; 156 | pin(A) 157 | { 158 | direction : input; 159 | } 160 | pin(B) 161 | { 162 | direction : input; 163 | } 164 | pin(S) 165 | { 166 | direction : input; 167 | } 168 | pin(Y) 169 | { 170 | direction: output; 171 | function : "( (A * S) + (B * S') )'"; 172 | } 173 | } 174 | 175 | /* D-type flip-flop with asynchronous reset and preset */ 176 | cell (dff) 177 | { 178 | area : 83; 179 | ff("IQ", "IQN") 180 | { 181 | next_state : "D"; 182 | clocked_on : "CLK"; 183 | clear : "RESET"; 184 | preset : "PRESET"; 185 | clear_preset_var1 : L; 186 | clear_preset_var2 : L; 187 | } 188 | pin(D) 189 | { 190 | direction : input; 191 | } 192 | pin(CLK) 193 | { 194 | direction : input; 195 | } 196 | pin(RESET) 197 | { 198 | direction : input; 199 | } 200 | pin(PRESET) 201 | { 202 | direction : input; 203 | } 204 | pin(Q) 205 | { 206 | direction: output; 207 | function : "IQ"; 208 | timing() { 209 | timing_type : rising_edge; 210 | intrinsic_rise : 65; 211 | intrinsic_fall : 65; 212 | rise_resistance : 0; 213 | fall_resistance : 0; 214 | related_pin : "CLK"; 215 | } 216 | timing () { 217 | timing_type : clear; 218 | timing_sense : positive_unate; 219 | intrinsic_fall : 75; 220 | related_pin : "RESET"; 221 | } 222 | timing () { 223 | timing_type : preset; 224 | timing_sense : negative_unate; 225 | intrinsic_rise : 75; 226 | related_pin : "PRESET"; 227 | } 228 | } 229 | pin(QN) 230 | { 231 | direction: output; 232 | function : "IQN"; 233 | timing() { 234 | timing_type : rising_edge; 235 | intrinsic_rise : 65; 236 | intrinsic_fall : 65; 237 
| rise_resistance : 0; 238 | fall_resistance : 0; 239 | related_pin : "CLK"; 240 | } 241 | timing () { 242 | timing_type : preset; 243 | timing_sense : negative_unate; 244 | intrinsic_rise : 75; 245 | related_pin : "RESET"; 246 | } 247 | timing () { 248 | timing_type : clear; 249 | timing_sense : positive_unate; 250 | intrinsic_fall : 75; 251 | related_pin : "PRESET"; 252 | } 253 | } 254 | } 255 | 256 | /* Latch with asynchronous reset and preset */ 257 | cell(latch) 258 | { 259 | area : 5; 260 | latch ("IQ","IQN") 261 | { 262 | enable : "G"; 263 | data_in : "D"; 264 | } 265 | 266 | pin(D) 267 | { 268 | direction : input; 269 | } 270 | pin(G) 271 | { 272 | direction : input; 273 | } 274 | 275 | pin(Q) 276 | { 277 | direction : output; 278 | function : "IQ"; 279 | internal_node : "Q"; 280 | 281 | timing() 282 | { 283 | timing_type : rising_edge; 284 | intrinsic_rise : 65; 285 | intrinsic_fall : 65; 286 | rise_resistance : 0; 287 | fall_resistance : 0; 288 | related_pin : "G"; 289 | } 290 | 291 | timing() 292 | { 293 | timing_sense : positive_unate; 294 | intrinsic_rise : 65; 295 | intrinsic_fall : 65; 296 | rise_resistance : 0; 297 | fall_resistance : 0; 298 | related_pin : "D"; 299 | } 300 | } 301 | 302 | pin(QN) 303 | { 304 | direction : output; 305 | function : "IQN"; 306 | internal_node : "QN"; 307 | 308 | timing() 309 | { 310 | timing_type : rising_edge; 311 | intrinsic_rise : 65; 312 | intrinsic_fall : 65; 313 | rise_resistance : 0; 314 | fall_resistance : 0; 315 | related_pin : "G"; 316 | } 317 | 318 | timing() 319 | { 320 | timing_sense : negative_unate; 321 | intrinsic_rise : 65; 322 | intrinsic_fall : 65; 323 | rise_resistance : 0; 324 | fall_resistance : 0; 325 | related_pin : "D"; 326 | } 327 | } 328 | } 329 | 330 | } /* end */ 331 | -------------------------------------------------------------------------------- /celllibs/supergate/README: -------------------------------------------------------------------------------- 1 | 
======================================== 2 | Supergate Cell Lib version 1.0 3 | ======================================== 4 | 5 | This is a cell library for benchmarking purposes. 6 | The library is based on the 'simple' cell library but additionally 7 | contains half-adder, full-adder, AOI211 and OAI211 cells. 8 | 9 | The cells are process-agnostic with just one drive strength. 10 | Cell sizes are given in units of the inverter size. 11 | This library is solely meant to be used for benchmarking and is unfit for any other purpose. 12 | 13 | The following cells are present in the library: 14 | 15 | * inverter. 16 | * tri-state inverter. 17 | * 2-input nand. 18 | * 2-input nor. 19 | * 2-input xor. 20 | * 2-input inverting mux. 21 | * D-type flip-flop with reset and preset. 22 | * half-adder cell. 23 | * full-adder cell. 24 | * and-or-invert 211 cell. 25 | * or-and-invert 211 cell. 26 | 27 | -------------------------------------------------------------------------------- /celllibs/supergate/supergate.lib: -------------------------------------------------------------------------------- 1 | /********************************************/ 2 | /* */ 3 | /* Supergate cell library for Bench marking */ 4 | /* */ 5 | /* Symbiotic EDA GmbH / Moseley Instruments */ 6 | /* Niels A. 
Moseley */ 7 | /* */ 8 | /* Process: none */ 9 | /* */ 10 | /* Date : 02-11-2018 */ 11 | /* Version: 1.0 */ 12 | /* */ 13 | /********************************************/ 14 | 15 | library(supergate) { 16 | technology (cmos); 17 | revision : 1.0; 18 | 19 | time_unit : "1ps"; 20 | pulling_resistance_unit : "1kohm"; 21 | voltage_unit : "1V"; 22 | current_unit : "1uA"; 23 | 24 | capacitive_load_unit(1,ff); 25 | 26 | default_inout_pin_cap : 7.0; 27 | default_input_pin_cap : 7.0; 28 | default_output_pin_cap : 0.0; 29 | default_fanout_load : 1.0; 30 | 31 | default_wire_load_capacitance : 0.1; 32 | default_wire_load_resistance : 1.0e-3; 33 | default_wire_load_area : 0.0; 34 | 35 | nom_process : 1.0; 36 | nom_temperature : 25.0; 37 | nom_voltage : 1.2; 38 | 39 | delay_model : generic_cmos; 40 | 41 | /* Inverter */ 42 | cell (inv) { 43 | area : 1; 44 | pin(A) { 45 | direction : input; 46 | } 47 | 48 | pin(Y) { 49 | direction : output; 50 | function : "A'"; 51 | } 52 | } 53 | 54 | /* tri-state inverter */ 55 | cell (tri_inv) { 56 | area : 4; 57 | pin(A) { 58 | direction : input; 59 | } 60 | pin(S) { 61 | direction : input; 62 | } 63 | pin(Z) { 64 | direction : output; 65 | function : "A'"; 66 | three_State : "S'"; 67 | } 68 | } 69 | 70 | cell (buffer) { 71 | area : 5; 72 | pin(A) { 73 | direction : input; 74 | } 75 | pin(Y) { 76 | direction : output; 77 | function : "A"; 78 | } 79 | } 80 | 81 | /* 2-input NAND gate */ 82 | cell (nand2) { 83 | area : 3; 84 | pin(A) { 85 | direction : input; 86 | } 87 | pin(B) { 88 | direction : input; 89 | } 90 | pin(Y) { 91 | direction: output; 92 | function : "(A * B)'"; 93 | } 94 | } 95 | 96 | /* 2-input NOR gate */ 97 | cell (nor2) { 98 | area : 3; 99 | pin(A) { 100 | direction : input; 101 | } 102 | pin(B) { 103 | direction : input; 104 | } 105 | pin(Y) { 106 | direction: output; 107 | function : "(A + B)'"; 108 | } 109 | } 110 | 111 | /* 2-input XOR */ 112 | cell (xor2) { 113 | area : 6; 114 | pin(A) { 115 | direction : input; 116 | } 
117 | pin(B) { 118 | direction : input; 119 | } 120 | pin(Y) { 121 | direction: output; 122 | function : "(A *B') + (A' * B)"; 123 | } 124 | } 125 | 126 | /* 2-input inverting MUX */ 127 | cell (imux2) { 128 | area : 5; 129 | pin(A) { 130 | direction : input; 131 | } 132 | pin(B) { 133 | direction : input; 134 | } 135 | pin(S) { 136 | direction : input; 137 | } 138 | pin(Y) { 139 | direction: output; 140 | function : "( (A * S) + (B * S') )'"; 141 | } 142 | } 143 | 144 | /* D-type flip-flop with asynchronous reset and preset */ 145 | cell (dff) 146 | { 147 | area : 6; 148 | ff("IQ", "IQN") { 149 | next_state : "D"; 150 | clocked_on : "CLK"; 151 | clear : "RESET"; 152 | preset : "PRESET"; 153 | clear_preset_var1 : L; 154 | clear_preset_var2 : L; 155 | } 156 | pin(D) { 157 | direction : input; 158 | } 159 | pin(CLK) { 160 | direction : input; 161 | } 162 | pin(RESET) { 163 | direction : input; 164 | } 165 | pin(PRESET) { 166 | direction : input; 167 | } 168 | pin(Q) { 169 | direction: output; 170 | function : "IQ"; 171 | timing() { 172 | timing_type : rising_edge; 173 | intrinsic_rise : 65; 174 | intrinsic_fall : 65; 175 | rise_resistance : 0; 176 | fall_resistance : 0; 177 | related_pin : "CLK"; 178 | } 179 | timing () { 180 | timing_type : clear; 181 | timing_sense : positive_unate; 182 | intrinsic_fall : 75; 183 | related_pin : "RESET"; 184 | } 185 | timing () { 186 | timing_type : preset; 187 | timing_sense : negative_unate; 188 | intrinsic_rise : 75; 189 | related_pin : "PRESET"; 190 | } 191 | } 192 | pin(QN) { 193 | direction: output; 194 | function : "IQN"; 195 | timing() { 196 | timing_type : rising_edge; 197 | intrinsic_rise : 65; 198 | intrinsic_fall : 65; 199 | rise_resistance : 0; 200 | fall_resistance : 0; 201 | related_pin : "CLK"; 202 | } 203 | timing () { 204 | timing_type : preset; 205 | timing_sense : negative_unate; 206 | intrinsic_rise : 75; 207 | related_pin : "RESET"; 208 | } 209 | timing () { 210 | timing_type : clear; 211 | timing_sense : 
positive_unate; 212 | intrinsic_fall : 75; 213 | related_pin : "PRESET"; 214 | } 215 | } 216 | } 217 | 218 | /* Latch */ 219 | cell(latch) { 220 | area : 5; 221 | latch ("IQ","IQN") { 222 | enable : "G"; 223 | data_in : "D"; 224 | } 225 | 226 | pin(D) { 227 | direction : input; 228 | } 229 | pin(G) { 230 | direction : input; 231 | } 232 | 233 | pin(Q) { 234 | direction : output; 235 | function : "IQ"; 236 | internal_node : "Q"; 237 | 238 | timing() { 239 | timing_type : rising_edge; 240 | intrinsic_rise : 65; 241 | intrinsic_fall : 65; 242 | rise_resistance : 0; 243 | fall_resistance : 0; 244 | related_pin : "G"; 245 | } 246 | 247 | timing() { 248 | timing_sense : positive_unate; 249 | intrinsic_rise : 65; 250 | intrinsic_fall : 65; 251 | rise_resistance : 0; 252 | fall_resistance : 0; 253 | related_pin : "D"; 254 | } 255 | } 256 | 257 | pin(QN) { 258 | direction : output; 259 | function : "IQN"; 260 | internal_node : "QN"; 261 | 262 | timing() { 263 | timing_type : rising_edge; 264 | intrinsic_rise : 65; 265 | intrinsic_fall : 65; 266 | rise_resistance : 0; 267 | fall_resistance : 0; 268 | related_pin : "G"; 269 | } 270 | 271 | timing() { 272 | timing_sense : negative_unate; 273 | intrinsic_rise : 65; 274 | intrinsic_fall : 65; 275 | rise_resistance : 0; 276 | fall_resistance : 0; 277 | related_pin : "D"; 278 | } 279 | } 280 | } 281 | 282 | /* 3 input AND-OR-INVERT gate */ 283 | cell (aoi211) { 284 | area : 3; 285 | pin(A) { 286 | direction : input; 287 | } 288 | pin(B) { 289 | direction : input; 290 | } 291 | pin(C) { 292 | direction : input; 293 | } 294 | pin(Y) { 295 | direction: output; 296 | function : "((A * B) + C)'"; 297 | } 298 | } 299 | 300 | 301 | /* 3 input OR-AND-INVERT gate */ 302 | cell (oai211) { 303 | area : 3; 304 | pin(A) { 305 | direction : input; 306 | } 307 | pin(B) { 308 | direction : input; 309 | } 310 | pin(C) { 311 | direction : input; 312 | } 313 | pin(Y) { 314 | direction: output; 315 | function : "((A + B) * C)'"; 316 | } 317 | } 318 | 
319 | /* half adder */ 320 | cell (halfadder) { 321 | area : 5; 322 | pin(A) { 323 | direction : input; 324 | } 325 | pin(B) { 326 | direction : input; 327 | } 328 | pin(C) { 329 | direction : output; 330 | function : "(A * B)"; 331 | } 332 | pin(Y) { 333 | direction: output; 334 | function : "(A *B') + (A' * B)"; 335 | } 336 | } 337 | 338 | /* full adder */ 339 | cell (fulladder) { 340 | area : 8; 341 | pin(A) { 342 | direction : input; 343 | } 344 | pin(B) { 345 | direction : input; 346 | } 347 | pin(CI) { 348 | direction : input; 349 | } 350 | pin(CO) { 351 | direction : output; 352 | function : "(((A * B)+(B * CI))+(CI * A))"; 353 | } 354 | pin(Y) { 355 | direction: output; 356 | function : "((A^B)^CI)"; 357 | } 358 | } 359 | 360 | } /* end */ 361 | -------------------------------------------------------------------------------- /celllibs/supergate/supergate.v: -------------------------------------------------------------------------------- 1 | /********************************************/ 2 | /* */ 3 | /* Supergate cell library for Bench marking */ 4 | /* */ 5 | /* Symbiotic EDA GmbH / Moseley Instruments */ 6 | /* Niels A. 
Moseley */ 7 | /* */ 8 | /* Process: none */ 9 | /* */ 10 | /* Date : 02-11-2018 */ 11 | /* Version: 1.0 */ 12 | /* */ 13 | /********************************************/ 14 | 15 | module inv(input A, output Y); 16 | assign Y = ~A; 17 | endmodule 18 | 19 | module tri_inv(input A, input S, output reg Y); 20 | always@(*) 21 | begin 22 | if (S==1'b0) 23 | begin 24 | Y <= 1'bz; 25 | end 26 | else 27 | begin 28 | Y <= ~A; 29 | end 30 | end 31 | endmodule 32 | 33 | module buffer(input A, output Y); 34 | assign Y = A; 35 | endmodule 36 | 37 | module nand2(input A, input B, output Y); 38 | assign Y = ~(A & B); 39 | endmodule 40 | 41 | module nor2(input A, input B, output Y); 42 | assign Y = ~(A | B); 43 | endmodule 44 | 45 | module xor2(input A, input B, output Y); 46 | assign Y = A ^ B; 47 | endmodule 48 | 49 | module imux2(input A, input B, input S, output Y); 50 | assign Y = ~(S ? A : B); 51 | endmodule 52 | // Rising-edge D flip-flop with asynchronous active-high RESET and PRESET (RESET is tested first and therefore wins) 53 | module dff(input CLK, input D, input RESET, input PRESET, output reg Q, output reg QN); 54 | always@(posedge CLK or posedge RESET or posedge PRESET) 55 | begin 56 | if (RESET) 57 | begin 58 | Q <= 1'b0; 59 | QN <= 1'b1; 60 | end 61 | else 62 | if (PRESET) 63 | begin 64 | Q <= 1'b1; 65 | QN <= 1'b0; 66 | end 67 | else 68 | // neither RESET nor PRESET is active, so this event was a rising CLK edge 69 | begin 70 | Q <= D; 71 | QN <= ~D; 72 | end 73 | end 74 | endmodule 75 | 76 | module latch(input G, input D, output reg Q, output reg QN); 77 | always@(G or D) 78 | begin 79 | if (G) 80 | begin 81 | Q <= D; 82 | QN <= ~D; 83 | end 84 | end 85 | endmodule 86 | 87 | 88 | module aoi211(input A, input B, input C, output Y); 89 | assign Y = ~((A&B)|C); 90 | endmodule 91 | 92 | module oai211(input A, input B, input C, output Y); 93 | assign Y = ~((A|B)&C); 94 | endmodule 95 | 96 | module halfadder(input A, input B, output C, output Y); 97 | assign Y = A^B; 98 | assign C = A&B; 99 | endmodule 100 | 101 | module fulladder(input A, input B, input CI, output CO, output Y); 102 | assign Y = (A^B)^CI; 103 | assign CO = ((A&B)|(B&CI))|(CI&A); 104 | endmodule 105 | 
-------------------------------------------------------------------------------- /scripts/database_html.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | tables="$(cd database; ls -d */ | sed 's,/$,,;')" 4 | tests="$(cd database; ls */*.dat | sed 's,.*/,,; s,\.dat$,,;' | sort -n)" 5 | 6 | exec > database/index.html 7 | 8 | echo "" 9 | echo "" 10 | echo "" 11 | for tab in $tables; do echo ""; done 12 | echo "" 13 | for tst in $tests; do 14 | echo "" 15 | echo "" 16 | for tab in $tables; do 17 | if test -f database/$tab/$tst.dat; then 18 | echo "" 19 | else 20 | echo "" 21 | fi 22 | done 23 | echo "" 24 | done 25 | echo "
Test$tab
$tst$( cat database/$tab/$tst.dat )N/A
" 26 | -------------------------------------------------------------------------------- /scripts/database_make.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import subprocess 6 | import json 7 | 8 | ## execute a JSON configuration 9 | def executeConfig(cellibpath, shellScriptName, dbpath, subdir, config): 10 | for fileName in config["files"]: 11 | hdlsrc = os.path.join(subdir, fileName) 12 | filewithoutext, file_extension = os.path.splitext(fileName) 13 | datfile = open(os.path.join(dbpath, filewithoutext + ".dat"), "wt") 14 | print(" Running HDL file " + fileName) 15 | retval = subprocess.check_call([os.path.abspath("./scripts/"+shellScriptName+".sh"), os.path.abspath("./" +hdlsrc), celllibpath], 16 | cwd=os.path.abspath(subdir), 17 | stdout=datfile, 18 | stderr=sys.stderr 19 | ) 20 | datfile.close() 21 | return 22 | 23 | ########################################################################################## 24 | ## MAIN PROGRAM STARTS HERE 25 | ########################################################################################## 26 | 27 | # Check the number of arguments to provide help, if needed. 28 | if (len(sys.argv) < 3): 29 | print("Usage: database_make .. 
") 30 | sys.exit(1) 31 | 32 | shellScriptName = sys.argv[1] 33 | dbpath = os.path.abspath("./database/"+shellScriptName) 34 | celllibpath = os.path.abspath("./celllibs") 35 | 36 | os.system("rm -rf "+dbpath) 37 | os.system("mkdir -p "+dbpath) 38 | 39 | # call all generate.py scripts 40 | for dir in sys.argv[2:]: 41 | for subdir, dirs, files in os.walk(dir): 42 | for file in files: 43 | if (file == "generate.py"): 44 | script = os.path.join(subdir, file) 45 | print("Executing " + script) 46 | retval = subprocess.check_call(["python3","generate.py"], 47 | cwd=os.path.abspath(subdir), 48 | stdout=sys.stdout, 49 | stderr=sys.stderr 50 | ) 51 | 52 | # execute all .v or .vhdl scripts specified in the config.json file 53 | # or if there is no config.json, simply walk the directory. 54 | dir = sys.argv[2] 55 | queue = [ dir ] 56 | print("Processing directory: " + dir) 57 | while queue: 58 | subdir = queue.pop() 59 | listdir = os.listdir(subdir) 60 | # Do not enter git repositories 61 | if '.git' in listdir: continue 62 | for item in listdir: 63 | path = os.path.join(subdir, item) 64 | if os.path.isdir(path): 65 | queue.append(path) 66 | elif os.path.isfile(path): 67 | # check if there is a config.json file 68 | if item == 'config.json': 69 | print(" Running config file: " + item) 70 | with open(path, 'r') as configFile: 71 | try: 72 | config = json.load(configFile) 73 | executeConfig(celllibpath, shellScriptName, dbpath, subdir, config) 74 | except ValueError as error: 75 | print(" --- ERROR PARSING CONFIG.JSON ---") 76 | pass 77 | if (item.endswith(".v")): 78 | # skip all files that end in _tb.v as they are testbench files 79 | # containing unsynthesizable code 80 | if (item.endswith("_tb.v")): 81 | print(" Skipping Verilog testbench file " + item) 82 | continue 83 | # skip any netlist files that might have been produced in 84 | # previous runs 85 | if (item.endswith("_netlist.v")): 86 | print(" Skipping Verilog netlist file " + item) 87 | continue 88 | verilogsrc = 
os.path.join(subdir, item) 89 | filewithoutext, file_extension = os.path.splitext(item) 90 | datfile = open(os.path.join(dbpath, filewithoutext + ".dat"), "wt") 91 | print(" Running Verilog file " + item) 92 | retval = subprocess.check_call([os.path.abspath("./scripts/"+shellScriptName+".sh"), os.path.abspath("./" +verilogsrc), celllibpath], 93 | cwd=os.path.abspath(subdir), 94 | stdout=datfile, 95 | stderr=sys.stderr 96 | ) 97 | datfile.close() 98 | 99 | if (item.endswith(".vhdl")): 100 | vhdlsrc = os.path.join(subdir, item) 101 | filewithoutext, file_extension = os.path.splitext(item) 102 | datfile = open(os.path.join(dbpath, filewithoutext + ".dat"), "wt") 103 | print(" Running VHDL file " + item) 104 | retval = subprocess.check_call([os.path.abspath("./scripts/"+shellScriptName+".sh"),os.path.abspath("./" +vhdlsrc), celllibpath], 105 | cwd=os.path.abspath(subdir), 106 | stdout=datfile, 107 | stderr=sys.stderr 108 | ) 109 | datfile.close() 110 | -------------------------------------------------------------------------------- /scripts/yosys-ice40-flopcount.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # mode script for ICE40 FPGA FF count 5 | # 6 | 7 | logfile=$( mktemp ) 8 | scriptpath=$( pwd ) 9 | 10 | # create synthesis script 11 | myfile="$1" 12 | if [ ${myfile: -5} == ".vhdl" ] 13 | then 14 | topmodule=$( basename -s .vhdl "$1" ) 15 | echo "read -vhdl $1" > script.yos 16 | else 17 | topmodule=$( basename -s .v "$1") 18 | echo "read -vlog2k $1" > script.yos 19 | fi 20 | echo "synth_ice40 -top $topmodule" >> script.yos 21 | 22 | # run tools 23 | yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null 24 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /SB_DFF/ { s/.* //; p; }; }; d;' $logfile 25 | rm -f $logfile 26 | rm -f script.yos 27 | -------------------------------------------------------------------------------- /scripts/yosys-ice40-lutcount.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # mode script for ICE40 FPGA LUT count 5 | # 6 | 7 | logfile=$( mktemp ) 8 | scriptpath=$( pwd ) 9 | 10 | # create synthesis script 11 | myfile="$1" 12 | if [ ${myfile: -5} == ".vhdl" ] 13 | then 14 | topmodule=$( basename -s .vhdl "$1" ) 15 | echo "read -vhdl $1" > script.yos 16 | else 17 | topmodule=$( basename -s .v "$1") 18 | echo "read -vlog2k $1" > script.yos 19 | fi 20 | echo "synth_ice40 -top $topmodule $EXTRA_FLAGS" >> script.yos 21 | 22 | # run tools 23 | yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null 24 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /SB_LUT4/ { s/.* //; p; }; }; d;' $logfile 25 | rm -f $logfile 26 | rm -f script.yos 27 | -------------------------------------------------------------------------------- /scripts/yosys-sanity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # mode script for a sanity check: reads the design, runs "hierarchy -check" and prints the yosys exit status 5 | # 6 | # NOTE: the ABC script below is carried over from the cell-library mode scripts and is not used by this script: 7 | # strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put 8 | # 9 | 10 | logfile=$( mktemp ) 11 | scriptpath=$( pwd ) 12 | 13 | # create synthesis script 14 | myfile="$1" 15 | celllibpath="$2" 16 | 17 | #mkdir -p netlists 18 | 19 | if [ ${myfile: -5} == ".vhdl" ] 20 | then 21 | topmodule=$( basename -s .vhdl "$1" ) 22 | echo "read -vhdl $1" > script.yos 23 | else 24 | topmodule=$( basename -s .v "$1") 25 | echo "read -vlog2k $1" > script.yos 26 | fi 27 | echo "hierarchy -check -top $topmodule" >> script.yos 28 | 29 | yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null 30 | echo $? 
31 | rm -f $logfile 32 | rm -f script.yos 33 | -------------------------------------------------------------------------------- /scripts/yosys-simplelib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # mode script for simple ASIC cell library 5 | # 6 | # Using custom ABC script so we can get the area of the circuit: 7 | # strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put 8 | # 9 | 10 | logfile=$( mktemp ) 11 | scriptpath=$( pwd ) 12 | 13 | # create synthesis script 14 | myfile="$1" 15 | celllibpath="$2" 16 | 17 | #mkdir -p netlists 18 | 19 | if [ ${myfile: -5} == ".vhdl" ] 20 | then 21 | topmodule=$( basename -s .vhdl "$1" ) 22 | echo "read -vhdl $1" > script.yos 23 | else 24 | topmodule=$( basename -s .v "$1") 25 | echo "read -vlog2k $1" > script.yos 26 | fi 27 | echo "hierarchy; proc; fsm; opt; memory; opt" >> script.yos 28 | echo "techmap; opt" >> script.yos 29 | echo "dfflibmap -liberty $celllibpath/simple/simple.lib" >> script.yos 30 | echo "abc -liberty $celllibpath/simple/simple.lib" >> script.yos 31 | echo "write_verilog /$1_netlist.v" >> script.yos 32 | echo "stat -liberty $celllibpath/simple/simple.lib" >> script.yos 33 | #echo "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put" > abc.script 34 | 35 | # run tools 36 | #yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null 37 | yosys -l $logfile -p "script $scriptpath/script.yos" >/dev/null 38 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /SB_LUT4/ { s/.* //; p; }; }; d;' $logfile 39 | cp $logfile $celllibpath/../log.txt 40 | rm -f $logfile 41 | rm -f script.yos 42 | #rm -f abc.script 43 | -------------------------------------------------------------------------------- /scripts/yosys-supergatelib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # mode script for supergate ASIC cell library 5 | # 6 | # 
Using custom ABC script so we can get the area of the circuit: 7 | # strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put 8 | # 9 | 10 | logfile=$( mktemp ) 11 | scriptpath=$( pwd ) 12 | 13 | # create synthesis script 14 | myfile="$1" 15 | celllibpath="$2" 16 | 17 | #mkdir -p netlists 18 | 19 | if [ ${myfile: -5} == ".vhdl" ] 20 | then 21 | topmodule=$( basename -s .vhdl "$1" ) 22 | echo "read -vhdl $1" > script.yos 23 | else 24 | topmodule=$( basename -s .v "$1") 25 | echo "read -vlog2k $1" > script.yos 26 | fi 27 | #echo "read_liberty $celllibpath/supergate/supergate.lib" >> script.yos 28 | echo "hierarchy; proc; fsm; opt; memory; opt" >> script.yos 29 | echo "techmap; opt" >> script.yos 30 | echo "dfflibmap -liberty $celllibpath/supergate/supergate.lib" >> script.yos 31 | echo "abc -liberty $celllibpath/supergate/supergate.lib" >> script.yos 32 | echo "write_verilog /$1_netlist.v" >> script.yos 33 | echo "stat -liberty $celllibpath/supergate/supergate.lib" >> script.yos 34 | #echo "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put" > abc.script 35 | 36 | # run tools 37 | #yosys -ql $logfile -p "script $scriptpath/script.yos" >/dev/null 38 | yosys -l $logfile -p "script $scriptpath/script.yos" >/dev/null 39 | sed -r '/^[0-9\.]+ Printing statistics./,/^[0-9\.]+ / { /SB_LUT4/ { s/.* //; p; }; }; d;' $logfile 40 | cp $logfile $celllibpath/../log.txt 41 | rm -f $logfile 42 | rm -f script.yos 43 | #rm -f abc.script 44 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/boom/MediumBoom.v.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/MediumBoom.v.gz -------------------------------------------------------------------------------- /verilog/benchmarks_large/boom/MediumOctoBoom.v.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/MediumOctoBoom.v.gz -------------------------------------------------------------------------------- /verilog/benchmarks_large/boom/MegaOctoBoom.v.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/MegaOctoBoom.v.gz -------------------------------------------------------------------------------- /verilog/benchmarks_large/boom/README.md: -------------------------------------------------------------------------------- 1 | # BOOM RISC-V core 2 | 3 | Generated from https://github.com/riscv-boom/boom-template commit 8241911d3fa13ab81df276899c2ab839fd8b3912 4 | 5 | SmallBoom and MediumBoom are default single-core configs. 
Other multi-core configs are custom: 6 | ```scala 7 | class SmallQuadBoomConfig extends Config( 8 | new WithRVC ++ 9 | new WithSmallBooms ++ 10 | new DefaultBoomConfig ++ 11 | new WithNBoomCores(4) ++ 12 | new WithoutTLMonitors ++ 13 | new freechips.rocketchip.system.BaseConfig) 14 | 15 | class MediumOctoBoomConfig extends Config( 16 | new WithRVC ++ 17 | new WithMediumBooms ++ 18 | new DefaultBoomConfig ++ 19 | new WithNBoomCores(8) ++ 20 | new WithoutTLMonitors ++ 21 | new freechips.rocketchip.system.BaseConfig) 22 | 23 | class MegaOctoBoomConfig extends Config( 24 | new WithRVC ++ 25 | new WithMegaBooms ++ 26 | new DefaultBoomConfig ++ 27 | new WithNBoomCores(8) ++ 28 | new WithoutTLMonitors ++ 29 | new freechips.rocketchip.system.BaseConfig) 30 | ``` 31 | 32 | Note that MegaOctoBoomConfig is primarily intended as a torture test rather than a useful benchmark, 33 | as a large percentage of the final resource usage is used for bit-blasted 16-write-port memories. 34 | 35 | Copyright: 36 | ``` 37 | 38 | Copyright (c) 2017, The Regents of the University of California (Regents). 39 | All Rights Reserved. 40 | 41 | Redistribution and use in source and binary forms, with or without 42 | modification, are permitted provided that the following conditions are met: 43 | 44 | 1. Redistributions of source code must retain the above copyright 45 | notice, this list of conditions and the following disclaimer. 46 | 47 | 2. Redistributions in binary form must reproduce the above copyright 48 | notice, this list of conditions and the following disclaimer in the 49 | documentation and/or other materials provided with the distribution. 50 | 51 | 3. Neither the name of the Regents nor the 52 | names of its contributors may be used to endorse or promote products 53 | derived from this software without specific prior written permission. 
54 | 55 | IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, 56 | SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING 57 | OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF REGENTS HAS 58 | BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 59 | 60 | REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 61 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 62 | PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED 63 | HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE 64 | MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 65 | ``` 66 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/boom/SmallBoom.v.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/SmallBoom.v.gz -------------------------------------------------------------------------------- /verilog/benchmarks_large/boom/SmallQuadBoom.v.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/boom/SmallQuadBoom.v.gz -------------------------------------------------------------------------------- /verilog/benchmarks_large/cam/README.md: -------------------------------------------------------------------------------- 1 | # Verilog CAM: Content Addressable Memory 2 | 3 | Source: https://github.com/alexforencich/verilog-cam 4 | 5 | Two designs: 6 | 7 | - **CAM_SRL_TOP** 64 bit data content by 32 entry content addressable memory 8 | built out of shift registers. 9 | 10 | - **CAM_BRAM_TOP** 64 bit data content by 32 entry content addressable memory 11 | built out of block RAMs. 
12 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/cam/cam_bram_top.v: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2015-2016 Alex Forencich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | 23 | */ 24 | 25 | // Language: Verilog 2001 26 | 27 | `timescale 1ns / 1ps 28 | 29 | /* 30 | * Content Addressable Memory 31 | */ 32 | module cam_bram_top #( 33 | // search data bus width 34 | parameter DATA_WIDTH = 64, 35 | // memory size in log2(words) 36 | parameter ADDR_WIDTH = 5, 37 | // CAM style (SRL, BRAM) 38 | parameter CAM_STYLE = "BRAM", 39 | // width of data bus slices 40 | parameter SLICE_WIDTH = 4 41 | ) 42 | ( 43 | input wire clk, 44 | input wire rst, 45 | 46 | input wire [ADDR_WIDTH-1:0] write_addr, 47 | input wire [DATA_WIDTH-1:0] write_data, 48 | input wire write_delete, 49 | input wire write_enable, 50 | output wire write_busy, 51 | 52 | input wire [DATA_WIDTH-1:0] compare_data, 53 | output wire [2**ADDR_WIDTH-1:0] match_many, 54 | output wire [2**ADDR_WIDTH-1:0] match_single, 55 | output wire [ADDR_WIDTH-1:0] match_addr, 56 | output wire match 57 | ); 58 | 59 | generate 60 | if (CAM_STYLE == "SRL") begin 61 | cam_srl #( 62 | .DATA_WIDTH(DATA_WIDTH), 63 | .ADDR_WIDTH(ADDR_WIDTH), 64 | .SLICE_WIDTH(SLICE_WIDTH) 65 | ) 66 | cam_inst ( 67 | .clk(clk), 68 | .rst(rst), 69 | .write_addr(write_addr), 70 | .write_data(write_data), 71 | .write_delete(write_delete), 72 | .write_enable(write_enable), 73 | .write_busy(write_busy), 74 | .compare_data(compare_data), 75 | .match_many(match_many), 76 | .match_single(match_single), 77 | .match_addr(match_addr), 78 | .match(match) 79 | ); 80 | end else if (CAM_STYLE == "BRAM") begin 81 | cam_bram #( 82 | .DATA_WIDTH(DATA_WIDTH), 83 | .ADDR_WIDTH(ADDR_WIDTH), 84 | .SLICE_WIDTH(SLICE_WIDTH) 85 | ) 86 | cam_inst ( 87 | .clk(clk), 88 | .rst(rst), 89 | .write_addr(write_addr), 90 | .write_data(write_data), 91 | .write_delete(write_delete), 92 | .write_enable(write_enable), 93 | .write_busy(write_busy), 94 | .compare_data(compare_data), 95 | .match_many(match_many), 96 | .match_single(match_single), 97 | .match_addr(match_addr), 98 | .match(match) 99 | ); 100 | end 101 | endgenerate 102 | 103 | endmodule 104 | 105 
| `include "cam_bram.vh" 106 | `include "priority_encoder.vh" 107 | `include "ram_dp.vh" 108 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/cam/cam_srl_top.v: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2015-2016 Alex Forencich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | 23 | */ 24 | 25 | // Language: Verilog 2001 26 | 27 | `timescale 1ns / 1ps 28 | 29 | /* 30 | * Content Addressable Memory 31 | */ 32 | module cam_srl_top #( 33 | // search data bus width 34 | parameter DATA_WIDTH = 64, 35 | // memory size in log2(words) 36 | parameter ADDR_WIDTH = 5, 37 | // CAM style (SRL, BRAM) 38 | parameter CAM_STYLE = "SRL", 39 | // width of data bus slices 40 | parameter SLICE_WIDTH = 4 41 | ) 42 | ( 43 | input wire clk, 44 | input wire rst, 45 | 46 | input wire [ADDR_WIDTH-1:0] write_addr, 47 | input wire [DATA_WIDTH-1:0] write_data, 48 | input wire write_delete, 49 | input wire write_enable, 50 | output wire write_busy, 51 | 52 | input wire [DATA_WIDTH-1:0] compare_data, 53 | output wire [2**ADDR_WIDTH-1:0] match_many, 54 | output wire [2**ADDR_WIDTH-1:0] match_single, 55 | output wire [ADDR_WIDTH-1:0] match_addr, 56 | output wire match 57 | ); 58 | 59 | generate 60 | if (CAM_STYLE == "SRL") begin 61 | cam_srl #( 62 | .DATA_WIDTH(DATA_WIDTH), 63 | .ADDR_WIDTH(ADDR_WIDTH), 64 | .SLICE_WIDTH(SLICE_WIDTH) 65 | ) 66 | cam_inst ( 67 | .clk(clk), 68 | .rst(rst), 69 | .write_addr(write_addr), 70 | .write_data(write_data), 71 | .write_delete(write_delete), 72 | .write_enable(write_enable), 73 | .write_busy(write_busy), 74 | .compare_data(compare_data), 75 | .match_many(match_many), 76 | .match_single(match_single), 77 | .match_addr(match_addr), 78 | .match(match) 79 | ); 80 | end else if (CAM_STYLE == "BRAM") begin 81 | cam_bram #( 82 | .DATA_WIDTH(DATA_WIDTH), 83 | .ADDR_WIDTH(ADDR_WIDTH), 84 | .SLICE_WIDTH(SLICE_WIDTH) 85 | ) 86 | cam_inst ( 87 | .clk(clk), 88 | .rst(rst), 89 | .write_addr(write_addr), 90 | .write_data(write_data), 91 | .write_delete(write_delete), 92 | .write_enable(write_enable), 93 | .write_busy(write_busy), 94 | .compare_data(compare_data), 95 | .match_many(match_many), 96 | .match_single(match_single), 97 | .match_addr(match_addr), 98 | .match(match) 99 | ); 100 | end 101 | endgenerate 102 | 103 | endmodule 104 | 105 | 
`include "cam_srl.vh" 106 | `include "priority_encoder.vh" 107 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/cam/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import urllib.request 4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/cam_srl.v', 'cam_srl.vh') 5 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/cam_bram.v', 'cam_bram.vh') 6 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/priority_encoder.v', 'priority_encoder.vh') 7 | urllib.request.urlretrieve('https://raw.githubusercontent.com/alexforencich/verilog-cam/32a2b86b0b1fee22f975bf15a64432b60540ac0e/rtl/ram_dp.v', 'ram_dp.vh') 8 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/cordic/.gitignore: -------------------------------------------------------------------------------- 1 | cordic_*.v 2 | *.vvp 3 | *.vcd -------------------------------------------------------------------------------- /verilog/benchmarks_large/cordic/README.md: -------------------------------------------------------------------------------- 1 | # CORDIC - Coordinate Rotation DIgital Computer 2 | 3 | CORDIC is a class of algorithms used to efficiently calculate sine, cosine, tangent, arcsine, arccosine, arctangent, vector magnitude and more. 4 | 5 | Here, a pipelined CORDIC algorithm is used to calculate sine and cosine. 6 | 7 | The Python script generates multiple versions varying in the number of CORDIC stages and input widths. 
8 | 9 | Reference: https://en.wikipedia.org/wiki/CORDIC 10 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/cordic/cordic.template: -------------------------------------------------------------------------------- 1 | // pipelined CORDIC algorithm to calculate sin/cos pair from a given angle (0..1) 2 | // Author: Niels A. Moseley 3 | // 4 | 5 | 6 | // one stage of the cordic iteration with registered outputs 7 | module cordic_stage_(clk, rst_n, x_in, y_in, angle_in, angle_adj, x_out, y_out, angle_out); 8 | parameter SHIFT = 1; 9 | 10 | // inputs 11 | input clk; 12 | input rst_n; 13 | input signed [-1:0] x_in; 14 | input signed [-1:0] y_in; 15 | input signed [-1:0] angle_in; 16 | input signed [-1:0] angle_adj; 17 | 18 | // outputs 19 | output reg signed [-1:0] x_out; 20 | output reg signed [-1:0] y_out; 21 | output reg signed [-1:0] angle_out; 22 | 23 | // internal signal 24 | reg signed [-1:0] new_x; 25 | reg signed [-1:0] new_y; 26 | reg signed [-1:0] new_angle; 27 | 28 | wire sign; 29 | wire signed [-1:0] shifted_x; 30 | wire signed [-1:0] shifted_y; 31 | 32 | assign sign = angle_in[-1]; // angle sign bit 33 | assign shifted_x = x_in >>> SHIFT; 34 | assign shifted_y = y_in >>> SHIFT; 35 | 36 | always @(*) 37 | begin 38 | new_x = sign ? (x_in + shifted_y) : (x_in - shifted_y); 39 | new_y = sign ? (y_in - shifted_x) : (y_in + shifted_x); 40 | new_angle = sign ? 
(angle_in + angle_adj) : (angle_in - angle_adj); 41 | end 42 | 43 | always @(posedge clk) 44 | begin 45 | if (rst_n == 1'b0) 46 | begin 47 | x_out <= 0; 48 | y_out <= 0; 49 | angle_out <= 0; 50 | end 51 | else begin 52 | x_out <= new_x; 53 | y_out <= new_y; 54 | angle_out <= new_angle; 55 | end 56 | end 57 | 58 | endmodule 59 | 60 | 61 | module cordic__(clk, rst_n, angle_in, cos_out, sin_out); 62 | 63 | // inputs 64 | input clk; 65 | input rst_n; 66 | input signed [-1:0] angle_in; 67 | 68 | // outputs 69 | output signed [-1:0] cos_out; 70 | output signed [-1:0] sin_out; 71 | 72 | // internal signals 73 | reg signed [-1:0] x_in; 74 | reg signed [-1:0] y_in; 75 | reg signed [-1:0] z_in; 76 | 77 | wire signed [-1:0] xbus [0:-1]; 78 | wire signed [-1:0] ybus [0:-1]; 79 | wire signed [-1:0] zbus [0:-1]; 80 | 81 | assign cos_out = xbus[-1]; 82 | assign sin_out = ybus[-1]; 83 | 84 | always @(*) 85 | begin 86 | case($unsigned(angle_in[-1:-2])) 87 | 2'b00: 88 | begin 89 | x_in <= ; 90 | y_in <= 0; 91 | z_in <= angle_in; 92 | end 93 | 2'b11: 94 | begin 95 | x_in <= ; 96 | y_in <= 0; 97 | z_in <= angle_in; 98 | end 99 | 2'b01: 100 | begin 101 | x_in <= 0; 102 | y_in <= ; 103 | z_in <= $signed({2'b00, angle_in[-3:0]}); 104 | end 105 | 2'b10: 106 | begin 107 | x_in <= 0; 108 | y_in <= -; 109 | z_in <= $signed({2'b11, angle_in[-3:0]}); 110 | end 111 | endcase 112 | end 113 | 114 | // generate instances of cordic_stage 115 | 116 | 117 | endmodule -------------------------------------------------------------------------------- /verilog/benchmarks_large/cordic/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Generate a pipelined CORDIC with a certain number of iteration stages 4 | ## The script must also generate the angle table 5 | ## 6 | ## = bit width of cordic stage 7 | ## = number of stages 8 | ## = cordic vector start magnitude, approx 0.6199505 9 | ## = generated calls to cordic_stage 10 | ## 
11 | 12 | import math 13 | 14 | def gen_cordic(stages, bits, template): 15 | ## calculate the CORDIC gain so we can compensate this 16 | ## by reducing the input vector length to avoid overflow. 17 | ## 18 | ## the Nth stage has a gain of sqrt(1.0 + 2^-2N) when counting 19 | ## stages from 0. 20 | ## 21 | ## Total gain for 4 stages : 1.64248406575 22 | ## 5 stages : 1.64568891576 23 | ## 6 stages : 1.64649227871 24 | ## 25 | 26 | amp = 1.0 27 | for I in range(0,stages): 28 | amp = amp * math.sqrt(1.0 + math.pow(2.0,-2*I)) 29 | 30 | startval = int( math.floor((2**(bits-1)-1) / amp) ) 31 | 32 | template = template.replace("", str(bits)).replace("", str(stages)) 33 | template = template.replace("", str(bits)+"'d"+str(startval)) 34 | 35 | ## generate calls to cordic_stage 36 | 37 | gen = " cordic_stage_ #(0) stage0(clk, rst_n, x_in, y_in, z_in, , xbus[0], ybus[0], zbus[0]);\n" 38 | tanval = int( round((2**(bits)) * 0.125,0) ) 39 | gen = gen.replace("", str(bits) + "'sd" + str(tanval)) 40 | gen = gen.replace("", str(bits)) 41 | 42 | s = " cordic_stage_ #() stage(clk, rst_n, xbus[], ybus[], zbus[], , xbus[], ybus[], zbus[]);" 43 | for I in range(1,stages): 44 | tanval = int( round((2**(bits)) * math.atan(math.pow(2.0, -I))/(2.0*3.14159265359),0) ) 45 | gen_s = s.replace("", str(I-1)).replace("", str(bits) + "'sd" + str(tanval)) + "\n" 46 | gen_s = gen_s.replace("",str(I)) 47 | gen_s = gen_s.replace("",str(bits)) 48 | gen = gen + gen_s 49 | 50 | 51 | template = template.replace("", gen) 52 | 53 | with open("cordic_%d_%d.v" % (stages, bits), "w") as f: 54 | print(template, file=f) 55 | 56 | with open('cordic.template','rt') as templatefile: 57 | template = templatefile.read() 58 | 59 | for stages in [4,5,6,7,8,9,10]: 60 | for bits in [8,12,16]: 61 | gen_cordic(stages, bits, ''.join(template)) 62 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/cordic/run_cordic_tb.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | iverilog -o cordic_tb.vvp cordic_10_16.v cordic_4_8.v cordic_tb.v 4 | vvp cordic_tb.vvp 5 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/.gitignore: -------------------------------------------------------------------------------- 1 | *.vh 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/README.md: -------------------------------------------------------------------------------- 1 | # DSP Filters - a selection of digital filters from @ZipCPU 2 | 3 | Source: https://github.com/ZipCPU/dspfilters/tree/49b9a0235f88c34b9a997b1aa9a634ad130ea719 4 | 5 | Currently three designs exist: 6 | 7 | - **fastfir_fixedtaps** A 1-output per clock finite impulse response (FIR) filter, 8 | configured as a 12-bit 128-tap band-pass filter. 9 | 10 | - **slowfil_fixedtaps** A 1-output per number-of-taps clocks finite impulse response 11 | (FIR) filter, configured as a 12-bit 128-tap band-pass filter. This original variant 12 | uses a ring-buffer to store all input samples. 13 | 14 | - **slowfil_srl_fixedtaps** A 1-output per number-of-taps clocks finite impulse 15 | response (FIR) filter, configured as a 12-bit 128-tap band-pass filter. This is a 16 | modified variant of the original slowfil that uses a shift-register approach to 17 | store all input samples. 
18 | 19 | The 12-bit 128-tap band pass filter has the following performance characteristics: 20 | - 0-200Hz: -119.27dB 21 | - 300-500Hz: 0.00dB 22 | - 600-1000Hz: -119.27dB 23 | coefficients generated using http://t-filter.engineerjs.com 24 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/fastfir_dynamictaps.v: -------------------------------------------------------------------------------- 1 | module fastfir_dynamictaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_result); 2 | `ifdef FORMAL 3 | parameter NTAPS=16, IW=9, TW=IW, OW=2*IW+5; 4 | `else 5 | parameter NTAPS=128, IW=12, TW=IW, OW=2*IW+7; 6 | `endif 7 | parameter [0:0] FIXED_TAPS=0; 8 | input wire i_clk, i_reset; 9 | // 10 | input wire i_tap_wr; // Ignored if FIXED_TAPS 11 | input wire [(TW-1):0] i_tap; // Ignored if FIXED_TAPS 12 | // 13 | input wire i_ce; 14 | input wire [(IW-1):0] i_sample; 15 | output wire [(OW-1):0] o_result; 16 | 17 | fastfir #(.FIXED_TAPS(0), .NTAPS(NTAPS), .IW(IW), .TW(TW)) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_result(o_result)); 18 | endmodule 19 | 20 | `include "fastfir.vh" 21 | `include "firtap.vh" 22 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/fastfir_fixedtaps.v: -------------------------------------------------------------------------------- 1 | module fastfir_fixedtaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_result); 2 | `ifdef FORMAL 3 | parameter NTAPS=16, IW=9, TW=IW, OW=2*IW+5; 4 | `else 5 | parameter NTAPS=128, IW=12, TW=IW, OW=2*IW+7; 6 | `endif 7 | parameter [0:0] FIXED_TAPS=0; 8 | input wire i_clk, i_reset; 9 | // 10 | input wire i_tap_wr; // Ignored if FIXED_TAPS 11 | input wire [(TW-1):0] i_tap; // Ignored if FIXED_TAPS 12 | // 13 | input wire i_ce; 14 | input wire [(IW-1):0] i_sample; 15 | output wire [(OW-1):0] o_result; 16 | 17 
| fastfir #(.FIXED_TAPS(1), .NTAPS(NTAPS), .IW(IW), .TW(TW)) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_result(o_result)); 18 | endmodule 19 | 20 | `include "fastfir.vh" 21 | `include "firtap.vh" 22 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import urllib.request 4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/dspfilters/75756b71d162ca621d6905224d2c836f45efa425/rtl/fastfir.v', 'fastfir.vh') 5 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/dspfilters/75756b71d162ca621d6905224d2c836f45efa425/rtl/slowfil.v', 'slowfil.vh') 6 | urllib.request.urlretrieve('https://raw.githubusercontent.com/ZipCPU/dspfilters/75756b71d162ca621d6905224d2c836f45efa425/rtl/firtap.v', 'firtap.vh') 7 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/slowfil_fixedtaps.v: -------------------------------------------------------------------------------- 1 | module slowfil_fixedtaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_ce, o_result); 2 | `ifdef FORMAL 3 | parameter NTAPS=16, IW=9, TW=IW, OW=2*IW+5; 4 | `else 5 | parameter NTAPS=128, IW=12, TW=IW, OW=2*IW+7; 6 | `endif 7 | parameter [0:0] FIXED_TAPS=0; 8 | input wire i_clk, i_reset; 9 | // 10 | input wire i_tap_wr; // Ignored if FIXED_TAPS 11 | input wire [(TW-1):0] i_tap; // Ignored if FIXED_TAPS 12 | // 13 | input wire i_ce; 14 | input wire [(IW-1):0] i_sample; 15 | output wire o_ce; 16 | output wire [(OW-1):0] o_result; 17 | 18 | slowfil #(.FIXED_TAPS(1), .NTAPS(NTAPS), .IW(IW), .TW(TW), .INITIAL_COEFFS("taps.hex")) fir (.i_clk(i_clk), .i_reset(i_reset), .i_tap_wr(i_tap_wr), .i_tap(i_tap), .i_ce(i_ce), .i_sample(i_sample), .o_ce(o_ce), 
.o_result(o_result)); 19 | endmodule 20 | 21 | `include "slowfil.vh" 22 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/dspfilters/slowfil_srl.vh: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Filename: slowfil_srl.v 4 | // 5 | // Project: DSP Filtering Example Project 6 | // 7 | // Purpose: Unlike fastfir.v and genericfir.v, both of which require one 8 | // hardware multiply element per tap, this slowfil design requires 9 | // only one multiply element in total. It is useful for those times and 10 | // cases when there are fewer taps than there are clock intervals between 11 | // incoming samples. In all other respects, however, it remains quite 12 | // generic. 13 | // 14 | // Creator: Dan Gisselquist, Ph.D. 15 | // Gisselquist Technology, LLC 16 | // 17 | // Note: This is a modified version of slowfil.v by Dan Gisselquist that 18 | // uses a shift-register based approach, over a memory-based one. 19 | // 20 | //////////////////////////////////////////////////////////////////////////////// 21 | // 22 | // Copyright (C) 2017-2019, Gisselquist Technology, LLC 23 | // 24 | // This file is part of the DSP filtering set of designs. 25 | // 26 | // The DSP filtering designs are free RTL designs: you can redistribute them 27 | // and/or modify any of them under the terms of the GNU Lesser General Public 28 | // License as published by the Free Software Foundation, either version 3 of 29 | // the License, or (at your option) any later version. 30 | // 31 | // The DSP filtering designs are distributed in the hope that they will be 32 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 33 | // MERCHANTIBILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 34 | // General Public License for more details. 
35 | // 36 | // You should have received a copy of the GNU Lesser General Public License 37 | // along with these designs. (It's in the $(ROOT)/doc directory. Run make 38 | // with no target there if the PDF file isn't present.) If not, see 39 | // http://www.gnu.org/licenses/ for a copy. 40 | // 41 | // License: LGPL, v3, as defined and found on www.gnu.org, 42 | // http://www.gnu.org/licenses/lgpl.html 43 | // 44 | //////////////////////////////////////////////////////////////////////////////// 45 | // 46 | // 47 | `default_nettype none 48 | // 49 | module slowfil_srl(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_ce, o_result); 50 | parameter LGNTAPS = 7, IW=16, TW=16, OW = IW+TW+LGNTAPS; 51 | parameter [LGNTAPS:0] NTAPS = 110; // (1< 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies. 7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
15 | * 16 | */ 17 | 18 | module marlann_compute #( 19 | parameter integer NB = 2, 20 | parameter integer CODE_SIZE = 512, 21 | parameter integer COEFF_SIZE = 512 22 | ) ( 23 | input clock, 24 | input reset, 25 | output busy, 26 | 27 | input cmd_valid, 28 | output cmd_ready, 29 | input [31:0] cmd_insn, 30 | 31 | output mem_ren, 32 | output [ 7:0] mem_wen, 33 | output [15:0] mem_addr, 34 | output [63:0] mem_wdata, 35 | input [63:0] mem_rdata, 36 | 37 | output tick_simd, 38 | output tick_nosimd 39 | ); 40 | integer i; 41 | 42 | reg [31:0] code_mem [0:CODE_SIZE-1]; 43 | reg [64*NB-1:0] coeff_mem [0:COEFF_SIZE-1]; 44 | 45 | reg [31:0] acc0, acc1; 46 | 47 | reg [16:0] VBP, LBP, SBP; 48 | reg [ 8:0] CBP; 49 | 50 | reg mem_rd0_en; 51 | reg [15:0] mem_rd0_addr; 52 | 53 | reg mem_rd1_en; 54 | reg [15:0] mem_rd1_addr; 55 | 56 | reg [ 7:0] mem_wr_en; 57 | reg [15:0] mem_wr_addr; 58 | reg [63:0] mem_wr_wdata; 59 | 60 | assign mem_ren = mem_rd0_en || mem_rd1_en; 61 | assign mem_wen = mem_wr_en; 62 | assign mem_addr = ({16{mem_rd0_en}} & mem_rd0_addr) | ({16{mem_rd1_en}} & mem_rd1_addr) | ({16{|mem_wr_en}} & mem_wr_addr); 63 | assign mem_wdata = mem_wr_wdata; 64 | 65 | wire [16:0] cmd_insn_maddr = cmd_insn[31:15]; 66 | wire [8:0] cmd_insn_caddr = cmd_insn[14:6]; 67 | wire [5:0] cmd_insn_opcode = cmd_insn[5:0]; 68 | 69 | 70 | /**** staging ****/ 71 | 72 | reg s1_en; 73 | wire [ 31:0] s1_insn; 74 | wire s1_stall; 75 | 76 | reg s2_en; 77 | reg [ 31:0] s2_insn; 78 | 79 | reg s3_en; 80 | reg [ 31:0] s3_insn; 81 | 82 | reg s3a_en; 83 | reg [ 31:0] s3a_insn; 84 | 85 | reg s4_en; 86 | reg [ 31:0] s4_insn; 87 | reg [ NB*64-1:0] s4_coeff; 88 | 89 | reg s5_en; 90 | reg [ 31:0] s5_insn; 91 | reg [ 8*9-1:0] s5_max; 92 | 93 | reg s6_en; 94 | reg [ 31:0] s6_insn; 95 | reg [ 4*9-1:0] s6_max; 96 | 97 | reg s7_en; 98 | reg [ 31:0] s7_insn; 99 | wire [ NB*128-1:0] s7_prod; 100 | reg [ 2*9-1:0] s7_max; 101 | 102 | reg s8_en; 103 | reg [ 31:0] s8_insn; 104 | reg [ 19:0] s8_sum0; 105 | reg [ 19:0] 
s8_sum1; 106 | reg [ 8:0] s8_max; 107 | reg s8_maxen; 108 | 109 | reg s9_en; 110 | reg [ 31:0] s9_insn; 111 | 112 | 113 | /**** memory and max interlock ****/ 114 | 115 | reg [9:0] memlock_res; 116 | reg [9:0] memlock_mask; 117 | reg memlock_expect; 118 | 119 | always @* begin 120 | memlock_mask = 0; 121 | 122 | case (s1_insn[5:0]) 123 | /* LoadCode, LoadCoeff0, LoadCoeff1 */ 124 | 4, 5, 6: memlock_mask = 1 << 0; 125 | 126 | /* LdSet, LdSet0, LdSet1, LdAdd, LdAdd0, LdAdd1 */ 127 | 28, 29, 30, 32, 33, 34: begin 128 | memlock_mask = 1 << 4; 129 | end 130 | 131 | /* MACC, MMAX, MACCZ, MMAXZ, MMAXN */ 132 | 40, 41, 42, 43, 45: memlock_mask = 1 << 0; 133 | 134 | /* Store, Store0, Store1, ReLU, ReLU0, ReLU1, Save, Save0, Save1 */ 135 | 16, 17, 18, 20, 21, 22, 24, 25, 26: memlock_mask = 1 << 9; 136 | endcase 137 | 138 | if (!s1_en || reset) 139 | memlock_mask = 0; 140 | end 141 | 142 | reg maxlock_a; 143 | reg maxlock_b; 144 | reg maxlock_a_q; 145 | 146 | always @* begin 147 | maxlock_a = 0; 148 | maxlock_b = 0; 149 | 150 | case (s1_insn[5:0] & 6'b 1111_00) 151 | 28, 32, 40, 44: maxlock_a = 1; 152 | endcase 153 | 154 | case (s1_insn[5:0]) 155 | 41, 43, 45, 47: maxlock_b = 1; 156 | endcase 157 | 158 | if (!s1_en || reset) begin 159 | maxlock_a = 0; 160 | maxlock_b = 0; 161 | end 162 | end 163 | 164 | assign s1_stall = |(memlock_res & memlock_mask) || (maxlock_b && maxlock_a_q); 165 | 166 | always @(posedge clock) begin 167 | {memlock_res, memlock_expect} <= memlock_res | (s1_stall ? 10'b 0 : memlock_mask); 168 | maxlock_a_q <= maxlock_a && !s1_stall; 169 | 170 | if (reset) begin 171 | memlock_res <= 0; 172 | memlock_expect <= 0; 173 | maxlock_a_q <= 0; 174 | end 175 | end 176 | 177 | assign cmd_ready = !s1_stall; 178 | 179 | assign busy = |{s1_en, s2_en, s3_en, s4_en, s5_en, s6_en, s7_en, s8_en}; 180 | 181 | 182 | /**** stage 1 ****/ 183 | 184 | reg [31:0] s1_insn_direct; 185 | reg [31:0] s1_insn_codemem; 186 | reg s1_insn_sel; 187 | 188 | assign s1_insn = s1_insn_sel ? 
s1_insn_codemem : s1_insn_direct; 189 | 190 | wire [16:0] s1_insn_maddr = s1_insn[31:15]; 191 | wire [8:0] s1_insn_caddr = s1_insn[14:6]; 192 | wire [5:0] s1_insn_opcode = s1_insn[5:0]; 193 | 194 | always @(posedge clock) begin 195 | if (!s1_stall) begin 196 | s1_en <= cmd_valid && cmd_ready; 197 | s1_insn_direct <= cmd_insn; 198 | s1_insn_codemem <= code_mem[cmd_insn[14:6]]; 199 | s1_insn_sel <= cmd_insn[5:0] == 3; 200 | end 201 | 202 | if (reset) begin 203 | s1_en <= 0; 204 | end 205 | end 206 | 207 | 208 | /**** stage 2 ****/ 209 | 210 | reg s2_tick_simd; 211 | 212 | always @(posedge clock) begin 213 | s2_en <= 0; 214 | s2_insn <= s1_insn; 215 | s2_tick_simd <= 0; 216 | 217 | mem_rd0_en <= 0; 218 | mem_rd0_addr <= 'bx; 219 | 220 | if (!reset && s1_en && !s1_stall) begin 221 | s2_en <= 1; 222 | 223 | case (s1_insn[5:0]) 224 | /* LoadCode, LoadCoeff0, LoadCoeff1 */ 225 | 4, 5, 6: begin 226 | mem_rd0_en <= 1; 227 | mem_rd0_addr <= s1_insn[31:15] >> 1; 228 | end 229 | 230 | /* SetVBP, AddVBP */ 231 | 8, 9: begin 232 | VBP <= s1_insn[31:15] + (s1_insn[0] ? 
VBP : 0); 233 | end 234 | 235 | /* MACC, MMAX, MACCZ, MMAXZ, MMAXN */ 236 | 40, 41, 42, 43, 45: begin 237 | mem_rd0_en <= 1; 238 | mem_rd0_addr <= (s1_insn[31:15] + VBP) >> 1; 239 | s2_tick_simd <= 1; 240 | end 241 | endcase 242 | end 243 | end 244 | 245 | assign tick_simd = s2_tick_simd; 246 | assign tick_nosimd = s2_en && !tick_simd; 247 | 248 | 249 | /**** stage 3 ****/ 250 | 251 | always @(posedge clock) begin 252 | s3_en <= 0; 253 | s3_insn <= s2_insn; 254 | 255 | if (!reset && s2_en) begin 256 | s3_en <= 1; 257 | end 258 | end 259 | 260 | 261 | /**** stage 3A ****/ 262 | 263 | always @(posedge clock) begin 264 | s3a_en <= 0; 265 | s3a_insn <= s3_insn; 266 | 267 | if (!reset && s3_en) begin 268 | s3a_en <= 1; 269 | end 270 | end 271 | 272 | 273 | /**** stage 4 ****/ 274 | 275 | always @(posedge clock) begin 276 | s4_en <= 0; 277 | s4_insn <= s3a_insn; 278 | s4_coeff <= coeff_mem[s3a_insn[14:6] + CBP]; 279 | 280 | if (!reset && s3a_en) begin 281 | s4_en <= 1; 282 | 283 | /* SetCBP, AddCBP */ 284 | if (s3a_insn[5:0] == 14 || s3a_insn[5:0] == 15) begin 285 | CBP <= s3a_insn[14:6] + (s3a_insn[0] ? CBP : 0); 286 | end 287 | end 288 | end 289 | 290 | 291 | /**** stage 5 ****/ 292 | 293 | always @(posedge clock) begin 294 | s5_en <= 0; 295 | s5_insn <= s4_insn; 296 | 297 | s5_max[0*9 +: 9] <= s4_coeff[0*8 +: 8] ? $signed(mem_rdata[0*8 +: 8]) : 9'h100; 298 | s5_max[1*9 +: 9] <= s4_coeff[1*8 +: 8] ? $signed(mem_rdata[1*8 +: 8]) : 9'h100; 299 | s5_max[2*9 +: 9] <= s4_coeff[2*8 +: 8] ? $signed(mem_rdata[2*8 +: 8]) : 9'h100; 300 | s5_max[3*9 +: 9] <= s4_coeff[3*8 +: 8] ? $signed(mem_rdata[3*8 +: 8]) : 9'h100; 301 | s5_max[4*9 +: 9] <= s4_coeff[4*8 +: 8] ? $signed(mem_rdata[4*8 +: 8]) : 9'h100; 302 | s5_max[5*9 +: 9] <= s4_coeff[5*8 +: 8] ? $signed(mem_rdata[5*8 +: 8]) : 9'h100; 303 | s5_max[6*9 +: 9] <= s4_coeff[6*8 +: 8] ? $signed(mem_rdata[6*8 +: 8]) : 9'h100; 304 | s5_max[7*9 +: 9] <= s4_coeff[7*8 +: 8] ? 
$signed(mem_rdata[7*8 +: 8]) : 9'h100; 305 | 306 | mem_rd1_en <= 0; 307 | mem_rd1_addr <= 'bx; 308 | 309 | if (!reset && s4_en) begin 310 | s5_en <= 1; 311 | 312 | case (s4_insn[5:0]) 313 | /* LoadCode */ 314 | 4: begin 315 | code_mem[s4_insn[14:6]] <= mem_rdata[31:0]; 316 | end 317 | 318 | /* LoadCoeff0 */ 319 | 5: begin 320 | coeff_mem[s4_insn[14:6]][63:0] <= mem_rdata; 321 | end 322 | 323 | /* LoadCoeff1 */ 324 | 6: begin 325 | coeff_mem[s4_insn[14:6]][127:64] <= mem_rdata; 326 | end 327 | 328 | /* SetLBP, AddLBP */ 329 | 10, 11: begin 330 | LBP <= s4_insn[31:15] + (s4_insn[0] ? LBP : 0); 331 | end 332 | 333 | /* LdSet, LdSet0, LdSet1, LdAdd, LdAdd0, LdAdd1 */ 334 | 28, 29, 30, 32, 33, 34: begin 335 | mem_rd1_en <= 1; 336 | mem_rd1_addr <= (s4_insn[31:15] + LBP) >> 1; 337 | end 338 | endcase 339 | end 340 | end 341 | 342 | 343 | /**** stage 6 ****/ 344 | 345 | always @(posedge clock) begin 346 | s6_en <= 0; 347 | s6_insn <= s5_insn; 348 | 349 | s6_max[0*9 +: 9] <= $signed(s5_max[0*9 +: 9]) > $signed(s5_max[1*9 +: 9]) ? s5_max[0*9 +: 9] : s5_max[1*9 +: 9]; 350 | s6_max[1*9 +: 9] <= $signed(s5_max[2*9 +: 9]) > $signed(s5_max[3*9 +: 9]) ? s5_max[2*9 +: 9] : s5_max[3*9 +: 9]; 351 | s6_max[2*9 +: 9] <= $signed(s5_max[4*9 +: 9]) > $signed(s5_max[5*9 +: 9]) ? s5_max[4*9 +: 9] : s5_max[5*9 +: 9]; 352 | s6_max[3*9 +: 9] <= $signed(s5_max[6*9 +: 9]) > $signed(s5_max[7*9 +: 9]) ? s5_max[6*9 +: 9] : s5_max[7*9 +: 9]; 353 | 354 | if (!reset && s5_en) begin 355 | s6_en <= 1; 356 | end 357 | end 358 | 359 | 360 | /**** stage 7 ****/ 361 | 362 | wire [NB*64-1:0] mulA = {mem_rdata, mem_rdata}; 363 | 364 | marlann_compute_mul2 mul [NB*4-1:0] ( 365 | .clock (clock ), 366 | .A (mulA ), 367 | .B (s4_coeff), 368 | .X (s7_prod ) 369 | ); 370 | 371 | always @(posedge clock) begin 372 | s7_en <= 0; 373 | s7_insn <= s6_insn; 374 | 375 | s7_max[0*9 +: 9] <= $signed(s6_max[0*9 +: 9]) > $signed(s6_max[1*9 +: 9]) ? 
s6_max[0*9 +: 9] : s6_max[1*9 +: 9]; 376 | s7_max[1*9 +: 9] <= $signed(s6_max[2*9 +: 9]) > $signed(s6_max[3*9 +: 9]) ? s6_max[2*9 +: 9] : s6_max[3*9 +: 9]; 377 | 378 | if (!reset && s6_en) begin 379 | s7_en <= 1; 380 | end 381 | end 382 | 383 | 384 | /**** stage 8 ****/ 385 | 386 | reg [31:0] acc0zn; 387 | 388 | always @* begin 389 | acc0zn = s7_insn[1] ? 0 : acc0; 390 | acc0zn = s7_insn[2] ? 32'h 8000_0000 : acc0zn; 391 | end 392 | 393 | always @(posedge clock) begin 394 | s8_en <= 0; 395 | s8_insn <= s7_insn; 396 | 397 | s8_sum0 <= $signed(s7_prod[ 0 +: 16]) + $signed(s7_prod[ 16 +: 16]) + $signed(s7_prod[ 32 +: 16]) + $signed(s7_prod[ 48 +: 16]) + 398 | $signed(s7_prod[ 64 +: 16]) + $signed(s7_prod[ 80 +: 16]) + $signed(s7_prod[ 96 +: 16]) + $signed(s7_prod[112 +: 16]); 399 | 400 | s8_sum1 <= $signed(s7_prod[128 +: 16]) + $signed(s7_prod[144 +: 16]) + $signed(s7_prod[160 +: 16]) + $signed(s7_prod[176 +: 16]) + 401 | $signed(s7_prod[192 +: 16]) + $signed(s7_prod[208 +: 16]) + $signed(s7_prod[224 +: 16]) + $signed(s7_prod[240 +: 16]); 402 | 403 | s8_max <= $signed(s7_max[0*9 +: 9]) > $signed(s7_max[1*9 +: 9]) ? s7_max[0*9 +: 9] : s7_max[1*9 +: 9]; 404 | s8_maxen <= ($signed(s7_max[0*9 +: 9]) > $signed(acc0zn)) || ($signed(s7_max[1*9 +: 9]) > $signed(acc0zn)); 405 | 406 | if (!reset && s7_en) begin 407 | s8_en <= 1; 408 | end 409 | end 410 | 411 | 412 | /**** stage 9 ****/ 413 | 414 | reg [31:0] new_acc0_add; 415 | reg [31:0] new_acc1_add; 416 | 417 | reg [31:0] new_acc0_max; 418 | 419 | reg [31:0] new_acc0; 420 | reg [31:0] new_acc1; 421 | 422 | wire [31:0] acc0_shifted = $signed(acc0) >>> s8_insn[14:6]; 423 | wire [31:0] acc1_shifted = $signed(acc1) >>> s8_insn[14:6]; 424 | 425 | reg [7:0] acc0_saturated; 426 | reg [7:0] acc1_saturated; 427 | 428 | reg new_acc0_max_cmp; 429 | reg new_acc0_max_cmp_q; 430 | 431 | always @* begin 432 | new_acc0_add = s8_insn[1] ? 0 : acc0; 433 | new_acc1_add = s8_insn[1] || s8_insn[2] ? 
0 : acc1; 434 | 435 | new_acc0_max = s8_insn[2] ? 32'h 8000_0000 : new_acc0_add; 436 | 437 | new_acc0_add = $signed(new_acc0_add) + $signed(s8_sum0); 438 | new_acc1_add = $signed(new_acc1_add) + $signed(s8_sum1); 439 | 440 | if (s8_max != 9'h 100) 441 | new_acc0_max = s8_maxen ? s8_max : new_acc0_max; 442 | 443 | new_acc0 = s8_insn[0] ? new_acc0_max : new_acc0_add; 444 | new_acc1 = new_acc1_add; 445 | end 446 | 447 | always @(posedge clock) begin 448 | s9_en <= 0; 449 | s9_insn <= s8_insn; 450 | 451 | if (!reset && s8_en) begin 452 | s9_en <= 1; 453 | 454 | /* MACC, MMAX, MMACZ, MMAXZ, MMAXN */ 455 | if (s8_insn[5:3] == 3'b 101) begin 456 | acc0 <= new_acc0; 457 | acc1 <= new_acc1; 458 | end 459 | 460 | /* LdSet, LdSet0 */ 461 | if (s8_insn[5:0] == 28 || s8_insn[5:0] == 29) begin 462 | acc0 <= mem_rdata[31:0]; 463 | end 464 | 465 | /* LdSet, LdSet1 */ 466 | if (s8_insn[5:0] == 28 || s8_insn[5:0] == 30) begin 467 | acc1 <= mem_rdata[63:32]; 468 | end 469 | 470 | /* LdAdd, LdAdd0 */ 471 | if (s8_insn[5:0] == 32 || s8_insn[5:0] == 33) begin 472 | acc0 <= acc0 + mem_rdata[31:0]; 473 | end 474 | 475 | /* LdAdd, LdAdd1 */ 476 | if (s8_insn[5:0] == 32 || s8_insn[5:0] == 34) begin 477 | acc1 <= acc1 + mem_rdata[63:32]; 478 | end 479 | end 480 | 481 | if (&acc0_shifted[31:7] == |acc0_shifted[31:7]) 482 | acc0_saturated <= acc0_shifted[7:0]; 483 | else 484 | acc0_saturated <= acc0_shifted[31] ? -128 : 127; 485 | 486 | if (&acc1_shifted[31:7] == |acc1_shifted[31:7]) 487 | acc1_saturated <= acc1_shifted[7:0]; 488 | else 489 | acc1_saturated <= acc1_shifted[31] ? 
-128 : 127; 490 | end 491 | 492 | 493 | /**** write back ****/ 494 | 495 | reg [ 7:0] pre_mem_wr_en; 496 | reg [16:0] pre_mem_wr_addr; 497 | reg [63:0] pre_mem_wr_wdata; 498 | 499 | always @* begin 500 | if (pre_mem_wr_addr[0]) begin 501 | mem_wr_en = pre_mem_wr_en << 1; 502 | mem_wr_addr = pre_mem_wr_addr >> 1; 503 | mem_wr_wdata = pre_mem_wr_wdata << 8; 504 | end else begin 505 | mem_wr_en = pre_mem_wr_en; 506 | mem_wr_addr = pre_mem_wr_addr >> 1; 507 | mem_wr_wdata = pre_mem_wr_wdata; 508 | end 509 | end 510 | 511 | wire [5:0] s9_insn_opcode = s9_insn[5:0]; 512 | 513 | always @(posedge clock) begin 514 | pre_mem_wr_en <= 0; 515 | pre_mem_wr_addr <= s9_insn[31:15] + SBP; 516 | pre_mem_wr_wdata <= { 517 | {8{!s9_insn[2] || !acc1_saturated[7]}} & acc1_saturated, 518 | {8{!s9_insn[2] || !acc0_saturated[7]}} & acc0_saturated 519 | }; 520 | 521 | if (s9_en) begin 522 | /* Store, Store0, Store1, ReLU, ReLU0, ReLU1 */ 523 | if (s9_insn[5:3] == 3'b 010) begin 524 | pre_mem_wr_en <= {!s9_insn[0], !s9_insn[1]}; 525 | end 526 | 527 | /* Save, Save0, Save1 */ 528 | if (s9_insn[5:2] == 4'b 0110) begin 529 | pre_mem_wr_en <= {{4{!s9_insn[0]}}, {4{!s9_insn[1]}}}; 530 | pre_mem_wr_wdata <= {acc1, acc0}; 531 | end 532 | 533 | /* SetSBP, AddSBP */ 534 | if (s9_insn[5:0] == 12 || s9_insn[5:0] == 13) begin 535 | SBP <= s9_insn[31:15] + (s9_insn[0] ? 
SBP : 0); 536 | end 537 | end 538 | 539 | if (reset || !s9_en) begin 540 | pre_mem_wr_en <= 0; 541 | end 542 | end 543 | endmodule 544 | 545 | module marlann_compute_mul2 ( 546 | input clock, 547 | input [15:0] A, B, 548 | output [31:0] X 549 | ); 550 | reg [15:0] r1A, r2A, r3A; 551 | reg [15:0] r1B, r2B, r3B; 552 | 553 | always @(posedge clock) begin 554 | r1A <= $signed(A[7:0]) * $signed(B[7:0]); 555 | r1B <= $signed(A[15:8]) * $signed(B[15:8]); 556 | r2A <= r1A; 557 | r2B <= r1B; 558 | r3A <= r2A; 559 | r3B <= r2B; 560 | end 561 | 562 | assign X = {r3B, r3A}; 563 | endmodule 564 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/mux/.gitignore: -------------------------------------------------------------------------------- 1 | mux_*.v 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/mux/README.md: -------------------------------------------------------------------------------- 1 | # Large multiplexers 2 | 3 | The python script generates a selection of multiplexer descriptions, using variable 4 | length index (e.g. `assign a = b [c]`) as well as `case` and `if`-`else` (balanced 5 | and unbalanced) styles, across a variety of power-of-2 and non-power-of-2 values 6 | for a number of inputs, as well as input width. 
7 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/mux/common.py: -------------------------------------------------------------------------------- 1 | ../../benchmarks_small/mux/common.py -------------------------------------------------------------------------------- /verilog/benchmarks_large/mux/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from common import * 4 | 5 | if __name__ == "__main__": 6 | for N in [63,64,65] + [127,128,129] + [255,256,257]: 7 | for W in [8,16,32]: 8 | gen_mux_index(N,W) 9 | gen_mux_case(N,W) 10 | gen_mux_if_bal(N,W) 11 | gen_mux_if_unbal(N,W) 12 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/opensparc/README.md: -------------------------------------------------------------------------------- 1 | # OpenSPARC Benchmarks 2 | 3 | ## T2 Core 4 | 5 | Downloaded from http://download.oracle.com/technetwork/systems/opensparc/OpenSPARCT2.1.3.tar.bz2 6 | 7 | Based on FPGA configuration, with main memory size reduced and `mem_harness` rewritten to map better 8 | to block RAM 9 | 10 | Copyright: 11 | ``` 12 | Copyright (C) 1995-2007 Sun Microsystems, Inc. All Rights Reserved 13 | 4150 Network Circle, Santa Clara, California 95054, U.S.A. 14 | 15 | This program is free software; you can redistribute it and/or modify 16 | it under the terms of the GNU General Public License as published by 17 | the Free Software Foundation; version 2 of the License. 18 | 19 | This program is distributed in the hope that it will be useful, 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 | GNU General Public License for more details. 
23 | 24 | You should have received a copy of the GNU General Public License 25 | along with this program; if not, write to the Free Software 26 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 27 | 28 | For the avoidance of doubt, and except that if any non-GPL license 29 | choice is available it will apply instead, Sun elects to use only 30 | the General Public License version 2 (GPLv2) at this time for any 31 | software where a choice of GPL license versions is made 32 | available with the language indicating that GPLv2 or any later version 33 | may be used, or where a choice of which version of the GPL is applied is 34 | otherwise unspecified. 35 | ``` -------------------------------------------------------------------------------- /verilog/benchmarks_large/opensparc/t2.v.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/opensparc/t2.v.gz -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/.gitignore: -------------------------------------------------------------------------------- 1 | picorv32 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/README.md: -------------------------------------------------------------------------------- 1 | # PICORV32 - a size-optimized RISC-V core 2 | 3 | Source: https://github.com/cliffordwolf/picorv32/tree/v1.0 4 | 5 | There are three cores: small, regular and large. 6 | 7 | - **PicoRV32 (small):** The `picorv32` module without counter instructions, 8 | without two-stage shifts, with externally latched `mem_rdata`, and without 9 | catching of misaligned memory accesses and illegal instructions. 10 | 11 | - **PicoRV32 (regular):** The `picorv32` module in its default configuration. 
12 | 13 | - **PicoRV32 (large):** The `picorv32` module with enabled PCPI, IRQ, MUL, 14 | DIV, BARREL_SHIFTER, and COMPRESSED_ISA features. 15 | 16 | - **PicoSoC:** The `picosoc` module with `picorv32`, flash, UART, and SRAM 17 | IP on a simple System-on-Chip. 18 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os, subprocess 4 | if not os.path.isdir('picorv32'): 5 | subprocess.run(['git', 'clone', 'https://github.com/cliffordwolf/picorv32']) 6 | subprocess.run(['git', 'reset', '--hard', 'v1.0'], cwd='picorv32') 7 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/picorv32.vh: -------------------------------------------------------------------------------- 1 | picorv32/picorv32.v -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/picorv32_large.v: -------------------------------------------------------------------------------- 1 | module picorv32_large ( 2 | input clk, resetn, 3 | 4 | output mem_valid, 5 | output mem_instr, 6 | input mem_ready, 7 | 8 | output [31:0] mem_addr, 9 | output [31:0] mem_wdata, 10 | output [ 3:0] mem_wstrb, 11 | input [31:0] mem_rdata 12 | ); 13 | top_large picorv32( 14 | .clk (clk ), 15 | .resetn (resetn ), 16 | .mem_valid(mem_valid), 17 | .mem_instr(mem_instr), 18 | .mem_ready(mem_ready), 19 | .mem_addr (mem_addr ), 20 | .mem_wdata(mem_wdata), 21 | .mem_wstrb(mem_wstrb), 22 | .mem_rdata(mem_rdata) 23 | ); 24 | endmodule 25 | 26 | `include "synth_area_top.vh" 27 | `include "picorv32.vh" 28 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/picorv32_regular.v: -------------------------------------------------------------------------------- 1 | 
module picorv32_regular ( 2 | input clk, resetn, 3 | 4 | output mem_valid, 5 | output mem_instr, 6 | input mem_ready, 7 | 8 | output [31:0] mem_addr, 9 | output [31:0] mem_wdata, 10 | output [ 3:0] mem_wstrb, 11 | input [31:0] mem_rdata 12 | ); 13 | top_regular picorv32( 14 | .clk (clk ), 15 | .resetn (resetn ), 16 | .mem_valid(mem_valid), 17 | .mem_instr(mem_instr), 18 | .mem_ready(mem_ready), 19 | .mem_addr (mem_addr ), 20 | .mem_wdata(mem_wdata), 21 | .mem_wstrb(mem_wstrb), 22 | .mem_rdata(mem_rdata) 23 | ); 24 | endmodule 25 | 26 | `include "synth_area_top.vh" 27 | `include "picorv32.vh" 28 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/picorv32_small.v: -------------------------------------------------------------------------------- 1 | module picorv32_small ( 2 | input clk, resetn, 3 | 4 | output mem_valid, 5 | output mem_instr, 6 | input mem_ready, 7 | 8 | output [31:0] mem_addr, 9 | output [31:0] mem_wdata, 10 | output [ 3:0] mem_wstrb, 11 | input [31:0] mem_rdata 12 | ); 13 | top_small picorv32( 14 | .clk (clk ), 15 | .resetn (resetn ), 16 | .mem_valid(mem_valid), 17 | .mem_instr(mem_instr), 18 | .mem_ready(mem_ready), 19 | .mem_addr (mem_addr ), 20 | .mem_wdata(mem_wdata), 21 | .mem_wstrb(mem_wstrb), 22 | .mem_rdata(mem_rdata) 23 | ); 24 | endmodule 25 | 26 | `include "synth_area_top.vh" 27 | `include "picorv32.vh" 28 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/picosoc.vh: -------------------------------------------------------------------------------- 1 | picorv32/picosoc/picosoc.v -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/picosoc_top.v: -------------------------------------------------------------------------------- 1 | module picosoc_top ( 2 | input clk, 3 | input resetn, 4 | 5 | output iomem_valid, 6 | input iomem_ready, 7 | output [ 3:0] 
iomem_wstrb, 8 | output [31:0] iomem_addr, 9 | output [31:0] iomem_wdata, 10 | input [31:0] iomem_rdata, 11 | 12 | input irq_5, 13 | input irq_6, 14 | input irq_7, 15 | 16 | output ser_tx, 17 | input ser_rx, 18 | 19 | output flash_csb, 20 | output flash_clk, 21 | 22 | output flash_io0_oe, 23 | output flash_io1_oe, 24 | output flash_io2_oe, 25 | output flash_io3_oe, 26 | 27 | output flash_io0_do, 28 | output flash_io1_do, 29 | output flash_io2_do, 30 | output flash_io3_do, 31 | 32 | input flash_io0_di, 33 | input flash_io1_di, 34 | input flash_io2_di, 35 | input flash_io3_di 36 | ); 37 | 38 | picosoc top ( 39 | .clk(clk), 40 | .resetn(resetn), 41 | 42 | .iomem_valid(iomem_valid), 43 | .iomem_ready(iomem_ready), 44 | .iomem_wstrb(iomem_wstrb), 45 | .iomem_addr(iomem_addr), 46 | .iomem_wdata(iomem_wdata), 47 | .iomem_rdata(iomem_rdata), 48 | 49 | .irq_5(irq_5), 50 | .irq_6(irq_6), 51 | .irq_7(irq_7), 52 | 53 | .ser_tx(ser_tx), 54 | .ser_rx(ser_rx), 55 | 56 | .flash_csb(flash_csb), 57 | .flash_clk(flash_clk), 58 | 59 | .flash_io0_oe(flash_io0_oe), 60 | .flash_io1_oe(flash_io1_oe), 61 | .flash_io2_oe(flash_io2_oe), 62 | .flash_io3_oe(flash_io3_oe), 63 | 64 | .flash_io0_do(flash_io0_do), 65 | .flash_io1_do(flash_io1_do), 66 | .flash_io2_do(flash_io2_do), 67 | .flash_io3_do(flash_io3_do), 68 | 69 | .flash_io0_di(flash_io0_di), 70 | .flash_io1_di(flash_io1_di), 71 | .flash_io2_di(flash_io2_di), 72 | .flash_io3_di(flash_io3_di) 73 | ); 74 | 75 | endmodule 76 | 77 | `include "picosoc.vh" 78 | `include "simpleuart.vh" 79 | `include "spimemio.vh" 80 | `include "picorv32.vh" 81 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/simpleuart.vh: -------------------------------------------------------------------------------- 1 | picorv32/picosoc/simpleuart.v -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/spimemio.vh: 
-------------------------------------------------------------------------------- 1 | picorv32/picosoc/spimemio.v -------------------------------------------------------------------------------- /verilog/benchmarks_large/picosoc/synth_area_top.vh: -------------------------------------------------------------------------------- 1 | picorv32/scripts/vivado/synth_area_top.v -------------------------------------------------------------------------------- /verilog/benchmarks_large/riscv-bitmanip/README.md: -------------------------------------------------------------------------------- 1 | # RISC-V Bitmanip (Bit Manipulation) Extension 2 | 3 | Source: https://github.com/riscv/riscv-bitmanip 4 | 5 | The bitmanip instructions extend the RISC-V instruction set to enable 6 | efficient bit manipulation. 7 | 8 | Currently, there are just two reference designs: 9 | 10 | - **shifter64** 11 | 12 | - **smartbextdep** 13 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/riscv-bitmanip/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import urllib.request 4 | urllib.request.urlretrieve('https://raw.githubusercontent.com/riscv/riscv-bitmanip/dadfdcbae935815db429a1db0bfed6973548bbf5/verilog/shifter64.v', 'shifter64.v') 5 | urllib.request.urlretrieve('https://raw.githubusercontent.com/riscv/riscv-bitmanip/dadfdcbae935815db429a1db0bfed6973548bbf5/verilog/smartbextdep.v', 'smartbextdep.v') 6 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/sddac/README.md: -------------------------------------------------------------------------------- 1 | # A second-order sigma-delta DAC with single-bit quantizer 2 | 3 | ![./architecture.png](./architecture.png) 4 | 5 | ## Functional verification 6 | This benchmark has a functional verification testbench. 
It generates a sine wave of approximately 1kHz (amplitude = 10000/32767 or approximately -10.3 dB) and feeds this into the SDDAC. The output waveform is analysed by a Python 3 script, which shows the output spectrum of the SDDAC (FFT with a Blackman window). 7 | 8 | To run the functional verification testbench, execute `./run_sddac_tb.sh`. Prerequisites are Icarus Verilog, Python 3, NumPy and Matplotlib. 9 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/sddac/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YosysHQ/yosys-bench/52ff6fa991f2ab509618d8aaad02f307aac78848/verilog/benchmarks_large/sddac/architecture.png -------------------------------------------------------------------------------- /verilog/benchmarks_large/sddac/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": 3 | [ 4 | "sddac.v" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/sddac/genspectrumplot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Generate spectrum of sigma-delta output bit sequence 4 | # Author: Niels A. 
Moseley - Moseley Instruments / Symbiotic EDA 5 | # 6 | # requires matplotlib and numpy 7 | # 8 | 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | 12 | sdout = np.genfromtxt('sddac_out.txt', dtype=np.float) 13 | sdout = sdout*2 - 1 14 | 15 | # skip first 1000 samples to avoid FFTing startup transients 16 | sdout = sdout[1001:] 17 | 18 | N = sdout.size 19 | print(N) 20 | sdout_fft = np.abs(np.fft.fft(np.multiply(sdout, np.blackman(N)))) 21 | 22 | plt.figure(1) 23 | plt.title("SDDAC output spectrum") 24 | plt.xlabel("Frequency") 25 | plt.ylabel("Signal Amplitude (dB)") 26 | plt.grid() 27 | freqaxis = np.linspace(0,N//2-1,N//2)/N 28 | plt.semilogx(freqaxis, 20.0*np.log10(sdout_fft[:N // 2]/(N/4))) 29 | 30 | plt.show() 31 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/sddac/run_sddac_tb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | iverilog -m va_math -o sddac_tb.vvp sddac_tb.v sddac.v 4 | #iverilog -m va_math -o sddac_tb.vvp sddac_tb.v sddac.v_netlist.v ../../../celllibs/supergate/supergate.v 5 | 6 | vvp sddac_tb.vvp 7 | python3 genspectrumplot.py 8 | -------------------------------------------------------------------------------- /verilog/benchmarks_large/sddac/sddac.v: -------------------------------------------------------------------------------- 1 | // Second order sigma-delta dac 2 | // 3 | // For benchmarking purposes only -- don't use this for an actual design. 4 | // There are far more performant architectures. 5 | // 6 | // Author: Niels A. 
`ifdef DEBUG_SDDAC
`include "constants.vams"
`endif

// sddac: second-order sigma-delta modulator with LFSR dither.
//
//   clk     clock; all state updates on the rising edge
//   rst_n   synchronous, active-low reset (clears both integrators and
//           reloads the dither LFSR)
//   sig_in  signed 16-bit input sample
//   sd_out  registered one-bit output stream
module sddac(clk, rst_n, sig_in, sd_out);

    // inputs
    input clk; // clock
    input rst_n; // synchronous reset, active low
    input signed [15:0] sig_in; // 16 bits in Q(1,15) format

    // outputs
    // NOTE(review): sd_out relies on its declaration initialiser only; the
    // reset branch below does not clear it.
    output reg sd_out = 0;

    // internal signals
    reg signed [17:0] state1 = 0; // Q(1,17)
    reg signed [19:0] state2 = 0; // Q(1,19)
    reg signed [16:0] state1_in; // Q(0,17)
    reg signed [18:0] state2_in; // Q(0,19)
    reg signed [20:0] quant_in; // Q(2,19)
    reg signed [16:0] qq; // debug copy of the feedback value; only assigned when DEBUG_SDDAC is defined
    // NOTE(review): the declared power-up value is 0, while reset loads 8'hff.
    // With the XOR feedback below an all-zero register shifts in zeros forever,
    // so the dither only toggles once reset has been applied.
    reg [7:0] lfsr_reg = 0;
    reg quantizer; // sign bit of quant_in (1 = negative)
    wire lfsr_fb;

    // linear feedback shift register feedback
    assign lfsr_fb = (lfsr_reg[4] ^ lfsr_reg[2]);

    // combination process
    always @(*)
    begin
`ifdef DEBUG_SDDAC
        qq = $signed(quantizer ? -17'h8000 : 17'h8000);
`endif
        // second integrator plus dither: -0x4000 when lfsr_fb is 1, +0x4000 otherwise
        quant_in = state2 + $signed(lfsr_fb ? -21'h4000 : 21'h4000);
        // one-bit quantiser: keep only the sign of the dithered value
        quantizer = quant_in[20];
        // subtract the fed-back quantiser level from each integrator input;
        // the level is negative when quantizer is 1 and positive otherwise
        state1_in = sig_in - $signed(quantizer ? -17'h8000 : 17'h8000); // Q(-1,17) - Q(0,17) -> Q(0,17)
        state2_in = state1 - $signed(quantizer ? -19'h10000 : 19'h10000); // Q(-1,19) - Q(0,19) -> Q(0,19)
    end

    // clocked process
    always @(posedge clk)
    begin
        if (rst_n == 1'b0)
        begin
            state1 <= 0;
            state2 <= 0;
            lfsr_reg <= 8'hff;
        end
        else begin
`ifdef DEBUG_SDDAC
            $display("feedback : %f", qq*$pow(2.0,-15));
            $display("state1_in: %f", state1_in*$pow(2.0,-17));
            $display("state2_in: %f", state2_in*$pow(2.0,-19));
            $display("");
`endif
            // accumulate; each input is sign-extended by one bit to the
            // width of its integrator
            state1 <= state1 + $signed({ state1_in[16], state1_in});
            state2 <= state2 + $signed({ state2_in[18], state2_in});
            // output is 1 when the dithered integrator value is non-negative
            sd_out <= !quantizer;
            // shift the feedback bit in at the LSB
            lfsr_reg <= {lfsr_reg[6:0], lfsr_fb};
        end
    end

endmodule
#!/usr/bin/env python3
# Download the AXI-lite crossbar RTL from a commit-pinned wb2axip URL into the
# current directory.

import urllib.request

# Remote file (pinned to one upstream commit) and the local name it is stored under.
SOURCE_URL = 'https://raw.githubusercontent.com/ZipCPU/wb2axip/c6d7fb0390dd0a355963b2882cde1e98f1208087/rtl/axilxbar.v'
TARGET_FILE = 'axilxbar.v'

urllib.request.urlretrieve(SOURCE_URL, TARGET_FILE)
#!/usr/bin/env python3

## Generate a+b+c+d .. with inputs all the same width

import math

## extend the port given by the string 'word' by 'bits' zero bits
def extend(word, bits):
    """Return a Verilog concatenation that zero-extends `word` by `bits` bits.

    The placeholder tokens of the original template were missing, so the
    str.replace("", ...) calls spliced the replacement text between every
    character of the template.  Explicit format fields avoid that.
    """
    if bits <= 0:
        # a zero-width constant ("0'd0") is not legal Verilog
        return word
    return "{{{0}'d0, {1}}}".format(bits, word)

def gen_adder(nInputs, nWidth):
    """Write addertree_<nInputs>_<nWidth>.v into the current directory.

    The module takes all operands packed into one `din` vector
    (nInputs * nWidth bits, operand k in din[nWidth*(k+1)-1 : nWidth*k]) and
    assigns their sum to `dout`.
    """
    # number of extra bits needed so that nInputs full-scale operands cannot wrap
    bits = int(math.ceil(math.log2(nInputs)))
    terms = [extend("din[%d:%d]" % (nWidth*(k+1)-1, nWidth*k), bits)
             for k in range(nInputs)]
    body = " + ".join(terms) + ";"
    # NOTE(review): the original output-width token is not recoverable from the
    # source; nWidth + bits matches the width of the zero-extended operands.
    # Confirm against previously generated files.
    text = """
module addertree_{inputs}_{width} (input [{total}-1:0] din, output [{out}-1:0] dout);
    assign dout = {body}
endmodule
""".format(inputs=nInputs, width=nWidth, total=nWidth*nInputs,
           out=nWidth+bits, body=body)
    with open("addertree_%d_%d.v" % (nInputs, nWidth), "w") as f:
        print(text, file=f)

for nInputs in [3, 4, 5, 6, 7, 8]:
    for nWidth in [4, 5, 6, 7, 8]:
        gen_adder(nInputs, nWidth)
#!/usr/bin/env python3
# Emits one tiny Verilog module per operator / operand-width combination into
# the current directory.

def _write_module(name, a_decl, b_decl, y_msb, opstr):
    """Write <name>.v holding a single `assign Y = A <op> B;` module."""
    with open("%s.v" % name, "w") as f:
        f.write("module %s (input%s A, input%s B, output [%d:0] Y);\n"
                % (name, a_decl, b_decl, y_msb))
        f.write(" assign Y = A %s B;\n" % opstr)
        f.write("endmodule\n")

## Add, Sub, Mul

def _sum_width(i, j):
    # one bit more than the wider operand
    return max(i, j) + 1

def _product_width(i, j):
    return i + j

for opname, opstr, opsize in (("add", "+", _sum_width),
                              ("sub", "-", _sum_width),
                              ("mul", "*", _product_width)):
    for i in range(1, 16):
        # B is never narrower than A
        for j in range(i, 16):
            for suffix, signed_str in (("", ""), ("s", " signed")):
                _write_module("%s%s_%d_%d" % (opname, suffix, i, j),
                              "%s [%d:0]" % (signed_str, i-1),
                              "%s [%d:0]" % (signed_str, j-1),
                              opsize(i, j)-1, opstr)

## Shift Ops

for opname, opstr, signed_str in (("shl", "<<", ""),
                                  ("shr", ">>", ""),
                                  ("sshr", ">>>", " signed")):
    for i in range(1, 32):
        for j in range(1, 6):
            # the shift amount B is always unsigned; Y has the width of A
            _write_module("%s_%d_%d" % (opname, i, j),
                          "%s [%d:0]" % (signed_str, i-1),
                          " [%d:0]" % (j-1),
                          i-1, opstr)
This module decimates the incoming data stream by a factor of 5,
using five cascaded integrator stages followed by five comb filter stages.
It is a well-known, efficient DSP structure primarily found in high-speed
A/D conversion applications, such as Software Defined Radios (SDR).
// Testbench for 5th order CIC filter with decimation factor of 5
// Author: Niels A. Moseley
// Symbiotic EDA / Moseley Instruments
//
// 12-11-2018


module tb;

    reg clk = 0;
    reg rst_n = 0;
    reg signed [15:0] d_in = 0;
    // Driven by the DUT only.  The previous declaration `wire ... d_out = 0;`
    // is a net declaration assignment, i.e. a second continuous driver of
    // constant 0 that conflicts with the DUT output whenever it drives a 1.
    wire signed [27:0] d_out;
    wire d_out_valid;

    // clock generation
    always #1 clk=~clk;

    // devices under test
    cic5 dut(clk, rst_n, d_in, d_out, d_out_valid);

    initial
    begin
        $dumpfile("cic5_tb.vcd");
        $dumpvars;
        // positive full-scale step while reset is still asserted
        d_in <= 16'h7fff;
        #4 rst_n = 1'b1;
        // then switch to the negated value
        #60 d_in <= -16'h7fff;
        #60 $finish;
    end

endmodule
def gen_setclr(n, t, v):
    """Write <set|clr>_<n>_<t>.v: copy din to dout, then force bit `sel` to `v`.

    n -- width of din/dout in bits
    t -- coding style: 0 = indexed bit write, 1 = mask built from (1 << sel)
    v -- value written to the selected bit; a true value names the module
         "set", a false value "clr"

    Raises ValueError for a style/value combination that has no template
    (previously this surfaced as an unbound local variable).

    The placeholder tokens of the original template were missing, so the
    str.replace("", ...) calls spliced text between every character; the
    template is now filled through named format fields.
    """
    op = "set" if v else "clr"
    if t == 0:
        body = "dout[sel] = {0};".format(v)
    elif t == 1 and v == 0:
        body = "dout = dout & ~(1 << sel);"
    elif t == 1 and v == 1:
        body = "dout = dout | (1 << sel);"
    else:
        raise ValueError("unsupported set/clr variant: t=%r, v=%r" % (t, v))

    # smallest select width (at least one bit) that can address n positions
    s = 1
    while 2**s < n:
        s += 1

    text = """
module {op}_{n}_{t} (input [{s}-1:0] sel, input [{n}-1:0] din, output reg [{n}-1:0] dout);
    always @* begin dout = din; {body} end
endmodule
""".format(op=op, n=n, t=t, s=s, body=body)
    with open("%s_%d_%d.v" % (op, n, t), "w") as f:
        print(text, file=f)
* MAC - the result of A*B is added to the accumulator.
* NOP - the accumulator is left untouched.

Several versions of the module are generated by the Python script,
differing in the number of input and output bits.
// Self-checking testbench for dspmac_16_40.v
// Author: Niels A. Moseley
//
// The former `include "constants.vams"` was dropped: nothing in this
// testbench uses a constant or math function from it.

module tb;

    reg clk = 0;
    reg rst_n = 0;
    reg [1:0] opcode = 2'b11; // nop
    reg signed [15:0] a_bus = 0;
    reg signed [15:0] b_bus = 0;
    wire signed [39:0] result;

    integer errors = 0;

    // clock generation: rising edges at odd times, falling edges at even times
    always #1 clk=~clk;

    // devices under test
    dspmac_16_40 dut(clk, rst_n, opcode, a_bus, b_bus, result);

    // Compare the accumulator output against the expected value.  Always
    // called on a falling clock edge, i.e. after the last rising-edge update
    // has settled.
    task check;
        input signed [39:0] expected;
        begin
            if (result !== expected)
            begin
                $display("FAIL @%0t: result = %0d, expected %0d", $time, result, expected);
                errors = errors + 1;
            end
        end
    endtask

    initial
    begin
        $dumpfile("dspmac_16_40_tb.vcd");
        $dumpvars;

        opcode = 2'b00; //CLR

        #4 rst_n = 1'b1;
        check(40'sd0);              // two clock edges under reset -> accumulator cleared
        a_bus = 16'd32767;
        b_bus = 16'd32767;
        opcode = 2'b01; // MUL
        #2 check(40'sd1073676289);  // 32767 * 32767
        opcode = 2'b10; // MAC
        #2 check(40'sd2147352578);  // product added once more
        opcode = 2'b11; // NOP
        #2 check(40'sd2147352578);  // unchanged
        opcode = 2'b11; // NOP
        #2 check(40'sd2147352578);  // still unchanged

        if (errors == 0)
            $display("PASS");
        else
            $display("FAILED with %0d error(s)", errors);
        $finish;

    end

endmodule
#!/usr/bin/env python3

# From: https://www.xilinx.com/support/documentation/application_notes/xapp210.pdf
#
# One tuple per register length; the first element is the length (and the
# highest tap), the remaining elements are the other taps.  The entry for
# length 40 used to be listed twice, which regenerated lfsr_40.v a second time.
taps = [
    (3,2), (4,3), (5,3), (6,5), (7,6), (8,6,5,4), (9,5), (10,7),
    (11,9), (12,6,4,1), (13,4,3,1), (14,5,3,1), (15,14), (16,15,13,4),
    (17,14), (18,11), (19,6,2,1), (20,17), (21,19), (22,21), (23,18),
    (24,23,22,17), (25,22), (26,6,2,1), (27,5,2,1), (28,25), (29,27),
    (30,6,4,1), (31,28), (32,22,2,1), (33,20), (34,27,2,1), (35,33),
    (36,25), (37,5,4,3,2,1), (38,6,5,1), (39,35), (40,38,21,19),
    (41,38), (42,41,20,19), (43,42,38,37), (44,43,18,17), (45,44,42,41),
    (46,45,26,25), (47,42), (48,47,21,20), (49,40), (50,49,24,23),
    (51,50,36,35), (52,49), (53,52,38,37), (54,53,18,17), (55,31),
    (56,55,35,34), (57,50), (58,39), (59,58,38,37), (60,59),
    (61,60,46,45), (62,61,6,5), (63,62), (64,63,61,60), (65,47),
    (66,65,57,56), (67,66,58,57), (68,59), (69,67,42,40), (70,69,55,54),
    (71,65), (72,66,25,19), (73,48), (74,73,59,58), (75,74,65,64),
    (76,75,41,40), (77,76,47,46), (78,77,59,58), (79,70), (80,79,43,42),
    (81,77), (82,79,47,44), (83,82,38,37), (84,71), (85,84,58,57),
    (86,85,74,73), (87,74), (88,87,17,16), (89,51), (90,89,72,71),
    (91,90,8,7), (92,91,80,79), (93,91), (94,73), (95,84),
    (96,94,49,47), (97,91), (98,87), (99,97,54,52), (100,63),
    (101,100,95,94), (102,101,36,35), (103,94), (104,103,94,93), (105,89),
    (106,91), (107,105,44,42), (108,77), (109,108,103,102), (110,109,98,97),
    (111,101), (112,110,69,67), (113,104), (114,113,33,32), (115,114,101,100),
    (116,115,46,45), (117,115,99,97), (118,85), (119,111), (120,113,9,2),
    (121,103), (122,121,63,62), (123,121), (124,87), (125,124,18,17),
    (126,125,90,89), (127,126), (128,126,101,99), (129,124), (130,127),
    (131,130,84,83), (132,103), (133,132,82,81), (134,77), (135,124),
    (136,135,11,10), (137,116), (138,137,131,130), (139,136,134,131), (140,111),
    (141,140,110,109), (142,121), (143,142,123,122), (144,143,75,74), (145,93),
    (146,145,87,86), (147,146,110,109), (148,121), (149,148,40,39), (150,97),
    (151,148), (152,151,87,86), (153,152), (154,152,27,25), (155,154,124,123),
    (156,155,41,40), (157,156,131,130), (158,157,132,131), (159,128),
    (160,159,142,141), (161,143), (162,161,75,74), (163,162,104,103),
    (164,163,151,150), (165,164,135,134), (166,165,128,127), (167,161),
    (168,166,153,151),
]

def gen_lfsr(taps):
    """Write lfsr_<length>.v for one tap tuple.

    taps -- (length, tap, ...); every listed position, including the length
            itself, is XNORed together to form the bit shifted in at state[1].
    """
    length = taps[0]
    with open("lfsr_%d.v" % (length), "w") as f:
        # doubled braces are literal Verilog concatenation braces
        print("""
(* top *)
module lfsr_{0} (input clk, output dout);
    reg [{0}:1] state = {0}'b0;
    always @(posedge clk)
        state <= {{ state[{0}-1:1], {1} }};
    assign dout = state[{0}];
endmodule
""".format(length, ' ~^ '.join([ "state[%d]" % t for t in taps])), file=f)

for t in taps:
    gen_lfsr(t)
def _write_macc(first, second, first_tag, second_tag, R, E):
    """Write one macc_<first>_<second>_<R>_<E>.v file.

    `first`/`second` become the A/B operands of the generated module;
    `first_tag`/`second_tag` are the letters that stand for them in the
    register (R) and enable (E) strings.
    """
    name = 'macc_%s_%s_%s_%s' % (first, second, R, E)
    with open("%s.v" % name, "w") as f:
        print(rtl_macc(name,                                              # name
                       first.rstrip('s'), second.rstrip('s'),             # [AB]W
                       first_tag in R, second_tag in R, 'M' in R,         # [ABM]REG
                       's' in first, 's' in second,                       # [AB]_signed
                       first_tag in E, second_tag in E, 'M' in E, 'P' in E,  # CE[ABMP]
                       ), file=f)

def gen_macc(aRange, bRange, reg="AB"):
    """Generate multiply-accumulate benchmark files for every A x B width pair.

    aRange, bRange -- operand widths as strings; a trailing 's' marks the
                      operand as signed
    reg            -- letters of the stages that may be registered; 'P' is
                      removed from the register choices and, when present,
                      added to the enable choices instead

    For each pair, every subset R of the optional registers and every subset E
    of the possible clock enables produces one file; if the two widths differ,
    the operand-swapped module is written as well.
    """
    optional_regs = reg.replace('P', '')
    for A in aRange:
        for B in bRange:
            for R in map(''.join, powerset(optional_regs)):  # Register existence
                # The parentheses matter: `R + 'P' if 'P' in reg else ''` parses
                # as `(R + 'P') if 'P' in reg else ''`, so with the default
                # reg="AB" the enable set was always empty and no variant with
                # a clock enable was ever generated.
                enable_choices = R + ('P' if 'P' in reg else '')
                for E in map(''.join, powerset(enable_choices)):  # Enable
                    _write_macc(A, B, 'A', 'B', R, E)
                    if A != B:
                        _write_macc(B, A, 'B', 'A', R, E)
def rtl_mul(name, AW, BW, AREG, BREG, MREG, PREG, Asigned, Bsigned, CEA, CEB, CEM, CEP):
    """Return the Verilog text of a multiplier module called `name`.

    AW, BW            -- defaults of the AW/BW width parameters
    AREG..PREG        -- truthy: the matching *REG parameter defaults to 1
    Asigned, Bsigned  -- declare the operand signed; the product registers are
                         signed only when both operands are
    CEA..CEP          -- truthy: gate that register with the same-named enable
                         port, otherwise with the constant 1
    """
    def flag(on):
        return '1' if on else '0'

    def sign(on):
        return 'signed ' if on else ''

    def enable(port, used):
        return port if used else '1'

    fields = dict(
        name=name, AW=AW, BW=BW,
        AREG=flag(AREG), BREG=flag(BREG), MREG=flag(MREG), PREG=flag(PREG),
        sa=sign(Asigned), sb=sign(Bsigned), sp=sign(Asigned and Bsigned),
        cea=enable('CEA', CEA), ceb=enable('CEB', CEB),
        cem=enable('CEM', CEM), cep=enable('CEP', CEP),
    )
    template = "\n".join((
        "(* top *)",
        "module {name} #(parameter AW={AW}, BW={BW}, AREG={AREG}, BREG={BREG}, MREG={MREG}, PREG={PREG})"
        " (input clk, CEA, CEB, CEM, CEP, input {sa}[AW-1:0] A, input {sb}[BW-1:0] B,"
        " output reg {sp}[AW+BW-1:0] P);",
        "reg {sa}[AW-1:0] Ar;",
        "reg {sb}[BW-1:0] Br;",
        "reg {sp}[AW+BW-1:0] Mr;",
        "generate",
        "if (AREG) begin",
        "always @(posedge clk) if ({cea}) Ar <= A;",
        "end",
        "else",
        "always @* Ar <= A;",
        "if (BREG) begin",
        "always @(posedge clk) if ({ceb}) Br <= B;",
        "end",
        "else",
        "always @* Br <= B;",
        "if (MREG) begin",
        "always @(posedge clk) if ({cem}) Mr <= Ar * Br;",
        "end",
        "else",
        "always @* Mr <= Ar * Br;",
        "if (PREG) begin",
        "always @(posedge clk) if ({cep}) P <= Mr;",
        "end",
        "else",
        "always @* P <= Mr;",
        "endgenerate",
        "endmodule",
    ))
    return template.format(**fields)
def gen_mul(aRange, bRange, reg="ABP"):
    """Write one multiplier benchmark file per width pair, register set and enable set.

    aRange, bRange -- operand widths as strings; a trailing 's' marks the
                      operand as signed
    reg            -- letters of the stages that may be registered

    Every subset R of `reg` is combined with every subset E of R (a register
    can only have an enable if it exists).  If the two widths differ, the
    operand-swapped module is written right after the regular one.
    """
    def emit(first, second, first_tag, second_tag, regs, enables):
        # first/second become the A/B operands of the module; the tags are the
        # letters that represent them inside `regs` and `enables`
        name = 'mul_%s_%s_%s_%s' % (first, second, regs, enables)
        with open("%s.v" % name, "w") as f:
            print(rtl_mul(name,
                          first.rstrip('s'), second.rstrip('s'),                # [AB]W
                          first_tag in regs, second_tag in regs,
                          'M' in regs, 'P' in regs,                             # [ABMP]REG
                          's' in first, 's' in second,                          # [AB]_signed
                          first_tag in enables, second_tag in enables,
                          'M' in enables, 'P' in enables,                       # CE[ABMP]
                          ), file=f)

    for A in aRange:
        for B in bRange:
            variants = [(A, B, 'A', 'B')]
            if A != B:
                variants.append((B, A, 'B', 'A'))
            for reg_choice in powerset(reg):            # Register existence
                R = ''.join(reg_choice)
                for enable_choice in powerset(R):       # Enable
                    E = ''.join(enable_choice)
                    for first, second, first_tag, second_tag in variants:
                        emit(first, second, first_tag, second_tag, R, E)
def powerset(iterable):
    """Yield every subset of `iterable` as a tuple, smallest subsets first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    pool = list(iterable)
    # one combinations() iterator per subset size, 0 .. len(pool)
    by_size = (combinations(pool, size) for size in range(len(pool) + 1))
    return chain.from_iterable(by_size)
for R in map(lambda i:''.join(i), powerset(reg)): # Register existence 54 | for E in map(lambda i:''.join(i), powerset(R)): # Enable 55 | with open("muladd_%s_%s_%s_%s_%s.v" % (A,B,C,R,E), "w") as f: 56 | print(rtl_muladd('muladd_%s_%s_%s_%s_%s' % (A,B,C,R,E), # name 57 | A.rstrip('s'), B.rstrip('s'), C.rstrip('s'), # [ABC]W 58 | 'A' in R, 'B' in R, 'C' in R, 'M' in R, 'P' in R, # [ABCMP]REG 59 | 's' in A, 's' in B, 's' in C, # [ABC]_signed 60 | 'A' in E, 'B' in E, 'C' in E, 'M' in E, 'P' in E, # CE[ABCMP] 61 | ), file=f) 62 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/muladd/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from common import gen_muladd 4 | 5 | ARange = ['16','32s'] 6 | BRange = ['8','16s'] 7 | CRange = ['32','40s'] 8 | 9 | if __name__ == "__main__": 10 | gen_muladd(ARange, BRange, CRange) 11 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/mux/.gitignore: -------------------------------------------------------------------------------- 1 | mux_*.v 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/mux/README.md: -------------------------------------------------------------------------------- 1 | # Small multiplexers 2 | 3 | The python script generates a selection of multiplexer descriptions, using variable 4 | length index (e.g. `assign a = b [c]`) as well as `case` and `if`-`else` (balanced 5 | and unbalanced) styles, across a variety of power-of-2 and non-power-of-2 values 6 | for a number of inputs, as well as input width. 
7 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/mux/common.py: -------------------------------------------------------------------------------- 1 | from math import log2, ceil 2 | 3 | def gen_mux_index(N,W): 4 | with open("mux_index_%d_%d.v" % (N,W), "w") as f: 5 | print(""" 6 | (* top *) 7 | module mux_index_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output [W-1:0] o); 8 | assign o = i[s*W+:W]; 9 | endmodule 10 | """.format(N,W), file=f) 11 | 12 | def gen_mux_case(N,W): 13 | with open("mux_case_%d_%d.v" % (N,W), "w") as f: 14 | print(""" 15 | (* top *) 16 | module mux_case_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output reg [W-1:0] o); 17 | always @* 18 | case (s)""".format(N,W), file=f) 19 | for i in range( N): 20 | print(" {0}: o <= i[{0}*W+:W];".format(i), file=f) 21 | print(""" default: o <= {W{1'bx}}; 22 | endcase 23 | endmodule 24 | """, file=f) 25 | 26 | def gen_mux_if_unbal(N,W): 27 | with open("mux_if_unbal_%d_%d.v" % (N,W), "w") as f: 28 | print(""" 29 | (* top *) 30 | module mux_if_unbal_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output reg [W-1:0] o); 31 | always @*""".format(N,W), file=f) 32 | print(" if (s == 0) o <= i[0*W+:W];", file=f) 33 | for i in range(1,N): 34 | print(" else if (s == {0}) o <= i[{0}*W+:W];".format(i), file=f) 35 | print(" else o <= {W{1'bx}};", file=f) 36 | print(""" 37 | endmodule 38 | """, file=f) 39 | 40 | def _gen_mux_if_bal_rec(f, N, depth): 41 | indent = ' ' * depth 42 | if len(N) == 1: 43 | print(" {0}o <= i[{1}*W+:W];".format(indent, N[0]), file=f) 44 | else: 45 | print(" {0}if (s[{1}] == 1'b0)".format(indent, depth), file=f) 46 | i = ceil(log2(len(N))) - 1 47 | _gen_mux_if_bal_rec(f, N[:2**i], depth+1) 48 | if N[2**i:] != [None]*len(N[2**i:]): 49 | print(" {0}else".format(indent), file=f) 50 | _gen_mux_if_bal_rec(f, 
N[2**i:], depth+1) 51 | 52 | def gen_mux_if_bal(N,W): 53 | with open("mux_if_bal_%d_%d.v" % (N,W), "w") as f: 54 | print(""" 55 | (* top *) 56 | module mux_if_bal_{0}_{1} #(parameter N={0}, parameter W={1}) (input [N*W-1:0] i, input [$clog2(N)-1:0] s, output reg [W-1:0] o); 57 | always @* begin""".format(N,W), file=f) 58 | pad = (2 ** int(ceil(log2(N)))) - N 59 | print(" o <= {{W{{1'bx}}}};", file=f) 60 | _gen_mux_if_bal_rec(f, list(range(N)) + [None]*pad, 0) 61 | print("""end 62 | endmodule 63 | """, file=f) 64 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/mux/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from common import * 4 | 5 | if __name__ == "__main__": 6 | for N in [2,3,4,5] + [7,8,9] + [15,16,17] + [31,32,33]: 7 | for W in [1,2,3,4,5,8]: 8 | gen_mux_index(N,W) 9 | gen_mux_case(N,W) 10 | gen_mux_if_bal(N,W) 11 | gen_mux_if_unbal(N,W) 12 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/onehot/.gitignore: -------------------------------------------------------------------------------- 1 | onehot2bin_*.v 2 | bin2onehot_*.v 3 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/onehot/README.md: -------------------------------------------------------------------------------- 1 | # Onehot - binary to one-hot and one-hot to binary encoder/decoders 2 | 3 | An N-bit one-hot decoder has 2^N output signals. 4 | Only one of the output signals can be '1' at any time. 5 | The index of the output that is set high, is equal to 6 | the (unsigned) binary value at the input of the decoder. 7 | 8 | The python script generates one-hot encoders and decoders of varying widths. 9 | 10 | Binary to one-hot decoders are frequently used in D/A converters and RAM/ROM 11 | row and column selection circuits. 
12 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/onehot/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Generate one-hot to binary and binary to one-hot decoders 4 | 5 | import math 6 | 7 | def gen_bin2onehot(nBits): 8 | obits = int(math.pow(2,nBits)) # calculate output bits 9 | with open("bin2onehot_%d.v" % (nBits), "w") as f: 10 | print(""" 11 | module bin2onehot_ (input [-1:0] din, output reg [-1:0] dout); 12 | always @(din) begin 13 | dout = 0; 14 | dout[din] = 1'b1; 15 | end 16 | endmodule 17 | """.replace("", str(nBits)).replace("", str(obits)), file=f) 18 | 19 | 20 | def gen_onehot2bin(nBits): 21 | obits = int(math.ceil(math.log2(nBits))) # calculate output bits 22 | with open("onehot2bin_%d.v" % (nBits), "w") as f: 23 | print(""" 24 | module onehot2bin_ (input [-1:0] din, output reg [-1:0] dout); 25 | always @(din) begin 26 | dout = 0; 27 | case(din) 28 | """.replace("", str(nBits)).replace("", str(obits)), file=f) 29 | ## emit switch case data.. 
30 | for I in range(0, nBits): 31 | print(""" 'd : dout = 'd;""".replace("", str(int(math.pow(2,I)))).replace("", str(I)).replace("", str(obits)).replace("", str(nBits)), file=f) 32 | print(""" 33 | default: ; 34 | endcase 35 | end 36 | endmodule""", file=f) 37 | 38 | 39 | for nBits in [1,2,3,4,5,6,7,8]: 40 | gen_bin2onehot(nBits) 41 | 42 | for nBits in [1,2,3,4,5,6,7,8,16,32,64]: 43 | gen_onehot2bin(nBits) 44 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/popcount/.gitignore: -------------------------------------------------------------------------------- 1 | popcount_*.v 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/popcount/README.md: -------------------------------------------------------------------------------- 1 | # popcount 2 | 3 | This module counts the number of '1' bits at its input. 4 | It is known as the 'population count'. 5 | 6 | The python script generates various popcount modules varying in input width. 7 | 8 | Population counting is used in error correction coding, parity checking, machine learning and encryption applications.
9 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/popcount/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Generate population count of a specified width 4 | 5 | import math 6 | 7 | def gen_popcount(nBits): 8 | obits = int(math.ceil(math.log2(nBits))) # calculate output bits 9 | with open("popcount_%d.v" % (nBits), "w") as f: 10 | body = "din[0]" 11 | for I in range(1, nBits): 12 | body = body + " + din[" + str(I) + "]" 13 | body = body + ";" 14 | print(""" 15 | module popcount_ (input [-1:0] din, output [-1:0] dout); 16 | assign dout = 17 | endmodule 18 | """.replace("", body).replace("", str(nBits)).replace("", str(obits)), file=f) 19 | 20 | for nBits in [2,3,4,5,6,7,8,16,32,64]: 21 | gen_popcount(nBits) 22 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/priodecode/.gitignore: -------------------------------------------------------------------------------- 1 | priodecoder_*.v 2 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/priodecode/README.md: -------------------------------------------------------------------------------- 1 | # priodecode - Priority decoders 2 | 3 | The priority decoder takes a bit-vector of request signals and 4 | lets the request with the most weight through. I.e. only one of the 5 | output signals is high, or none of the output signals are high. 6 | 7 | The Python script generates priority decoders of various widths. 8 | 9 | Priority decoders are using in interrupt processing, where the 10 | interrupt with the highest priority should be serviced first. 
11 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/priodecode/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Priority decoders 4 | 5 | import math 6 | 7 | def gen_priodecoder(nBits): 8 | with open("priodecoder_%d.v" % (nBits), "w") as f: 9 | print(""" 10 | module priodecoder_ (input [-1:0] din, output [-1:0] dout); 11 | assign dout = din & (~din-1); 12 | endmodule 13 | """.replace("", str(nBits)), file=f) 14 | 15 | for nBits in range(2,16+1): 16 | gen_priodecoder(nBits) 17 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/ram/.gitignore: -------------------------------------------------------------------------------- 1 | syncram_*.v 2 | dualport_syncram_*.v 3 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/ram/dualport_syncram.template: -------------------------------------------------------------------------------- 1 | // Dual-port Synchronous RAM template file 2 | // Author: Niels A. Moseley 3 | // 4 | // Modelled after the ICE40 SBRAM blocks but with only one clock domain 5 | // Also, it is assumed that the read enable is always asserted. 6 | // I don't know what happens to the read data when the address is 7 | // simultaneously written. 
8 | // rdata is registered: it is loaded from mem[raddr] on every clock edge where cs is high. 9 | // 10 | 11 | module dualport_syncram__ ( 12 | clk, // common clock 13 | cs, // active-high chip select 14 | we, // active-high write enable 15 | waddr, // write address 16 | wdata, // write data input 17 | raddr, // read address 18 | rdata // read data output (last port: no trailing comma) 19 | ); 20 | 21 | input clk,cs,we; 22 | input [-1:0] waddr; 23 | input [-1:0] raddr; 24 | input [-1:0] wdata; 25 | output reg [-1:0] rdata; 26 | 27 | reg [-1:0] mem [0:-1]; 28 | 29 | always @(posedge clk) 30 | begin 31 | if (cs == 1'b1) begin 32 | if (we) begin 33 | mem[waddr] <= wdata; 34 | end 35 | rdata <= mem[raddr]; 36 | end // chip select 37 | end 38 | 39 | endmodule 40 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/ram/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Generate various synchronous RAM types with 2^ addresses and bit data path. 4 | 5 | import math 6 | 7 | # generic synchronous RAM 8 | def gen_syncram(addrWidth, ioWidth, template): 9 | with open("syncram_%d_%d.v" % (addrWidth, ioWidth), "w") as f: 10 | print(template.replace("", str(addrWidth)).replace("", str(ioWidth)), file=f) 11 | 12 | # generic synchronous RAM with transparent write-through 13 | def gen_syncram_tw(addrWidth, ioWidth, template): 14 | with open("syncram_tw_%d_%d.v" % (addrWidth, ioWidth), "w") as f: 15 | print(template.replace("", str(addrWidth)).replace("", str(ioWidth)), file=f) 16 | 17 | # generic single-write/single-read synchronous RAM 18 | def gen_dualportsyncram(addrWidth, ioWidth, template): 19 | with open("dualport_syncram_%d_%d.v" % (addrWidth, ioWidth), "w") as f: 20 | print(template.replace("", str(addrWidth)).replace("", str(ioWidth)), file=f) 21 | 22 | 23 | 24 | with open('syncram.template','rt') as templatefile: 25 | template = templatefile.read() 26 | 27 | for ioWidth in [4,8,12,16]: 28 | for addrBits in [4,8,9,10,11,12]: 29 | gen_syncram(addrBits, 
ioWidth, ''.join(template)) 30 | 31 | with open('syncram_tw.template','rt') as templatefile: 32 | template = templatefile.read() 33 | 34 | for ioWidth in [4,8,12,16]: 35 | for addrBits in [4,8,9,10,11,12]: 36 | gen_syncram_tw(addrBits, ioWidth, ''.join(template)) 37 | 38 | with open('dualport_syncram.template','rt') as templatefile: 39 | template = templatefile.read() 40 | 41 | for ioWidth in [4,7,8,12,16]: 42 | for addrBits in [4,8,9,10,11,12]: 43 | gen_dualportsyncram(addrBits, ioWidth, ''.join(template)) 44 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/ram/syncram.template: -------------------------------------------------------------------------------- 1 | // Synchronous RAM template file 2 | // Author: Niels A. Moseley 3 | // 4 | // Data out holds its previous value when data is written 5 | // 6 | 7 | module syncram__ ( 8 | clk, 9 | cs, // active-high chip select 10 | we, // active-high write enable 11 | addr_in, 12 | data_in, 13 | data_out 14 | ); 15 | 16 | input clk,cs,we; 17 | input [-1:0] addr_in; 18 | input [-1:0] data_in; 19 | output reg [-1:0] data_out; 20 | 21 | reg [-1:0] mem [0:-1]; 22 | 23 | always @(posedge clk) 24 | begin 25 | if (cs == 1'b1) begin 26 | if (we) begin 27 | mem[addr_in] <= data_in; 28 | end else begin 29 | data_out <= mem[addr_in]; 30 | end // write enable 31 | end // chip select 32 | end 33 | 34 | endmodule 35 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/ram/syncram_tw.template: -------------------------------------------------------------------------------- 1 | // Synchronous RAM template file 2 | // Author: Niels A. 
Moseley 3 | // 4 | // Featuring pass-through-on-write logic 5 | // 6 | 7 | module syncram_tw__ ( 8 | clk, 9 | cs, // active-high chip select 10 | we, // active-high write enable 11 | addr_in, 12 | data_in, 13 | data_out 14 | ); 15 | 16 | input clk,cs,we; 17 | input [-1:0] addr_in; 18 | input [-1:0] data_in; 19 | output reg [-1:0] data_out; 20 | 21 | reg [-1:0] mem [0:-1]; 22 | 23 | always @(posedge clk) 24 | begin 25 | if (cs == 1'b1) begin 26 | if (we) begin 27 | data_out <= data_in; // pass-through data at input to output during write operations 28 | mem[addr_in] <= data_in; 29 | end else begin 30 | data_out <= mem[addr_in]; 31 | end // write enable 32 | end // chip select 33 | end 34 | 35 | endmodule 36 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/.gitignore: -------------------------------------------------------------------------------- 1 | *.vcd 2 | *.vvp 3 | *_netlist.v 4 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/README.md: -------------------------------------------------------------------------------- 1 | # Various - various designs 2 | 3 | The following designs can be found here: 4 | 5 | ## latch8 6 | 7 | A simple 8-bit latch, like grandmother used to make them. 8 | 9 | ## crc32 10 | 11 | A 32-bit CRC based on https://msdn.microsoft.com/en-us/library/dd905031.aspx 12 | It has an 8-bit input and 32-bit output. 13 | The CRC is updated on every clock. 14 | 15 | ## pwm256 16 | 17 | An 8-bit counter and comparator can generate a pulse-width modulated single-bit output. 18 | This PWM module can be used to control the brightness of an LED, or be used (after analogue filtering) as a D/A converter featuring impressive intermodulation distortion. 
19 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/crc32.v: -------------------------------------------------------------------------------- 1 | // CRC32 based on https://msdn.microsoft.com/en-us/library/dd905031.aspx 2 | // 3 | // 4 | // 5 | // 6 | // 7 | 8 | module crc32 (input [7:0] din, input clk, input rst_n, output [31:0] dout); 9 | 10 | reg [31:0] crctbl [255:0]; 11 | reg [31:0] crc; 12 | 13 | initial 14 | begin 15 | crctbl[0] <= 32'h00000000; 16 | crctbl[1] <= 32'h77073096; 17 | crctbl[2] <= 32'hEE0E612C; 18 | crctbl[3] <= 32'h990951BA; 19 | crctbl[4] <= 32'h076DC419; 20 | crctbl[5] <= 32'h706AF48F; 21 | crctbl[6] <= 32'hE963A535; 22 | crctbl[7] <= 32'h9E6495A3; 23 | crctbl[8] <= 32'h0EDB8832; 24 | crctbl[9] <= 32'h79DCB8A4; 25 | crctbl[10] <= 32'hE0D5E91E; 26 | crctbl[11] <= 32'h97D2D988; 27 | crctbl[12] <= 32'h09B64C2B; 28 | crctbl[13] <= 32'h7EB17CBD; 29 | crctbl[14] <= 32'hE7B82D07; 30 | crctbl[15] <= 32'h90BF1D91; 31 | crctbl[16] <= 32'h1DB71064; 32 | crctbl[17] <= 32'h6AB020F2; 33 | crctbl[18] <= 32'hF3B97148; 34 | crctbl[19] <= 32'h84BE41DE; 35 | crctbl[20] <= 32'h1ADAD47D; 36 | crctbl[21] <= 32'h6DDDE4EB; 37 | crctbl[22] <= 32'hF4D4B551; 38 | crctbl[23] <= 32'h83D385C7; 39 | crctbl[24] <= 32'h136C9856; 40 | crctbl[25] <= 32'h646BA8C0; 41 | crctbl[26] <= 32'hFD62F97A; 42 | crctbl[27] <= 32'h8A65C9EC; 43 | crctbl[28] <= 32'h14015C4F; 44 | crctbl[29] <= 32'h63066CD9; 45 | crctbl[30] <= 32'hFA0F3D63; 46 | crctbl[31] <= 32'h8D080DF5; 47 | crctbl[32] <= 32'h3B6E20C8; 48 | crctbl[33] <= 32'h4C69105E; 49 | crctbl[34] <= 32'hD56041E4; 50 | crctbl[35] <= 32'hA2677172; 51 | crctbl[36] <= 32'h3C03E4D1; 52 | crctbl[37] <= 32'h4B04D447; 53 | crctbl[38] <= 32'hD20D85FD; 54 | crctbl[39] <= 32'hA50AB56B; 55 | crctbl[40] <= 32'h35B5A8FA; 56 | crctbl[41] <= 32'h42B2986C; 57 | crctbl[42] <= 32'hDBBBC9D6; 58 | crctbl[43] <= 32'hACBCF940; 59 | crctbl[44] <= 32'h32D86CE3; 60 | crctbl[45] <= 32'h45DF5C75; 
61 | crctbl[46] <= 32'hDCD60DCF; 62 | crctbl[47] <= 32'hABD13D59; 63 | crctbl[48] <= 32'h26D930AC; 64 | crctbl[49] <= 32'h51DE003A; 65 | crctbl[50] <= 32'hC8D75180; 66 | crctbl[51] <= 32'hBFD06116; 67 | crctbl[52] <= 32'h21B4F4B5; 68 | crctbl[53] <= 32'h56B3C423; 69 | crctbl[54] <= 32'hCFBA9599; 70 | crctbl[55] <= 32'hB8BDA50F; 71 | crctbl[56] <= 32'h2802B89E; 72 | crctbl[57] <= 32'h5F058808; 73 | crctbl[58] <= 32'hC60CD9B2; 74 | crctbl[59] <= 32'hB10BE924; 75 | crctbl[60] <= 32'h2F6F7C87; 76 | crctbl[61] <= 32'h58684C11; 77 | crctbl[62] <= 32'hC1611DAB; 78 | crctbl[63] <= 32'hB6662D3D; 79 | crctbl[64] <= 32'h76DC4190; 80 | crctbl[65] <= 32'h01DB7106; 81 | crctbl[66] <= 32'h98D220BC; 82 | crctbl[67] <= 32'hEFD5102A; 83 | crctbl[68] <= 32'h71B18589; 84 | crctbl[69] <= 32'h06B6B51F; 85 | crctbl[70] <= 32'h9FBFE4A5; 86 | crctbl[71] <= 32'hE8B8D433; 87 | crctbl[72] <= 32'h7807C9A2; 88 | crctbl[73] <= 32'h0F00F934; 89 | crctbl[74] <= 32'h9609A88E; 90 | crctbl[75] <= 32'hE10E9818; 91 | crctbl[76] <= 32'h7F6A0DBB; 92 | crctbl[77] <= 32'h086D3D2D; 93 | crctbl[78] <= 32'h91646C97; 94 | crctbl[79] <= 32'hE6635C01; 95 | crctbl[80] <= 32'h6B6B51F4; 96 | crctbl[81] <= 32'h1C6C6162; 97 | crctbl[82] <= 32'h856530D8; 98 | crctbl[83] <= 32'hF262004E; 99 | crctbl[84] <= 32'h6C0695ED; 100 | crctbl[85] <= 32'h1B01A57B; 101 | crctbl[86] <= 32'h8208F4C1; 102 | crctbl[87] <= 32'hF50FC457; 103 | crctbl[88] <= 32'h65B0D9C6; 104 | crctbl[89] <= 32'h12B7E950; 105 | crctbl[90] <= 32'h8BBEB8EA; 106 | crctbl[91] <= 32'hFCB9887C; 107 | crctbl[92] <= 32'h62DD1DDF; 108 | crctbl[93] <= 32'h15DA2D49; 109 | crctbl[94] <= 32'h8CD37CF3; 110 | crctbl[95] <= 32'hFBD44C65; 111 | crctbl[96] <= 32'h4DB26158; 112 | crctbl[97] <= 32'h3AB551CE; 113 | crctbl[98] <= 32'hA3BC0074; 114 | crctbl[99] <= 32'hD4BB30E2; 115 | crctbl[100] <= 32'h4ADFA541; 116 | crctbl[101] <= 32'h3DD895D7; 117 | crctbl[102] <= 32'hA4D1C46D; 118 | crctbl[103] <= 32'hD3D6F4FB; 119 | crctbl[104] <= 32'h4369E96A; 120 | crctbl[105] <= 
32'h346ED9FC; 121 | crctbl[106] <= 32'hAD678846; 122 | crctbl[107] <= 32'hDA60B8D0; 123 | crctbl[108] <= 32'h44042D73; 124 | crctbl[109] <= 32'h33031DE5; 125 | crctbl[110] <= 32'hAA0A4C5F; 126 | crctbl[111] <= 32'hDD0D7CC9; 127 | crctbl[112] <= 32'h5005713C; 128 | crctbl[113] <= 32'h270241AA; 129 | crctbl[114] <= 32'hBE0B1010; 130 | crctbl[115] <= 32'hC90C2086; 131 | crctbl[116] <= 32'h5768B525; 132 | crctbl[117] <= 32'h206F85B3; 133 | crctbl[118] <= 32'hB966D409; 134 | crctbl[119] <= 32'hCE61E49F; 135 | crctbl[120] <= 32'h5EDEF90E; 136 | crctbl[121] <= 32'h29D9C998; 137 | crctbl[122] <= 32'hB0D09822; 138 | crctbl[123] <= 32'hC7D7A8B4; 139 | crctbl[124] <= 32'h59B33D17; 140 | crctbl[125] <= 32'h2EB40D81; 141 | crctbl[126] <= 32'hB7BD5C3B; 142 | crctbl[127] <= 32'hC0BA6CAD; 143 | crctbl[128] <= 32'hEDB88320; 144 | crctbl[129] <= 32'h9ABFB3B6; 145 | crctbl[130] <= 32'h03B6E20C; 146 | crctbl[131] <= 32'h74B1D29A; 147 | crctbl[132] <= 32'hEAD54739; 148 | crctbl[133] <= 32'h9DD277AF; 149 | crctbl[134] <= 32'h04DB2615; 150 | crctbl[135] <= 32'h73DC1683; 151 | crctbl[136] <= 32'hE3630B12; 152 | crctbl[137] <= 32'h94643B84; 153 | crctbl[138] <= 32'h0D6D6A3E; 154 | crctbl[139] <= 32'h7A6A5AA8; 155 | crctbl[140] <= 32'hE40ECF0B; 156 | crctbl[141] <= 32'h9309FF9D; 157 | crctbl[142] <= 32'h0A00AE27; 158 | crctbl[143] <= 32'h7D079EB1; 159 | crctbl[144] <= 32'hF00F9344; 160 | crctbl[145] <= 32'h8708A3D2; 161 | crctbl[146] <= 32'h1E01F268; 162 | crctbl[147] <= 32'h6906C2FE; 163 | crctbl[148] <= 32'hF762575D; 164 | crctbl[149] <= 32'h806567CB; 165 | crctbl[150] <= 32'h196C3671; 166 | crctbl[151] <= 32'h6E6B06E7; 167 | crctbl[152] <= 32'hFED41B76; 168 | crctbl[153] <= 32'h89D32BE0; 169 | crctbl[154] <= 32'h10DA7A5A; 170 | crctbl[155] <= 32'h67DD4ACC; 171 | crctbl[156] <= 32'hF9B9DF6F; 172 | crctbl[157] <= 32'h8EBEEFF9; 173 | crctbl[158] <= 32'h17B7BE43; 174 | crctbl[159] <= 32'h60B08ED5; 175 | crctbl[160] <= 32'hD6D6A3E8; 176 | crctbl[161] <= 32'hA1D1937E; 177 | crctbl[162] <= 
32'h38D8C2C4; 178 | crctbl[163] <= 32'h4FDFF252; 179 | crctbl[164] <= 32'hD1BB67F1; 180 | crctbl[165] <= 32'hA6BC5767; 181 | crctbl[166] <= 32'h3FB506DD; 182 | crctbl[167] <= 32'h48B2364B; 183 | crctbl[168] <= 32'hD80D2BDA; 184 | crctbl[169] <= 32'hAF0A1B4C; 185 | crctbl[170] <= 32'h36034AF6; 186 | crctbl[171] <= 32'h41047A60; 187 | crctbl[172] <= 32'hDF60EFC3; 188 | crctbl[173] <= 32'hA867DF55; 189 | crctbl[174] <= 32'h316E8EEF; 190 | crctbl[175] <= 32'h4669BE79; 191 | crctbl[176] <= 32'hCB61B38C; 192 | crctbl[177] <= 32'hBC66831A; 193 | crctbl[178] <= 32'h256FD2A0; 194 | crctbl[179] <= 32'h5268E236; 195 | crctbl[180] <= 32'hCC0C7795; 196 | crctbl[181] <= 32'hBB0B4703; 197 | crctbl[182] <= 32'h220216B9; 198 | crctbl[183] <= 32'h5505262F; 199 | crctbl[184] <= 32'hC5BA3BBE; 200 | crctbl[185] <= 32'hB2BD0B28; 201 | crctbl[186] <= 32'h2BB45A92; 202 | crctbl[187] <= 32'h5CB36A04; 203 | crctbl[188] <= 32'hC2D7FFA7; 204 | crctbl[189] <= 32'hB5D0CF31; 205 | crctbl[190] <= 32'h2CD99E8B; 206 | crctbl[191] <= 32'h5BDEAE1D; 207 | crctbl[192] <= 32'h9B64C2B0; 208 | crctbl[193] <= 32'hEC63F226; 209 | crctbl[194] <= 32'h756AA39C; 210 | crctbl[195] <= 32'h026D930A; 211 | crctbl[196] <= 32'h9C0906A9; 212 | crctbl[197] <= 32'hEB0E363F; 213 | crctbl[198] <= 32'h72076785; 214 | crctbl[199] <= 32'h05005713; 215 | crctbl[200] <= 32'h95BF4A82; 216 | crctbl[201] <= 32'hE2B87A14; 217 | crctbl[202] <= 32'h7BB12BAE; 218 | crctbl[203] <= 32'h0CB61B38; 219 | crctbl[204] <= 32'h92D28E9B; 220 | crctbl[205] <= 32'hE5D5BE0D; 221 | crctbl[206] <= 32'h7CDCEFB7; 222 | crctbl[207] <= 32'h0BDBDF21; 223 | crctbl[208] <= 32'h86D3D2D4; 224 | crctbl[209] <= 32'hF1D4E242; 225 | crctbl[210] <= 32'h68DDB3F8; 226 | crctbl[211] <= 32'h1FDA836E; 227 | crctbl[212] <= 32'h81BE16CD; 228 | crctbl[213] <= 32'hF6B9265B; 229 | crctbl[214] <= 32'h6FB077E1; 230 | crctbl[215] <= 32'h18B74777; 231 | crctbl[216] <= 32'h88085AE6; 232 | crctbl[217] <= 32'hFF0F6A70; 233 | crctbl[218] <= 32'h66063BCA; 234 | crctbl[219] <= 
32'h11010B5C; 235 | crctbl[220] <= 32'h8F659EFF; 236 | crctbl[221] <= 32'hF862AE69; 237 | crctbl[222] <= 32'h616BFFD3; 238 | crctbl[223] <= 32'h166CCF45; 239 | crctbl[224] <= 32'hA00AE278; 240 | crctbl[225] <= 32'hD70DD2EE; 241 | crctbl[226] <= 32'h4E048354; 242 | crctbl[227] <= 32'h3903B3C2; 243 | crctbl[228] <= 32'hA7672661; 244 | crctbl[229] <= 32'hD06016F7; 245 | crctbl[230] <= 32'h4969474D; 246 | crctbl[231] <= 32'h3E6E77DB; 247 | crctbl[232] <= 32'hAED16A4A; 248 | crctbl[233] <= 32'hD9D65ADC; 249 | crctbl[234] <= 32'h40DF0B66; 250 | crctbl[235] <= 32'h37D83BF0; 251 | crctbl[236] <= 32'hA9BCAE53; 252 | crctbl[237] <= 32'hDEBB9EC5; 253 | crctbl[238] <= 32'h47B2CF7F; 254 | crctbl[239] <= 32'h30B5FFE9; 255 | crctbl[240] <= 32'hBDBDF21C; 256 | crctbl[241] <= 32'hCABAC28A; 257 | crctbl[242] <= 32'h53B39330; 258 | crctbl[243] <= 32'h24B4A3A6; 259 | crctbl[244] <= 32'hBAD03605; 260 | crctbl[245] <= 32'hCDD70693; 261 | crctbl[246] <= 32'h54DE5729; 262 | crctbl[247] <= 32'h23D967BF; 263 | crctbl[248] <= 32'hB3667A2E; 264 | crctbl[249] <= 32'hC4614AB8; 265 | crctbl[250] <= 32'h5D681B02; 266 | crctbl[251] <= 32'h2A6F2B94; 267 | crctbl[252] <= 32'hB40BBE37; 268 | crctbl[253] <= 32'hC30C8EA1; 269 | crctbl[254] <= 32'h5A05DF1B; 270 | crctbl[255] <= 32'h2D02EF8D; 271 | end 272 | 273 | always @(posedge clk) 274 | begin 275 | if (rst_n == 1'b0) 276 | crc <= 32'hFFFFFFFF; 277 | else 278 | crc <= (crc >> 8) ^ crctbl[(crc[7:0] ^ din)]; 279 | end 280 | 281 | assign dout = crc; 282 | 283 | endmodule -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/latch.v: -------------------------------------------------------------------------------- 1 | // Generate a simple 8-bit latch 2 | // Author: Niels A. 
Moseley 3 | // Moseley Instruments / Symbiotic EDA 4 | // 02-11-2018 5 | // 6 | 7 | module latch(input [7:0] din, input gate, output reg [7:0] dout); 8 | 9 | reg [7:0] state; 10 | 11 | always @(gate or din) 12 | begin 13 | if (gate == 1'b1) 14 | begin 15 | dout <= din; 16 | end 17 | end 18 | 19 | endmodule -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/pwm256.v: -------------------------------------------------------------------------------- 1 | // 256-level PWM generator 2 | // Author: Niels A. Moseley 3 | // Symbiotic EDA / Moseley Instruments 4 | // 10-11-2018 5 | 6 | module pwm256( 7 | input clk, 8 | input rst_n, 9 | input [7:0] d_in, 10 | output reg pwm_out 11 | ); 12 | 13 | reg signed [7:0] counter; 14 | 15 | always @(posedge clk or negedge rst_n) 16 | begin 17 | if (rst_n == 1'b0) 18 | begin 19 | counter <= 8'd0; 20 | pwm_out <= 1'b0; 21 | end 22 | else 23 | begin 24 | counter <= counter + 8'd1; 25 | if (counter >= d_in) 26 | pwm_out <= 1'b1; 27 | else 28 | pwm_out <= 1'b0; 29 | end 30 | end 31 | 32 | endmodule 33 | -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/pwm256_tb.v: -------------------------------------------------------------------------------- 1 | // Testbench for 256-level PWM generator 2 | // Author: Niels A. 
Moseley 3 | // Symbiotic EDA / Moseley Instruments 4 | // 10-11-2018 5 | 6 | module tb; 7 | 8 | reg clk = 0; 9 | reg rst_n = 0; 10 | reg [7:0] d_in = 8'd128; 11 | wire pwm; 12 | 13 | // clock generation 14 | always #1 clk=~clk; 15 | 16 | // devices under test 17 | pwm256 dut(clk, rst_n, d_in, pwm); 18 | 19 | initial 20 | begin 21 | $dumpfile("pwm256_tb.vcd"); 22 | $dumpvars; 23 | 24 | #4 rst_n = 1'b1; 25 | #516 d_in = 8'd10; 26 | #1028 d_in = 8'd246; 27 | #1540 $finish; 28 | 29 | end 30 | 31 | endmodule -------------------------------------------------------------------------------- /verilog/benchmarks_small/various/run_testbench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | iverilog -o pwm256_tb.vvp pwm256_tb.v pwm256.v 4 | vvp pwm256_tb.vvp 5 | -------------------------------------------------------------------------------- /vhdl/benchmarks_large/cordic/.gitignore: -------------------------------------------------------------------------------- 1 | cordic_*_*.vhdl 2 | *.ghw 3 | -------------------------------------------------------------------------------- /vhdl/benchmarks_large/cordic/cordic.template: -------------------------------------------------------------------------------- 1 | -- pipelined CORDIC algorithm to calculate sin/cos pair from a given angle (0..1) 2 | -- Author: Niels A. 
Moseley 3 | -- 4 | 5 | library ieee; 6 | use ieee.std_logic_1164.all; 7 | use ieee.numeric_std.all; 8 | 9 | -- Define the cordic stage 10 | entity cordic_stage_ is 11 | generic (shiftN : integer); 12 | port 13 | ( 14 | clk : in std_logic; 15 | rst_n : in std_logic; 16 | x_in : in signed(-1 downto 0); 17 | y_in : in signed(-1 downto 0); 18 | angle_in : in signed(-1 downto 0); 19 | angle_adj : in signed(-1 downto 0); 20 | x_out : out signed(-1 downto 0); 21 | y_out : out signed(-1 downto 0); 22 | angle_out : out signed(-1 downto 0) 23 | ); 24 | end cordic_stage_; 25 | 26 | architecture rtl of cordic_stage_ is 27 | signal new_x : signed(-1 downto 0); 28 | signal new_y : signed(-1 downto 0); 29 | signal new_angle : signed(-1 downto 0); 30 | begin 31 | 32 | -- combination process 33 | proc_comb: process(x_in, y_in, angle_in, angle_adj) 34 | alias sign : std_logic is angle_in(-1); 35 | variable shifted_x : signed(-1 downto 0); 36 | variable shifted_y : signed(-1 downto 0); 37 | begin 38 | 39 | shifted_x := shift_right(x_in, shiftN); 40 | shifted_y := shift_right(y_in, shiftN); 41 | 42 | if (sign = '1') then 43 | new_x <= x_in + shifted_y; 44 | new_y <= y_in - shifted_x; 45 | new_angle <= angle_in + angle_adj; 46 | else 47 | new_x <= x_in - shifted_y; 48 | new_y <= y_in + shifted_x; 49 | new_angle <= angle_in - angle_adj; 50 | end if; 51 | end process proc_comb; 52 | 53 | -- clocked process 54 | proc_clk: process(clk) 55 | begin 56 | if (rising_edge(clk)) then 57 | if (rst_n = '0') then 58 | -- in reset 59 | x_out <= (others => '0'); 60 | y_out <= (others => '0'); 61 | angle_out <= (others => '0'); 62 | else 63 | x_out <= new_x; 64 | y_out <= new_y; 65 | angle_out <= new_angle; 66 | end if; 67 | end if; 68 | end process proc_clk; 69 | 70 | end rtl; 71 | 72 | 73 | 74 | library ieee; 75 | use ieee.std_logic_1164.all; 76 | use ieee.numeric_std.all; 77 | 78 | -- Define the main CORDIC entity 79 | entity cordic__ is 80 | port 81 | ( 82 | clk : in std_logic; 83 | rst_n : in 
std_logic; 84 | angle_in : in signed(-1 downto 0); 85 | sin_out : out signed(-1 downto 0); 86 | cos_out : out signed(-1 downto 0) 87 | ); 88 | end cordic__; 89 | 90 | architecture rtl of cordic__ is 91 | signal x_in : signed (-1 downto 0); 92 | signal y_in : signed (-1 downto 0); 93 | signal z_in : signed (-1 downto 0); 94 | 95 | type bus_t IS ARRAY (0 to -1) of signed(-1 downto 0); 96 | 97 | signal xbus : bus_t; 98 | signal ybus : bus_t; 99 | signal zbus : bus_t; 100 | begin 101 | 102 | -- combination process 103 | proc_comb: process(angle_in) 104 | begin 105 | case angle_in(-1 downto -2) is 106 | when "00" => 107 | x_in <= ; 108 | y_in <= (others=>'0'); 109 | z_in <= angle_in; 110 | when "11" => 111 | x_in <= ; 112 | y_in <= (others=>'0'); 113 | z_in <= angle_in; 114 | when "01" => 115 | x_in <= (others=>'0'); 116 | y_in <= ; 117 | z_in <= "00" & angle_in(-3 downto 0); 118 | when "10" => 119 | x_in <= (others=>'0'); 120 | y_in <= -; 121 | z_in <= "11" & angle_in(-3 downto 0); 122 | when others => 123 | x_in <= (others=>'0'); 124 | y_in <= (others=>'0'); 125 | z_in <= (others=>'0'); 126 | end case; 127 | end process proc_comb; 128 | 129 | -- generate instances of cordic_stage here.. 130 | 131 | 132 | -- permanently assign outputs 133 | cos_out <= xbus(-1); 134 | sin_out <= ybus(-1); 135 | 136 | end rtl; -------------------------------------------------------------------------------- /vhdl/benchmarks_large/cordic/cordic_tb.vhdl: -------------------------------------------------------------------------------- 1 | -- testbench for cordic_10_16.vhdl 2 | -- Author: Niels A. 
Moseley 3 | 4 | library ieee; 5 | use ieee.std_logic_1164.all; 6 | use ieee.numeric_std.all; 7 | 8 | entity cordic_tb is 9 | end cordic_tb; 10 | 11 | architecture tb of cordic_tb is 12 | signal clk : std_logic := '0'; 13 | signal rst_n : std_logic := '0'; 14 | signal angle_in : signed(15 downto 0) := (others => '0'); 15 | signal sin_out : signed(15 downto 0) := (others => '0'); 16 | signal cos_out : signed(15 downto 0) := (others => '0'); 17 | 18 | signal run_sim : std_logic := '1'; 19 | begin 20 | 21 | -- device under test; named association so the sin/cos signals land on the matching DUT ports 22 | dut: entity work.cordic_10_16 23 | port map(clk => clk, rst_n => rst_n, angle_in => angle_in, sin_out => sin_out, cos_out => cos_out); 24 | 25 | proc_clk: process 26 | begin 27 | if (run_sim = '1') then 28 | wait for 1 ns; 29 | clk <= not clk; 30 | else 31 | wait; 32 | end if; 33 | end process proc_clk; 34 | 35 | proc_stim: process 36 | begin 37 | wait for 10 ns; 38 | rst_n <= '1'; 39 | 40 | wait for 2000 ns; 41 | 42 | run_sim <= '0'; 43 | 44 | wait; 45 | end process proc_stim; 46 | 47 | proc_angle: process(clk) 48 | begin 49 | if (rising_edge(clk) and (rst_n = '1')) then 50 | angle_in <= angle_in + to_signed(123,16); 51 | end if; 52 | end process proc_angle; 53 | 54 | end tb; -------------------------------------------------------------------------------- /vhdl/benchmarks_large/cordic/generate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ## Generate a pipelined CORDIC with a certain number of iteration stages 4 | ## The script must also generate the angle table 5 | ## 6 | ## = bit width of cordic stage 7 | ## = number of stages 8 | ## = cordic vector start magnitude, approx 0.6199505 9 | ## = generated calls to cordic_stage 10 | ## 11 | 12 | 13 | import math 14 | 15 | def gen_cordic(stages, bits, template): 16 | ## calculate the CORDIC gain so we can compensate this 17 | ## by reducing the input vector length to avoid overflow.
18 | ## 19 | ## the Nth stage has a gain of sqrt(1.0 + 2^-2N) when counting 20 | ## stages from 0. 21 | ## 22 | ## Total gain for 4 stages : 1.64248406575 23 | ## 5 stages : 1.64568891576 24 | ## 6 stages : 1.64649227871 25 | ## 26 | 27 | amp = 1.0 28 | for I in range(0,stages): 29 | amp = amp * math.sqrt(1.0 + math.pow(2.0,-2*I)) 30 | 31 | startval = int( math.floor((2**(bits-1)-1) / amp) ) 32 | 33 | template = template.replace("", "to_signed(" + str(startval) + ",)") 34 | template = template.replace("", str(bits)).replace("", str(stages)) 35 | 36 | ## generate calls to cordic_stage 37 | 38 | gen = "\n" 39 | gen = gen + " "*4 + "stage_0: entity work.cordic_stage_(rtl)\n" 40 | gen = gen + " "*4 + " generic map (shiftN => 0)\n" 41 | gen = gen + " "*4 + " port map (clk, rst_n, x_in, y_in, z_in, , xbus(0), ybus(0), zbus(0));\n\n" 42 | #gen = gen + " clk => clk,\n" 43 | #gen = gen + " rst_n => rst_n,\n" 44 | #gen = gen + " x_in => x_in,\n" 45 | #gen = gen + " y_in => y_in,\n" 46 | #gen = gen + " y_in => y_in,\n" 47 | 48 | #gen = " cordic_stage_ #(0) stage0(clk, rst_n, x_in, y_in, z_in, , xbus[0], ybus[0], zbus[0]);\n" 49 | tanval = int( round((2**(bits)) * 0.125,0) ) 50 | gen = gen.replace("", "to_signed(" + str(tanval) + ",)") 51 | gen = gen.replace("", str(bits)) 52 | 53 | s = "" 54 | s = s + " "*4 + "stage_: entity work.cordic_stage_(rtl)\n" 55 | s = s + " "*4 + " generic map (shiftN => )\n" 56 | s = s + " "*4 + " port map (clk, rst_n, xbus(), ybus(), zbus(), , xbus(), ybus(), zbus());\n" 57 | 58 | for I in range(1,stages): 59 | tanval = int( round((2**(bits)) * math.atan(math.pow(2.0, -I))/(2.0*3.14159265359),0) ) 60 | gen_s = s.replace("", str(I-1)).replace("", "to_signed(" + str(tanval) + ",)") + "\n" 61 | gen_s = gen_s.replace("",str(I)) 62 | gen_s = gen_s.replace("",str(bits)) 63 | gen = gen + gen_s 64 | 65 | template = template.replace("", gen) 66 | 67 | with open("cordic_%d_%d.vhdl" % (stages, bits), "w") as f: 68 | print(template, file=f) 69 | 70 | with 
open('cordic.template','rt') as templatefile: 71 | template = templatefile.read() 72 | 73 | for stages in [4,5,6,7,8,9,10]: 74 | for bits in [8,12,16]: 75 | gen_cordic(stages, bits, ''.join(template)) 76 | -------------------------------------------------------------------------------- /vhdl/benchmarks_large/cordic/run_cordic_tb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ghdl -a cordic_10_16.vhdl 4 | ghdl -a cordic_tb.vhdl 5 | ghdl -e cordic_tb 6 | ghdl -r cordic_tb --wave=cordic_tb.ghw 7 | -------------------------------------------------------------------------------- /vhdl/benchmarks_small/cic/.gitignore: -------------------------------------------------------------------------------- 1 | *.cf 2 | *.ghw 3 | -------------------------------------------------------------------------------- /vhdl/benchmarks_small/cic/README.md: -------------------------------------------------------------------------------- 1 | # CIC5 - Cascaded Integrator-Comb DSP structure 2 | 3 | This module decimates the incoming data stream by a factor of 5, 4 | using cascaded integrators and comb filters. It is a well-known 5 | efficient DSP structure primarily found in high-speed A/D conversion 6 | applications, such as Software Defined Radios (SDR). 7 | 8 | The data widths are: 16-bit signed input, 28-bit signed output.
9 | 10 | Reference: https://en.wikipedia.org/wiki/Cascaded_integrator%E2%80%93comb_filter 11 | 12 | -------------------------------------------------------------------------------- /vhdl/benchmarks_small/cic/cic5.m.vhdl: -------------------------------------------------------------------------------- 1 | -- cic5 - a 5th order CIC decimation filter 2 | -- with 5x decimation factor 3 | -- 4 | -- Author: Niels Moseley 5 | -- Symbiotic EDA / Moseley Instruments 6 | -- 12-11-2018 7 | -- 8 | 9 | library ieee; 10 | use ieee.std_logic_1164.all; 11 | use ieee.numeric_std.all; 12 | 13 | entity cic5 is 14 | port 15 | ( 16 | clk : in std_logic; 17 | rst_n : in std_logic; 18 | d_in : in signed(15 downto 0); 19 | d_out : out signed(27 downto 0); 20 | d_out_valid : out std_logic -- high for one clock cycle 21 | ); 22 | end cic5; 23 | 24 | architecture rtl of cic5 is 25 | signal decimation_cnt : unsigned(2 downto 0); 26 | 27 | type state5_t is array(1 to 5) of signed(27 downto 0); 28 | 29 | signal int_s : state5_t; -- integrator states 30 | signal comb_s : state5_t; 31 | begin 32 | 33 | proc_clk: process(clk) 34 | variable tmp : state5_t; 35 | begin 36 | if (rising_edge(clk)) then 37 | if (rst_n = '0') then 38 | -- reset all integrator states 39 | for I in 1 to 5 loop 40 | int_s(I) <= (others => '0'); 41 | comb_s(I) <= (others => '0'); 42 | end loop; 43 | 44 | decimation_cnt <= (others => '0'); 45 | d_out <= (others => '0'); 46 | d_out_valid <= '0'; 47 | else 48 | -- default updates when clocked 49 | decimation_cnt <= decimation_cnt + 1; 50 | d_out_valid <= '0'; 51 | 52 | -- calculate new integrator states 53 | int_s(1) <= int_s(1) + resize(d_in, int_s(1)'length); 54 | for I in 2 to 5 loop 55 | int_s(I) <= int_s(I) + int_s(I-1); 56 | end loop; 57 | 58 | -- check if we can output new data at the 59 | -- reduced rate 60 | if (decimation_cnt = to_unsigned(4,decimation_cnt'length)) then 61 | decimation_cnt <= to_unsigned(0, decimation_cnt'length); 62 | 63 | -- calculate the CIC comb 
filters at the lower rate 64 | -- and update their filter states 65 | tmp(1) := int_s(5) - comb_s(1); -- calculate comb #1 output 66 | comb_s(1) <= int_s(5); -- update comb #1 filter state 67 | for I in 2 to 5 loop 68 | tmp(I) := tmp(I-1) - comb_s(I); 69 | comb_s(I) <= tmp(I-1); 70 | end loop; 71 | 72 | -- output a signal! 73 | d_out <= tmp(5); 74 | d_out_valid <= '1'; 75 | end if; 76 | end if; 77 | end if; 78 | end process proc_clk; 79 | 80 | end rtl; 81 | -------------------------------------------------------------------------------- /vhdl/benchmarks_small/cic/cic5_tb.m.vhdl: -------------------------------------------------------------------------------- 1 | -- Testbench for cic5 - a 5th order CIC filter decimating 5x. 2 | -- Author: Niels Moseley 3 | -- Symbiotic EDA / Moseley Instruments 4 | -- 12-11-2018 5 | -- 6 | 7 | library ieee; 8 | use ieee.std_logic_1164.all; 9 | use ieee.numeric_std.all; 10 | use work.all; 11 | 12 | entity cic5_tb is 13 | end cic5_tb; 14 | 15 | architecture tb of cic5_tb is 16 | signal clk : std_logic := '0'; 17 | signal rst_n : std_logic := '1'; 18 | signal d_in : signed(15 downto 0) := X"0000"; 19 | signal d_out : signed(27 downto 0) := X"0000000"; 20 | signal d_out_valid : std_logic := '0'; 21 | 22 | signal do_sim : std_logic := '1'; 23 | begin 24 | 25 | u_dut: entity work.cic5 26 | port map 27 | ( 28 | clk => clk, 29 | rst_n => rst_n, 30 | d_in => d_in, 31 | d_out => d_out, 32 | d_out_valid => d_out_valid 33 | ); 34 | 35 | proc_sim: process 36 | begin 37 | d_in <= X"7FFF"; 38 | rst_n <= '0'; 39 | wait for 4 ns; 40 | rst_n <= '1'; 41 | wait for 2*5*6 ns; 42 | -- after 5*6 clocks and 7FFF as input, the 43 | -- CIC filter's output must be stable. 44 | -- given that the gain is 3125x, the output 45 | -- should be 102396875. 
46 | assert (d_out = to_signed(102396875, d_out'length)) report "CIC5 filter output not correct" severity error; 47 | do_sim <= '0'; 48 | wait; 49 | end process proc_sim; 50 | 51 | proc_clk: process 52 | begin 53 | if (do_sim = '1') then 54 | clk <= not clk; 55 | wait for 1 ns; 56 | else 57 | wait; 58 | end if; 59 | end process proc_clk; 60 | 61 | end tb; -------------------------------------------------------------------------------- /vhdl/benchmarks_small/cic/run_testbench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ghdl -a cic5.m.vhdl 4 | ghdl -a cic5_tb.m.vhdl 5 | ghdl -e cic5_tb 6 | ghdl -r cic5_tb --wave=cic5_tb.ghw -------------------------------------------------------------------------------- /vhdl/benchmarks_small/various/.gitignore: -------------------------------------------------------------------------------- 1 | *.cf 2 | *.ghw 3 | -------------------------------------------------------------------------------- /vhdl/benchmarks_small/various/pwm256.m.vhdl: -------------------------------------------------------------------------------- 1 | -- PWM256 - a 256 level PWM generator 2 | -- Author: Niels Moseley 3 | -- Symbiotic EDA / Moseley Instruments 4 | -- 10-11-2018 5 | -- 6 | 7 | library ieee; 8 | use ieee.std_logic_1164.all; 9 | use ieee.numeric_std.all; 10 | 11 | entity pwm256 is 12 | port 13 | ( 14 | clk : in std_logic; 15 | rst_n : in std_logic; 16 | d_in : in unsigned(7 downto 0); 17 | pwm_out : out std_logic 18 | ); 19 | end pwm256; 20 | 21 | architecture rtl of pwm256 is 22 | signal counter : unsigned(7 downto 0); 23 | begin 24 | 25 | proc_clk: process(clk, counter, d_in) 26 | begin 27 | if (rising_edge(clk)) then 28 | if (rst_n = '0') then 29 | counter <= (others => '0'); 30 | else 31 | counter <= counter + 1; 32 | end if; 33 | end if; 34 | -- combinational compare: counter and d_in are in the sensitivity list so pwm_out tracks them in simulation 35 | if (counter <= d_in) then 36 | pwm_out <= '1'; 37 | else 38 | pwm_out <= '0'; 39 | end if; 40 | end process proc_clk; 41 | 42 | end rtl; 43 |
-------------------------------------------------------------------------------- /vhdl/benchmarks_small/various/pwm256_tb.m.vhdl: -------------------------------------------------------------------------------- 1 | -- Testbench for PWM256 - a 256 level PWM generator 2 | -- Author: Niels Moseley 3 | -- Symbiotic EDA / Moseley Instruments 4 | -- 10-11-2018 5 | -- 6 | 7 | library ieee; 8 | use ieee.std_logic_1164.all; 9 | use ieee.numeric_std.all; 10 | use work.all; 11 | 12 | entity pwm256_tb is 13 | end pwm256_tb; 14 | 15 | architecture tb of pwm256_tb is 16 | signal clk : std_logic := '0'; 17 | signal rst_n: std_logic := '1'; 18 | signal d_in : unsigned(7 downto 0) := "00000000"; 19 | signal pwm : std_logic; 20 | 21 | signal do_sim : std_logic := '1'; 22 | begin 23 | 24 | u_dut: entity work.pwm256 25 | port map 26 | ( 27 | clk => clk, 28 | rst_n => rst_n, 29 | d_in => d_in, 30 | pwm_out=> pwm 31 | ); 32 | -- stimulus: hold rst_n low for 4 ns, then apply d_in = 128, 10 and 246 for 2*256 ns each (256 periods of the 2 ns clock below) 33 | proc_sim: process 34 | begin 35 | d_in <= to_unsigned(128,8); 36 | rst_n <= '0'; 37 | wait for 4 ns; 38 | rst_n <= '1'; 39 | wait for 2*256 ns; 40 | d_in <= to_unsigned(10,8); 41 | wait for 2*256 ns; 42 | d_in <= to_unsigned(246,8); 43 | wait for 2*256 ns; 44 | do_sim <= '0'; 45 | wait; 46 | end process proc_sim; 47 | 48 | 49 | -- clock generator: toggles clk every 1 ns while do_sim = '1', then waits forever so the simulation can end 50 | proc_clk: process 51 | begin 52 | if (do_sim = '1') then 53 | clk <= not clk; 54 | wait for 1 ns; 55 | else 56 | wait; 57 | end if; 58 | end process proc_clk; 59 | 60 | end tb; -------------------------------------------------------------------------------- /vhdl/benchmarks_small/various/run_testbench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ghdl -a pwm256.m.vhdl 4 | ghdl -a pwm256_tb.m.vhdl 5 | ghdl -e pwm256_tb 6 | ghdl -r pwm256_tb --wave=pwm256_tb.ghw 7 | --------------------------------------------------------------------------------