├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE.BU ├── LICENSE.IBM ├── Makefile ├── Makefrag ├── README.md ├── build └── .gitignore ├── doc ├── binary-encodings-data-structures.md ├── dana-benchmark.md ├── debugging.md ├── exceptions.md ├── fpga-setup.md ├── htif-dtm.md ├── timing.md ├── toolflow.md └── u-boot.md ├── emulator ├── .gitignore └── Makefile ├── patches ├── fpga-mem-gen-add-arbitrary-verilog.patch ├── fpga-vsim-verilog-kludge.patch ├── fpga-zynq-dont-flatten-hierarchy.patch └── riscv-pk-xfiles-syscalls.patch ├── project ├── .gitignore ├── build.properties └── plugins.sbt ├── scalastyle-config.xml ├── src ├── main │ ├── scala │ │ ├── dana │ │ │ ├── ActivationFunction.scala │ │ │ ├── AsidNnidTableWalker.scala │ │ │ ├── CSRs.scala │ │ │ ├── Cache.scala │ │ │ ├── Configs.scala │ │ │ ├── Control.scala │ │ │ ├── Dana.scala │ │ │ ├── ProcessingElement.scala │ │ │ ├── ProcessingElementTable.scala │ │ │ ├── RegisterFile.scala │ │ │ ├── TransactionTable.scala │ │ │ ├── abi │ │ │ │ ├── Configs.scala │ │ │ │ └── Encodings.scala │ │ │ └── util │ │ │ │ ├── Memory.scala │ │ │ │ └── Util.scala │ │ ├── rocketchip │ │ │ ├── Configs.scala │ │ │ └── XFilesDanaTestSuite.scala │ │ ├── standalone │ │ │ ├── Configs.scala │ │ │ ├── DebugTests.scala │ │ │ ├── Main.scala │ │ │ ├── Standalone.scala │ │ │ └── XFilesTests.scala │ │ ├── util │ │ │ ├── QueueAf.scala │ │ │ ├── SRAM.scala │ │ │ ├── SRAMBlockIncrement.scala │ │ │ ├── SRAMElement.scala │ │ │ ├── SRAMElementCounter.scala │ │ │ ├── SRAMElementIncrement.scala │ │ │ └── SRAMVariant.scala │ │ └── xfiles │ │ │ ├── Arbiter.scala │ │ │ ├── Backend.scala │ │ │ ├── CSRs.scala │ │ │ ├── Configs.scala │ │ │ ├── DebugUnit.scala │ │ │ ├── TransactionTable.scala │ │ │ └── XFiles.scala │ └── verilog │ │ └── standalone.v └── test │ └── cpp │ ├── rocc_test.cpp │ ├── rocc_test.h │ ├── t_XFilesDana.cpp │ ├── t_debug.cpp │ ├── transaction.cpp │ ├── transaction.h │ ├── xcustom.cpp │ ├── xcustom.h │ ├── xfiles_dana.cc │ ├── 
xfiles_dana.h │ ├── xfiles_debug.cpp │ └── xfiles_debug.h ├── tests ├── .gitignore ├── Makefile.in ├── Makefrag ├── README.md ├── configure.ac ├── libs │ ├── .gitignore │ ├── Makefile │ └── src │ │ ├── include │ │ ├── xfiles-asid-nnid-table.h │ │ ├── xfiles-debug.h │ │ ├── xfiles-supervisor-types.h │ │ ├── xfiles-supervisor.h │ │ ├── xfiles-user-pk.h │ │ ├── xfiles-user.h │ │ └── xfiles.h │ │ ├── xfiles-asid-nnid-table.c │ │ ├── xfiles-debug.S │ │ ├── xfiles-debug.c │ │ ├── xfiles-supervisor.S │ │ ├── xfiles-supervisor.c │ │ ├── xfiles-user-pk.c │ │ ├── xfiles-user.S │ │ ├── xfiles-user.c │ │ └── xfiles.S ├── nets │ ├── .gitignore │ ├── Makefile │ ├── genericLearnTest.S │ ├── genericNetTest.S │ └── inference.S ├── pk │ ├── Makefile │ ├── antw-config.c │ ├── dana-benchmark.c │ ├── dana-benchmark.h │ ├── debug-test.c │ ├── hello.c │ ├── id.c │ ├── mt19937ar.c │ ├── mt19937ar.h │ ├── trap-00-new-request-no-asid.c │ ├── trap-00-supervisor-req-as-user.c │ ├── trap-00-write-register-no-asid.c │ ├── trap-01-request-antp-not-set.c │ ├── trap-02-request-oob-asid.c │ ├── trap-03-request-oob-nnid.c │ ├── trap-05-request-nn-config-zero-size.c │ └── trap-06-request-invalid-epb.c └── smoke │ ├── Makefile │ ├── csr.S │ ├── debug.S │ └── id.S ├── tools ├── .gitignore ├── Makefile ├── common │ ├── Makefrag-nets │ ├── Makefrag-rv │ ├── Makefrag-submodule │ ├── Makefrag-tools │ ├── Makefrag-video │ └── nets.txt ├── scripts │ ├── atanh-lut │ ├── binary-to-ram-init │ ├── danaCache │ ├── dana_memory_tool.py │ ├── dataset-tool.py │ ├── debug-table.awk │ ├── fann-change-fixed-point │ ├── fann-config-mr │ ├── fann-data-to-fixed │ ├── find-net │ ├── gen-boolean-data │ ├── gen-math-data │ ├── gen-random-fann-input │ ├── gen-trace-video │ ├── gen-video-soft │ ├── generate-ant │ ├── generate_test_mem.py │ ├── instrument_dpi │ ├── instrument_vpi │ ├── max-processors.sh │ ├── parse-af │ ├── parse-data-generic │ ├── parse-data-python │ ├── parse_emu_log.py │ ├── regression.sh │ ├── 
rocket-chip-setup.sh │ ├── rv-load-fpga │ ├── rvcon │ ├── rvreboot │ ├── rvstatus │ ├── rvwho │ ├── travis-before-install │ └── travis-script └── src │ ├── bin-config-to-c-header.c │ ├── copyright.h │ ├── encoding.h │ ├── fann-eval.c │ ├── fann-float-to-fixed.c │ ├── fann-image.c │ ├── fann-random.c │ ├── fann-train-to-c-header-fixed.c │ ├── fann-train-to-c-header.c │ ├── fann-train.c │ ├── generate-ant.c │ └── write-fann-config-for-accelerator.c └── util └── .vimrc /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *# 3 | target 4 | TAGS 5 | tags 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/fann"] 2 | path = fann 3 | url = https://github.com/libfann/fann.git 4 | [submodule "util/hdl-tools"] 5 | path = util/hdl-tools 6 | url = https://github.com/ibm/hdl-tools.git 7 | [submodule "src/main/scala/perfect"] 8 | path = src/main/scala/perfect 9 | url = https://github.com/ibm/perfect-chisel.git 10 | [submodule "rocc-software"] 11 | path = tests/rocc-software 12 | url = https://github.com/ibm/rocc-software.git 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | All contributors must agree to the Developer Certificate of Origin Version 1.1. (DCO 1.1) by signing their commits with: 2 | 3 | ``` 4 | Signed-off-by: [NAME] <[EMAIL]> 5 | ``` 6 | 7 | This can be simply achieved with `git commit -s` when formatting your commit message. 8 | 9 | The full text of the DCO 1.1 is as follows: 10 | 11 | ``` 12 | Developer Certificate of Origin 13 | Version 1.1 14 | 15 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 
16 | 660 York Street, Suite 102, 17 | San Francisco, CA 94110 USA 18 | 19 | Everyone is permitted to copy and distribute verbatim copies of this 20 | license document, but changing it is not allowed. 21 | 22 | 23 | Developer's Certificate of Origin 1.1 24 | 25 | By making a contribution to this project, I certify that: 26 | 27 | (a) The contribution was created in whole or in part by me and I 28 | have the right to submit it under the open source license 29 | indicated in the file; or 30 | 31 | (b) The contribution is based upon previous work that, to the best 32 | of my knowledge, is covered under an appropriate open source 33 | license and I have the right under that license to submit that 34 | work with modifications, whether created in whole or in part 35 | by me, under the same open source license (unless I am 36 | permitted to submit under a different license), as indicated 37 | in the file; or 38 | 39 | (c) The contribution was provided directly to me by some other 40 | person who certified (a), (b) or (c) and I have not modified 41 | it. 42 | 43 | (d) I understand and agree that this project and the contribution 44 | are public and that a record of the contribution (including all 45 | personal information I submit with it, including my sign-off) is 46 | maintained indefinitely and may be redistributed consistent with 47 | this project or the open source license(s) involved. 48 | ``` 49 | -------------------------------------------------------------------------------- /LICENSE.BU: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Boston University (BU). All Rights Reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 1. Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 2. 
Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the 11 | distribution. 12 | 3. Neither the name of BU nor the names of its contributors may be 13 | used to endorse or promote products derived from this software 14 | without specific prior written permission. 15 | 16 | IN NO EVENT SHALL BU BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, 17 | SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, 18 | ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 19 | BU HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 20 | 21 | BU SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED 22 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 23 | PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF 24 | ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". BU HAS NO OBLIGATION TO 25 | PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include Makefrag 2 | 3 | DIR_FANN = $(DIR_TOP)/submodules/fann 4 | 5 | # Common configuration 6 | DIR_SRC_SCALA = $(DIR_TOP)/src/main/scala 7 | DIR_SRC_V = $(DIR_TOP)/src/main/verilog 8 | DIR_SRC_C = $(DIR_TOP)/src/main/c 9 | DIR_SRC_CPP = $(DIR_TOP)/src/main/cpp 10 | DIR_TEST_CPP = $(DIR_TOP)/src/test/cpp 11 | DIR_TEST_RV = $(DIR_TOP)/src/test/rv 12 | DIR_MAIN_RES = $(DIR_TOP)/src/main/resources 13 | SEED = $(shell echo "$$RANDOM") 14 | SEED = 0 15 | 16 | # Miscellaneous crap 17 | COMMA = , 18 | 19 | DECIMAL_POINT_OFFSET=7 20 | DECIMAL_POINT_BITS=3 21 | MAX_DECIMAL_POINT=`echo "2 $(DECIMAL_POINT_BITS)^1-$(DECIMAL_POINT_OFFSET)+p"|dc` 22 | 23 | vpath %.scala $(DIR_SRC_SCALA) 24 | vpath %.cpp $(DIR_TEST_CPP) 25 | vpath %.cpp $(DIR_BUILD) 26 | vpath %.c $(DIR_SRC_C) 27 | vpath %.c $(DIR_TEST_RV) 28 | vpath %.h $(DIR_SRC_C) 29 | vpath %.v $(DIR_TEST_V) 30 | vpath %.v $(DIR_SRC_V) 31 | vpath %.v $(DIR_BUILD) 32 | vpath %-float.net $(DIR_MAIN_RES) 33 | 34 | .PHONY: all clean checkstyle debug default doc mrproper nets tags tools vcd 35 | 36 | default: all 37 | 38 | all: nets 39 | 40 | SBT ?= sbt 41 | SBT_FLAGS ?= 42 | checkstyle: 43 | env ROCKETCHIP_ADDONS=$(ROCKETCHIP_ADDONS) $(SBT) $(SBT_FLAGS) scalastyle test:scalastyle 44 | 45 | include $(DIR_TOP)/tools/common/Makefrag-rv 46 | include $(DIR_TOP)/tools/common/Makefrag-submodule 47 | include $(DIR_TOP)/tools/common/Makefrag-tools 48 | include $(DIR_TOP)/tools/common/Makefrag-nets 49 | include $(DIR_TOP)/tools/common/Makefrag-video 50 | 51 | nets: $(NETS_BIN) $(TRAIN_FIXED) $(NETS_TEST) $(NETS_ANT_H) 52 | tools: $(NETS_TOOLS) 53 | 54 | #------------------- Miscellaneous: editor tag files (globs are quoted so find, not the shell, expands them) 55 | TAGS_SCALA = \ 56 | $(rocketchip_dir)/src/main/scala \ 57 | $(rocketchip_dir)/chisel3/src \ 58 | $(rocketchip_dir)/chisel3/chiselFrontend \ 59 | 
$(rocketchip_dir)/firrtl/src/main/scala \ 60 | $(rocketchip_dir)/xfiles-dana/src/main/scala 61 | TAGS_C = \ 62 | $(rocketchip_dir)/csrc \ 63 | $(rocketchip_dir)/xfiles-dana/tests \ 64 | $(rocketchip_dir)/riscv-tools/riscv-fesvr/fesvr \ 65 | $(rocketchip_dir)/riscv-tools/riscv-pk/pk \ 66 | $(rocketchip_dir)/riscv-tools/riscv-pk/machine \ 67 | $(rocketchip_dir)/riscv-tools/riscv-pk/bbl 68 | TAGS_V = \ 69 | $(DIR_TOP)/../vsrc 70 | tags: 71 | find $(TAGS_SCALA) -name '*.scala' -exec ctags --output-format=etags {} + 72 | find $(TAGS_C) -exec ctags --append=yes --output-format=etags {} + 73 | find $(TAGS_V) -exec ctags --append=yes --output-format=etags {} + 74 | find $(TAGS_SCALA) -name '*.scala' -exec ctags {} + 75 | find $(TAGS_C) -exec ctags --append=yes {} + 76 | find $(TAGS_V) -exec ctags --append=yes {} + 77 | 78 | #--------- Generate ScalaDoc documentation 79 | doc: | $(DIR_BUILD)/doc 80 | scaladoc $(shell find $(TAGS_SCALA) -name '*.scala') -d $(DIR_BUILD)/doc 81 | 82 | $(DIR_BUILD)/doc: 83 | mkdir -p $@ 84 | 85 | #------------------- Utility Targets 86 | clean: 87 | rm -rf $(DIR_BUILD)/* 88 | rm -rf target 89 | rm -f TAGS tags 90 | 91 | mrproper: clean 92 | $(MAKE) clean -C $(DIR_TOP)/tools 93 | $(MAKE) clean -C $(DIR_TOP)/tests/libs 94 | rm -rf $(DIR_TOP)/tests/build $(DIR_TOP)/tests/configure 95 | -------------------------------------------------------------------------------- /Makefrag: -------------------------------------------------------------------------------- 1 | #-*- mode: makefile-*- 2 | 3 | # Shared parameters (be careful messing with these) 4 | SHELL = /bin/bash 5 | DIR_TOP ?= $(abspath .) 6 | rocketchip_dir = $(abspath ../.) 
7 | DIR_BUILD ?= $(DIR_TOP)/build 8 | ROCKETCHIP_ADDONS ?= xfiles-dana 9 | TARGET ?= riscv64-unknown-elf 10 | -------------------------------------------------------------------------------- /build/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /doc/dana-benchmark.md: -------------------------------------------------------------------------------- 1 | Benchmark Tests for Dana 2 | ====== 3 | 4 | ## Overview 5 | 6 | The dana_benchmark repo contains a testing program that can be used with the XFILES/DANA system. The DANA_BENCHMARK.c program creates a series of transactions which query the system and acquire the output. Currently, multiple transactions can be run on a single thread. 7 | 8 | ## Running Tests 9 | 10 | #### Command Line Inputs 11 | 12 | The dana_benchmark tests are defined by command line input. The command line arguments include: 13 | 14 | * Transactions 15 | * [NEURAL_NET],[NUMBER_OF_INPUTS],[NUMBER_OF_OUTPUTS] 16 | * Number of concurrent transactions to run 17 | * Will always be second to last command line argument 18 | * If this exceeds the total number of transactions it will be set to the total number of transactions 19 | * Debug Option 20 | * Will always be last command line argument 21 | * If set to 1, debug information will be printed 22 | 23 | The fields of a transaction argument are separated by commas with no spaces; multiple transactions are separated by spaces. 24 | 25 | #### Compilation 26 | 27 | The dana_benchmark program is compiled with 28 | 29 | `riscv64-unknown-elf-gcc-5.3.0` 30 | 31 | The following command is used to compile the program: 32 | 33 | `riscv64-unknown-elf-gcc-5.3.0 DANA_BENCHMARK.c -o danabench` 34 | 35 | This will produce the binary 'danabench' which can then be run on an fpga. 
36 | 37 | #### Example Run 38 | 39 | The following command is an example of how to run the dana_benchmark program. This is running on an fpga using fesvr-zynq and the proxy kernel. 40 | 41 | `./fesvr-zynq pk danabench net0,3,2 net1,5,1 1 1` 42 | 43 | The above command will create two transactions: 44 | 45 | * Transaction 1 46 | * Neural Network = net0 47 | * Input array length = 3 48 | * Output array length = 2 49 | * Transaction 2 50 | * Neural Network = net1 51 | * Input array length = 5 52 | * Output array length = 1 53 | 54 | The command will run each transaction separately (indicated by the 1 as the second to last command line argument) and the debugging will be turned on (indicated by the 1 as the last command line argument). 55 | 56 | The following is an example of running the program with concurrent transactions without debugging: 57 | 58 | `./fesvr-zynq pk danabench net0,3,2 net1,5,1 2 0` 59 | 60 | The 2 as the second to last command line argument indicates that both transactions will be run at the same time. The zero as the last command line argument indicates that debugging is off. 61 | 62 | ### Further Documentation and Questions 63 | 64 | For information on specific functions in the DANA_BENCHMARK.c program, the DANA_BENCHMARK.h file can be consulted which is located in the dana_benchmark/c directory. 65 | 66 | For information on the functions that are used to interact with the XFILES/DANA system, the xfiles* files in the dana_benchmark/c directory can be consulted. 67 | 68 | 69 | If there are any further questions, please email einstein@bu.edu! 
70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /doc/debugging.md: -------------------------------------------------------------------------------- 1 | # Rough Guide to Debugging 2 | 3 | Within the rocket-chip repository there are directories for Chisel backends: 4 | * emulator -- C++ model 5 | * fsim -- FPGA-synthesizable Verilog 6 | * vsim -- VLSI-compatible / VCS simulated Verilog 7 | 8 | For most of our debugging we use the C++ backend. 9 | 10 | ## Git Setup for Clearing Issues 11 | I create a new branch named after the issue and then switch to that: 12 | 13 | ``` 14 | git branch issue-54-incremental-learning 15 | git checkout issue-54-incremental-learning 16 | ``` 17 | 18 | ## Building the C++ Model 19 | 20 | There are two Chisel sources used to configure a build of the C++ model: 21 | * src/main/scala/Configs.scala 22 | * configs/XFilesDanaConfigs.scala 23 | 24 | The XFilesDanaConfigs.scala is known to the rocket-chip repository by symlinking this into rocket-chip/src/main/scala. 25 | 26 | Looking in XFilesDanaConfigs.scala, we can select one of the C++ configurations to build, e.g., `XFilesDanaCppPe1Epb4Config`, and build it: 27 | 28 | ``` 29 | cd rocket-chip/emulator 30 | make CONFIG=XFilesDanaCppPe1Epb4Config ROCKETCHIP_ADDONS=xfiles-dana 31 | ``` 32 | 33 | In the event that you have more than one addon, you can specify these with a comma-delimited list. 34 | 35 | The name of the emulator will include the configuration. For the code above we should now have an `emulator-Top-XFilesDanaCppPe1Epb4Config` in the rocket-chip/emulator directory. Once built, you can run a test program with: 36 | 37 | ``` 38 | ./emulator-Top-XFilesDanaCppPe1Epb4Config pk ../xfiles-dana/build/hello.rv 39 | ``` 40 | 41 | However, this does not provide any more information than you get from the FPGA, so we need to dump additional debug information. 
Any `printf` that you put inside of Chisel code translates to a statement that will print when you run an emulator with the `+verbose` option. Unfortunately, this dumps a ton of information from rocket-chip each cycle. To get rid of this, remove all lines that begin with `C`: 42 | 43 | ``` 44 | ./emulator-Top-XFilesDanaCppPe1Epb4Config +verbose [binary] 2>&1 | grep -v ^C 45 | ``` 46 | 47 | ### Memory Tool for Debugging Help 48 | One of the common issues that comes up when debugging is checking that the address computations for the Register File/Intermediate Storage area are working correctly. I have a tool which helps with this that will dump out a CSV formatted table of what the Register File should look like internally. This can then be pushed through `column` to get something reasonable to look at. You can do this with a call like the following: 49 | ``` 50 | $ ./usr/bin/dana-memory-tool -b8 \ 51 | -n build/nets/xor-sigmoid-4i-fixed.net \ 52 | -l | column -s, -t -o" " 53 | E[out] 0x0 0 54 | In 0x8 8 9 a b 55 | H[0] 0x10 10 11 12 13 56 | Out 0x18 18 57 | DW 0x20 20 21 22 23 58 | Bias H[0] 0x28 28 29 2a 2b 59 | Bias Out 0x30 30 60 | Slope H[0][0] 0x38 38 39 3a 3b 61 | Slope H[0][1] 0x40 40 41 42 43 62 | Slope H[0][2] 0x48 48 49 4a 4b 63 | Slope H[0][3] 0x50 50 51 52 53 64 | Slope Out[0] 0x58 58 59 5a 5b 65 | ``` 66 | -------------------------------------------------------------------------------- /doc/exceptions.md: -------------------------------------------------------------------------------- 1 | # List of X-FILES/DANA Exceptions 2 | 3 | This is being tracked by [issue #4](https://github.com/bu-icsg/xfiles-dana/issues/4). 
4 | 5 | ## Candidate Exceptions 6 | 7 | ### ASID--NNID Table 8 | 9 | * NNID Out of Bounds 10 | * Tried to read an NN configuration with an elements per block size differing from this build of Dana 11 | -------------------------------------------------------------------------------- /doc/fpga-setup.md: -------------------------------------------------------------------------------- 1 | # FPGA Setup and Management 2 | The general strategy here is to set up a bunch of FPGAs (e.g., Zedboards) behind a NAT connected to a HOST. Users with access to the HOST are then able to connect using the `xfiles-dana/tools/scripts/rv*` scripts to grab an FPGA and do work on it. The HOST is running an NFS file server that makes the home directories of the HOST available to specific clients. 3 | 4 | ## HOST Configuration 5 | On the HOST side, several files need to be properly maintained. This is Ubuntu-specific; other distributions should need roughly the same changes, though possibly in different files: 6 | * `/etc/hosts` -- set up the HOST to know about the IPs of the FPGAs 7 | * `/etc/hosts.allow` -- enable access to the HOST system from the FPGAs 8 | * `/etc/dhcp/dhcpd.conf` -- create entries in the subnet for each of the FPGAs that will assign a specific IP address to a specific MAC address. Each FPGA must have a unique MAC 9 | * `/etc/exports` -- enable access to specific directories via NFS, e.g., `/home` 10 | 11 | ## FPGA Configuration 12 | On the FPGA side, certain files need to be configured inside of the ramdisk: 13 | * `/etc/hosts` -- set up the hostname mappings for the FPGAs and the HOST 14 | * `/etc/network/interfaces` -- give the FPGA a static IP and set the network/gateway 15 | * `/etc/hostname` -- give the FPGA a hostname 16 | * `/etc/fstab` -- set this up to auto-mount some NFS location for easier work on the FPGA 17 | 18 | Presently, all of these files need to be changed manually whenever you are pushing changes to the ramdisk. 
19 | 20 | ### Set the FPGA MAC address 21 | Each of the FPGAs comes with a built-in MAC address of `00:0A:35:00:01:22`. If you run more than one of these on the same network then network traffic will get lost. Consequently, the FPGAs need to be given unique MAC addresses. This can be accomplished by configuring this in Vivado (I think), but it's easier (and persistent!) to just handle this from u-boot. The procedure here is as follows: 22 | 1. Log in to u-boot using the serial console (i.e., use `rvcon`) 23 | 2. Set the `ethaddr` environment variable and save it: 24 | ``` 25 | setenv ethaddr 00:0A:35:00:01:[XX] 26 | saveenv 27 | ``` 28 | 29 | ### Updating the Programmable Logic from Linux 30 | The Programmable Logic is exposed to Linux as a device into which you can `cat` a new bit file. However, you first need to get the explicit bit file and convert it to a binary using `bootgen`. You can do this with: 31 | ``` 32 | bootgen -image .bif -split bin -o i BOOT.BIN 33 | ``` 34 | 35 | I'm unsure of how to massage the syntax here so that this runs without overwriting the old boot.bin. Consequently, you currently need to overwrite the old boot.bin with the `-w` flag. 36 | 37 | Following this, you can then `cat` this bit file to the `xdevcfg` device from Linux running on the ARM core: 38 | ``` 39 | cat .bit.bin > /dev/xdevcfg 40 | ``` 41 | -------------------------------------------------------------------------------- /doc/timing.md: -------------------------------------------------------------------------------- 1 | # Timing Documentation 2 | 3 | ## dana.TransactionTable inFirst, inLast, etc. 4 | 5 | These signals serve different purposes, but are grouped by their sensitivity: 6 | 7 | ### "Early Signals" -- PE Request Sensitive 8 | 9 | This signal changes state as soon as the last processing element in a layer is allocated. This signal is used by the state machines responsible for getting the next layer information into the Transaction Table. 
These actions can occur as soon as the last PE is allocated, hence the need for this signal. 10 | 11 | * `inLastEarly` 12 | 13 | ### "Late Signals" -- Layer Response/Done Sensitive 14 | 15 | These signals change state only when the Register File (Scratchpad Memory) responds that it has all the information needed for a specific neural network layer. These signals cannot be used by any of the Cache state machines that can operate in the interim. These signals can be used by any PE logic. 16 | 17 | * `inFirst` 18 | * `inLast` 19 | -------------------------------------------------------------------------------- /doc/u-boot.md: -------------------------------------------------------------------------------- 1 | # U-Boot Documentation 2 | 3 | ## Accessing U-Boot 4 | The FPGAs are set up to boot from the uramdisk.image.gz automatically. To access U-Boot, you need to log in over the serial console and manually reboot the ARM core. You will then have the opportunity to interrupt the boot process and access U-Boot. After logging in over the serial console, run the `reboot` command: 5 | ``` 6 | root@fpga3:~# reboot 7 | 8 | Broadcast message from root@fpga3 ng down for reboot NOW! 9 | INIT: Switching to runlevel: 6 10 | INIT: Sending processes the TERM signal 11 | INIT: Stopping Dropbear SSH server: stopped /usr/sbin/dropbear (pid 818) 12 | dropbear. 13 | Stopping tcf-agent: OK 14 | not deconfiguring network interfaces: network file systems still mounted. 15 | Sending all processes the TERM signal... 16 | Sending all processes the KILL signal... 17 | Unmounting remote filesystems... 18 | Deactivating swap... 19 | Unmounting local filesystems... 20 | Rebooting... 
reboot: Restarting system 21 | 22 | U-Boot 2014.07-01982-gf634657-dirty (Sep 24 2014 - 07:54:13) 23 | 24 | Board: Xilinx Zynq 25 | I2C: ready 26 | DRAM: ECC disabled 256 MiB 27 | MMC: zynq_sdhci: 0 28 | SF: Detected S25FL128S_64K with page size 512 Bytes, erase size 128 KiB, total 32 MiB 29 | In: serial 30 | Out: serial 31 | Err: serial 32 | Net: Gem.e000b000 33 | Hit any key to stop autoboot: 0 34 | zynq-uboot> 35 | ``` 36 | 37 | ## Manually Loading the SD Card 38 | It is possible that the uramdisk.image.gz will get FUBARed during a transfer to a remote FPGA and all you can get to is U-Boot. If this is the case, you can reload whatever you need via U-Boot. I've only done this with minicom, however. 39 | 40 | Connect to the FPGA (using /dev/ttyACM0 as an example): 41 | ``` 42 | minicom -D /dev/ttyACM0 43 | ``` 44 | 45 | Get U-Boot into a mode to receive files via the y-modem protocol 46 | ``` 47 | zynq-uboot> loady 48 | ``` 49 | 50 | Use an escape sequence to do a serial transfer through minicom, `C-a s`, selecting y-modem as the protocol. You can then navigate to the local file that you want to transfer to the RAM of the board. The file transfer will go through at the baud rate of the connection, so this can take some time. Take note of what this shows when it finishes as this indicates the start address of the loaded file and the size of the transfer: 51 | ``` 52 | ## Ready for binary (ymodem) download to 0x00000000 at 115200 bps... 53 | CxyzModem - CRC mode, 48072(SOH)/0(STX)/0(CAN) packets, 6 retries 54 | ## Total Size = 0x005de2ce = 6152910 Bytes 55 | ``` 56 | 57 | Above, the download went to start address 0x0 and was of size 6152910. This file happened to be uramdisk.image.gz, which will be referenced below. Now we need to copy this from RAM to the SD card. The SD card is an mmc device which you can get some information about with `mmc info` or `mmc list` to find out what the device number is. 
58 | ``` 59 | zynq-uboot> mmc info 60 | Device: zynq_sdhci 61 | Manufacturer ID: 27 62 | OEM: 5048 63 | Name: SD04G 64 | Tran Speed: 50000000 65 | Rd Block Len: 512 66 | SD version 3.0 67 | High Capacity: Yes 68 | Capacity: 3.7 GiB 69 | Bus Width: 4-bit 70 | zynq-uboot> mmc list 71 | zynq_sdhci: 0 72 | ``` 73 | 74 | Here, the device is 0. We can also look at the contents of the SD card with: 75 | ``` 76 | zynq-uboot> fatls mmc 0 77 | riscv/ 78 | 102050064 uramdisk.image.gz 79 | 3396232 uimage 80 | 9243 devicetree.dtb 81 | 4517524 boot.bin 82 | .trash-1000/ 83 | 862920 simple_train_arm 84 | 36 xor.data 85 | 1969 xor_float.net 86 | 4015544 tst-ieee754-riscv 87 | 557056 zed.out 88 | 4322320 simple_train_riscv 89 | 0 boot.bif 90 | 3701208 cmath_test_riscv 91 | 557055 #zed.out# 92 | 4517524 boot.bin.orig 93 | 94 | 15 file(s), 2 dir(s) 95 | ``` 96 | 97 | Let's go ahead and overwrite the bad uramdisk.image.gz: 98 | ``` 99 | zynq-uboot> fatwrite mmc 0 0x0 uramdisk.image.gz 6152910 100 | writing uramdisk.image.gz 101 | 102050064 bytes written 102 | ``` 103 | 104 | Following that, everything should work, i.e., you can boot the board: 105 | ``` 106 | zynq-uboot> boot 107 | ``` 108 | 109 | Using this method you can recover the system from anything so long as U-Boot is intact. 
110 | -------------------------------------------------------------------------------- /emulator/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !Makefile -------------------------------------------------------------------------------- /patches/fpga-mem-gen-add-arbitrary-verilog.patch: -------------------------------------------------------------------------------- 1 | diff --git a/fsim/fpga_mem_gen b/fsim/fpga_mem_gen 2 | index 2c9e249..2890a04 100755 3 | --- a/fsim/fpga_mem_gen 4 | +++ b/fsim/fpga_mem_gen 5 | @@ -5,6 +5,8 @@ 6 | 7 | import sys 8 | import math 9 | +import re 10 | +import argparse 11 | 12 | use_latches = False 13 | 14 | @@ -182,13 +184,42 @@ def gen_mem(name, width, depth, ports, mask_gran): 15 | '\n '.join(sequential), 16 | '\n '.join(combinational)) 17 | 18 | - 19 | +def add_verilog(verilog_file, include_paths): 20 | + found_file = False 21 | + for inc_dir in include_paths: 22 | + try: 23 | + open_verilog_file = open(inc_dir + '/' + verilog_file) 24 | + except IOError: 25 | + continue 26 | + else: 27 | + re_include = re.compile("^`include \"(.+)\" ?$") 28 | + for line in open_verilog_file: 29 | + match = re_include.match(line) 30 | + if match: 31 | + add_verilog(match.group(1), include_paths) 32 | + continue 33 | + sys.stdout.write(line) 34 | + return 35 | + raise IOError('Unable to find file ' + verilog_file) 36 | 37 | def main(): 38 | - if len(sys.argv) < 2: 39 | - sys.exit('Please give a .conf file as input') 40 | - for line in open(sys.argv[1]): 41 | - print(gen_mem(*parse_line(line))) 42 | + parser = argparse.ArgumentParser(description='Add SRAMs to Chisel-generated Verilog backend using Chisel conf file and specific Verilog files.') 43 | + parser.add_argument('-c', '--configuration', type=open, 44 | + help='Chisel configuration file to parse') 45 | + parser.add_argument('-I', '--include', help='Append directory to include directory. 
This is used to dereference any specified Verilog files (-v) or their \"`include\" Verilog directives. The current directory is searched by default.', 46 | + action='append', default=['.']) 47 | + parser.add_argument('-v', '--verilog_file', 48 | + help='Verilog file to append to the output of the Chisel Verilog backend.', 49 | + action='append') 50 | + args = parser.parse_args() 51 | + 52 | + if (args.configuration): 53 | + for line in args.configuration: 54 | + print(gen_mem(*parse_line(line))) 55 | + 56 | + if (args.verilog_file): 57 | + for verilog_file in args.verilog_file: 58 | + add_verilog(verilog_file, args.include) 59 | 60 | 61 | if __name__ == '__main__': 62 | -------------------------------------------------------------------------------- /patches/fpga-vsim-verilog-kludge.patch: -------------------------------------------------------------------------------- 1 | diff --git a/vsim/Makefrag-verilog b/vsim/Makefrag-verilog 2 | index 66034bc..e2906a2 100644 3 | --- a/vsim/Makefrag-verilog 4 | +++ b/vsim/Makefrag-verilog 5 | @@ -7,7 +7,11 @@ $(generated_dir)/$(MODEL).$(CONFIG).v $(generated_dir)/$(MODEL).$(CONFIG).d $(ge 6 | cd $(base_dir) && mkdir -p $(generated_dir) && $(SBT) "run $(CHISEL_ARGS) --configDump --noInlineMem" 7 | cd $(generated_dir) && \ 8 | if [ -a $(MODEL).$(CONFIG).conf ]; then \ 9 | - $(mem_gen) $(generated_dir)/$(MODEL).$(CONFIG).conf >> $(generated_dir)/$(MODEL).$(CONFIG).v; \ 10 | + if [[ $(CONFIG) == *XFiles* ]]; then \ 11 | + $(mem_gen) -c $(generated_dir)/$(MODEL).$(CONFIG).conf -I ../../xfiles-dana/src/main/verilog -I ../../xfiles-dana/submodules/verilog/src -v sram_r1_w1_rw0.v >> $(generated_dir)/$(MODEL).$(CONFIG).v; \ 12 | + else \ 13 | + $(mem_gen) -c $(generated_dir)/$(MODEL).$(CONFIG).conf >> $(generated_dir)/$(MODEL).$(CONFIG).v; \ 14 | + fi; \ 15 | fi 16 | 17 | $(generated_dir)/memdessertMemDessert.$(CONFIG).v $(generated_dir)/memdessertMemDessert.$(CONFIG).d: $(base_dir)/$(src_path)/*.scala 
$(base_dir)/uncore/$(src_path)/*.scala 18 | -------------------------------------------------------------------------------- /patches/fpga-zynq-dont-flatten-hierarchy.patch: -------------------------------------------------------------------------------- 1 | diff --git a/common/zynq_rocketchip.tcl b/common/zynq_rocketchip.tcl 2 | index a2ae542..eff9e17 100644 3 | --- a/common/zynq_rocketchip.tcl 4 | +++ b/common/zynq_rocketchip.tcl 5 | @@ -100,6 +100,7 @@ if {[string equal [get_runs -quiet synth_1] ""]} { 6 | set obj [get_runs synth_1] 7 | set_property "needs_refresh" "1" $obj 8 | set_property "part" "PART_NUMBER_HERE" $obj 9 | +set_property "STEPS.SYNTH_DESIGN.ARGS.FLATTEN_HIERARCHY" "none" $obj 10 | 11 | # Create 'impl_1' run (if not found) 12 | if {[string equal [get_runs -quiet impl_1] ""]} { 13 | -------------------------------------------------------------------------------- /project/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !build.properties 4 | !build.scala 5 | !plugins.sbt -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.11 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += "simplytyped" at "http://simplytyped.github.io/repo/releases" 2 | 3 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0") 4 | 5 | addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.4") 6 | 7 | addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1") 8 | 9 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.0") 10 | 11 | addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.7.7") 12 | 13 | addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.3") 14 | 15 | addSbtPlugin("com.eed3si9n" % 
"sbt-buildinfo" % "0.6.1") 16 | -------------------------------------------------------------------------------- /src/main/scala/dana/Configs.scala: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | package dana 4 | 5 | import chisel3._ 6 | import cde._ 7 | import xfiles.{BuildXFilesBackend, XFilesBackendParameters} 8 | 9 | import dana.util._ 10 | import dana.abi._ 11 | 12 | class DefaultHardwareConfig extends Config ( topDefinitions = { 13 | (pname,site,here) => pname match { 14 | // ANTW Parameters 15 | case AntwRobEntries => 32 16 | // Field widths 17 | case NnidWidth => 16 18 | // Processing Element Table 19 | case PeTableNumEntries => 2 20 | case PeCooldownWidth => 8 21 | // Configuration Cache 22 | case CacheNumEntries => 2 23 | case CacheSizeBytes => 512 * 1024 // KiB 24 | case CacheNumBlocks => divUp(divUp((site(CacheSizeBytes) * 8), 25 | site(DanaDataBits)), site(ElementsPerBlock)) 26 | case CacheInit => Nil 27 | // Register File 28 | case ScratchpadBytes => 8 * 1024 // KiB 29 | case ScratchpadElements => divUp(site(ScratchpadBytes) * 8, 30 | site(DanaDataBits)) 31 | // Enables support for in-hardware learning 32 | case LearningEnabled => true 33 | case BitsPerBlock => site(ElementsPerBlock) * site(DanaDataBits) 34 | case BytesPerBlock => site(BitsPerBlock) / 8 35 | case RegFileNumBlocks => divUp(site(ScratchpadElements), 36 | site(ElementsPerBlock)) 37 | case NNConfigNeuronWidth => 64 38 | case BuildXFilesBackend => XFilesBackendParameters( 39 | generator = (p: Parameters) => Module(new Dana()(p)), 40 | csrFile_gen = (p: Parameters) => Module(new dana.CSRFile()(p)), 41 | csrStatus_gen = (p: Parameters) => new DanaStatus()(p), 42 | csrProbes_gen = (p: Parameters) => new DanaProbes()(p), 43 | info = packInfo(site(ElementsPerBlock), site(PeTableNumEntries), 44 | site(CacheNumEntries))) 45 | case _ => throw new CDEMatchError 46 | }} 47 | ) 48 | 49 | class DefaultDanaConfig 
/** Configuration fragment that sets the main Dana sizing knobs from
  * constructor arguments.
  *
  * Each argument is returned verbatim for exactly one configuration key.
  * Any other key raises `CDEMatchError`.
  *
  * @param numPes value returned for `PeTableNumEntries` (default 1)
  * @param epb value returned for `ElementsPerBlock` (default 4)
  * @param cache value returned for `CacheNumEntries` (default 2)
  * @param cacheSize value returned for `CacheSizeBytes` (default 32 * 1024)
  * @param scratchpad value returned for `ScratchpadBytes` (default 8 * 1024)
  * @param learning value returned for `LearningEnabled` (default true)
  */
class DanaConfig
  (numPes: Int = 1,
    epb: Int = 4,
    cache: Int = 2,
    cacheSize: Int = 32 * 1024,
    scratchpad: Int = 8 * 1024,
    learning: Boolean = true)
    extends Config( topDefinitions = {
      (pname,site,here) => pname match {
        case LearningEnabled => learning
        case PeTableNumEntries => numPes
        case ElementsPerBlock => epb
        case CacheNumEntries => cache
        case CacheSizeBytes => cacheSize
        case ScratchpadBytes => scratchpad
        // Keys not handled here are left for other config fragments
        case _ => throw new CDEMatchError
      }})
/** ABI description with 32-bit pointers and 32-bit data elements.
  *
  * The integers stored in `GlobalInfo_t`, `LayerInfo_t` and `NeuronInfo_t`
  * are field widths in bits; the `NnConfig*` bundles use them directly as
  * `UInt`/`SInt` widths.
  *
  * Keys that this fragment does not define raise `CDEMatchError`, the same
  * fall-through used by the other Dana configuration fragments, rather than
  * relying on an implicit `scala.MatchError`.
  */
class Abi32Bit extends Config ( topDefinitions = {
  (pname,site,here) => pname match {
    case DanaPtrBits => 32
    case DanaDataBits => 32

    // Widths (bits) of the global header fields
    case GlobalInfo => GlobalInfo_t (
      decimal_point = 3,
      error_function = 1,
      binary_format = 3,
      _unused_0 = 9,
      total_weight_blocks = 16,
      total_neurons = 16,
      total_layers = 16,
      ptr_first_layer = site(DanaPtrBits),
      ptr_weights = site(DanaPtrBits))

    // Widths (bits) of the per-layer fields
    case LayerInfo => LayerInfo_t (
      ptr_neuron = site(DanaPtrBits),
      num_neurons = 16,
      num_neurons_previous = 16)

    // Widths (bits) of the per-neuron fields
    case NeuronInfo => NeuronInfo_t (
      ptr_weight_offset = site(DanaPtrBits),
      num_weights = 16,
      activation_function = 5,
      steepness = 3,
      _unused_0 = 8,
      _unused_1 = 32,
      bias = site(DanaDataBits))
    // NOTE(review): presumably offsets applied to the encoded decimal point
    // and steepness fields -- confirm against the users of these keys.
    case DecimalPointOffset => 7
    case SteepnessOffset => 4
    // Explicit fall-through, consistent with the sibling config fragments
    case _ => throw new CDEMatchError
  }}
)
/** Bundle view of a single neuron entry in a neural network configuration.
  *
  * Every field width (in bits) is taken from the `NeuronInfo` parameter, so
  * this bundle changes shape with the selected ABI.
  *
  * NOTE(review): the declaration order of the fields presumably defines the
  * packed bit layout of the entry -- do not reorder without confirming
  * against the binary encoding documentation.
  */
class NnConfigNeuron(implicit p: Parameters) extends ParameterizedBundle()(p) {
  // Width table for this ABI (plain Scala value, not hardware)
  val info = p(NeuronInfo)
  // Signed bias term; the only signed field of the entry
  val bias = SInt(info.bias.W)
  // Two padding fields
  val _unused_1 = UInt(info._unused_1.W)
  val _unused_0 = UInt(info._unused_0.W)
  val steepness = UInt(info.steepness.W)
  val activationFunction = UInt(info.activation_function.W)
  val numberOfWeights = UInt(info.num_weights.W)
  val weightOffset = UInt(info.ptr_weight_offset.W)
}
/** Packs three sizing values into one integer.
  *
  * Layout of the result, as fixed by the shift amounts:
  *   - `cache` is OR-ed in unshifted (bit 0 upwards)
  *   - `pes` is shifted left by 4
  *   - `epb` is shifted left by 10 (6 + 4)
  *
  * No masking is applied: a value too large for its slot simply overlaps the
  * neighbouring field.
  */
object packInfo {
  def apply(epb: Int, pes: Int, cache: Int): Int = {
    val epbField = epb << (6 + 4)
    val pesField = pes << 4
    epbField | pesField | cache
  }
}
/** Attaches X-FILES as the single RoCC accelerator of the tile.
  *
  * Defines:
  *   - `BuildRoCC`: one accelerator on the `custom0` opcode set, built from
  *     `xfiles.XFiles`, with one page-table-walker port
  *   - `RoccMaxTaggedMemXacts`: 1
  *   - `uncore.agents.CacheName`: "L1D"
  *
  * Any other key raises `CDEMatchError`, matching the fall-through used by
  * the Dana configuration fragments, rather than relying on an implicit
  * `scala.MatchError`.
  */
class HasDanaRocc extends Config ( topDefinitions = {
  (pname,site,here) => pname match {
    case BuildRoCC => Seq(
      RoccParameters(
        opcodes = OpcodeSet.custom0,
        generator = (p: Parameters) => Module(new xfiles.XFiles()(p)),
        nPTWPorts = 1))
    case RoccMaxTaggedMemXacts => 1
    case uncore.agents.CacheName => "L1D"
    // Explicit fall-through so other fragments can answer remaining keys
    case _ => throw new CDEMatchError
  }})
/** Test-suite definitions for the xfiles-dana accelerator.
  *
  * `smoke` and `nets` hold the bare test names of the two assembly suites.
  * `xfilesDanaRegrTestNames` holds the full names run by the regression suite.
  * The two assembly suites are exposed as functions that still need the
  * environment name; the regression suite is built eagerly.
  */
object XFilesDanaTestSuites {
  val smoke = LinkedHashSet("debug", "id")

  val nets = LinkedHashSet("xorSigmoidSymmetric", "xorSigmoidSymmetric-smp")

  val xfilesDanaRegrTestNames = LinkedHashSet(
    "xfiles-dana-smoke-p-debug",
    "xfiles-dana-nets-p-xorSigmoidSymmetric",
    "xfiles-dana-nets-p-xorSigmoidSymmetric-smp")

  // Suites parameterized on the environment name, supplied by the caller
  val xfilesDanaSmoke =
    (envName: String) => new AssemblyTests("xfiles-dana", "smoke", smoke)(envName)
  val xfilesDanaNets =
    (envName: String) => new AssemblyTests("xfiles-dana", "nets", nets)(envName)

  val xfilesDanaRegressions =
    new RegressionTests("xfiles-dana", "all_tests", xfilesDanaRegrTestNames)
}
/** Configuration fragment for the standalone test harness.
  *
  * Defines `TileLinkRAMSize` as 1024 * 1024. Any other key raises
  * `CDEMatchError`, matching the fall-through used by the Dana configuration
  * fragments, rather than relying on an implicit `scala.MatchError`.
  */
class AsStandalone extends Config ( topDefinitions = {
  (pname, site, here) => {
    pname match {
      case TileLinkRAMSize => 1024 * 1024
      // Explicit fall-through so other fragments can answer remaining keys
      case _ => throw new CDEMatchError
    }
  }})
/** Sink for an interface that the device under test is never expected to use.
  *
  * The request side is always ready and the response side is never valid.
  * Any valid request is reported.
  *
  * @param name label included in the report message
  * @param fatal when true a valid request trips an assertion; when false it
  * only prints a warning
  */
class HoneyPot[T <: Bundle](name: String = "", fatal: Boolean = true) extends Module {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new Bundle{}))
    val resp = Valid(new Bundle{})
  })

  io.req.ready := true.B
  io.resp.valid := false.B
  val i = s"Module tried to access HoneyPot $name"
  if (fatal)
    assert(!(io.req.valid), i)
  else
    // printf does not append a line break, so terminate the message
    // explicitly; otherwise consecutive warnings run together on one line.
    when (io.req.valid) { printf(s"[WARN] HoneyPot: $i\n") }
}
val cmd = Flipped(Decoupled(new RoCCCommand)) 36 | val resp = Decoupled(new RoCCResponse) 37 | val busy = Output(Bool()) 38 | val interrupt = Output(Bool()) 39 | val exception = Output(Bool()) 40 | }) 41 | val dut = gen 42 | 43 | // Memory Honeypot 44 | val mem = Module(new HoneyPot(name="Memory")) 45 | mem.io.req.valid := dut.io.mem.req.valid 46 | dut.io.mem.req.ready := mem.io.req.ready 47 | dut.io.mem.resp.valid := mem.io.resp.valid 48 | 49 | // Real AUTL 50 | val autl = Module(new TileLinkTestRAM(p(TileLinkRAMSize)/tlDataBits)(p)) 51 | autl.io <> dut.io.autl 52 | 53 | // PTW Honeypot 54 | val ptw = Vec(Seq.fill(p(RoccNPTWPorts))(Module(new HoneyPot(name="PTW")).io)) 55 | ptw.zipWithIndex map { case (p, i) => 56 | p.req.valid := dut.io.ptw(i).req.valid 57 | dut.io.ptw(i).req.ready := p.req.ready 58 | dut.io.ptw(i).resp.valid := p.resp.valid 59 | } 60 | 61 | // Expose the internal Cmd/Resp bits 62 | io.cmd <> dut.io.cmd 63 | io.resp <> dut.io.resp 64 | } 65 | 66 | class XFilesTester(implicit p: Parameters) extends RoccTester(Module(new XFiles))(p) 67 | -------------------------------------------------------------------------------- /src/main/scala/standalone/XFilesTests.scala: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | package xfiles.standalone 4 | 5 | import chisel3._ 6 | import chisel3.util._ 7 | import cde._ 8 | import xfiles.XFilesUserRequests 9 | 10 | abstract class XFilesTests(implicit p: Parameters) extends XFilesTester { 11 | // New Transaction 12 | 13 | 14 | // Poll Until done 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/util/QueueAf.scala: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
/** Queue wrapper that adds an "almost full" status output.
  *
  * All queueing is delegated to an inner `Queue` built from the same
  * arguments; enqueue, dequeue and count are passed straight through.
  *
  * @param gen element type stored in the queue
  * @param entries queue depth, forwarded to the inner `Queue`
  * @param almostFullEntries occupancy at or above which `almostFull` is high
  * @param pipe forwarded to the inner `Queue`
  * @param flow forwarded to the inner `Queue`
  * @param override_reset optional reset, used for this module and forwarded
  * to the inner `Queue`
  */
class QueueAf[T <: Data](gen: T, entries: Int, almostFullEntries: Int,
  pipe: Boolean = false, flow: Boolean = false,
  override_reset: Option[Bool] = None)
    extends Module(override_reset = override_reset) {

  val io = IO(new QueueIOAf(gen, entries))
  val queue = Module(new Queue(gen, entries, pipe, flow, override_reset))

  // Plain pass-through of the standard queue interface
  io.enq <> queue.io.enq
  io.deq <> queue.io.deq
  io.count := queue.io.count
  // High whenever the occupancy has reached the threshold
  io.almostFull := queue.io.count >= almostFullEntries.U
}
/** Port bundle for the generic [[SRAM]] module.
  *
  * Signals come in three groups, each a `Vec` sized by the matching port
  * count:
  *   - no suffix (`din`, `dout`, `addr`, `we`, `re`): read/write ports
  *   - `R` suffix (`doutR`, `addrR`, `reR`): read-only ports
  *   - `W` suffix (`dinW`, `addrW`, `weW`): write-only ports
  *
  * @param dataWidth width in bits of every data signal
  * @param numReadPorts number of read-only ports
  * @param numWritePorts number of write-only ports
  * @param numReadWritePorts number of read/write ports
  * @param sramDepth number of addressable rows; addresses are
  * `log2Up(sramDepth)` bits wide
  */
class SRAMInterface(
  val dataWidth: Int,
  val numReadPorts: Int,
  val numWritePorts: Int,
  val numReadWritePorts: Int,
  val sramDepth: Int
) extends Bundle {
  // Rebuild with the same constructor arguments when the bundle is cloned
  override def cloneType = new SRAMInterface(
    dataWidth = dataWidth,
    numReadPorts = numReadPorts,
    numWritePorts = numWritePorts,
    numReadWritePorts = numReadWritePorts,
    sramDepth = sramDepth
  ).asInstanceOf[this.type]
  // Data Input
  val din = Input(Vec(numReadWritePorts, UInt(dataWidth.W)))
  val dinW = Input(Vec(numWritePorts, UInt(dataWidth.W)))
  // Data Output
  val dout = Output(Vec(numReadWritePorts, UInt(dataWidth.W)))
  val doutR = Output(Vec(numReadPorts, UInt(dataWidth.W)))
  // Addresses
  val addr = Input(Vec(numReadWritePorts, UInt(log2Up(sramDepth).W)))
  val addrR = Input(Vec(numReadPorts, UInt(log2Up(sramDepth).W)))
  val addrW = Input(Vec(numWritePorts, UInt(log2Up(sramDepth).W)))
  // Write enable
  val we = Input(Vec(numReadWritePorts, Bool()))
  val weW = Input(Vec(numWritePorts, Bool()))
  // Read enable
  val re = Input(Vec(numReadWritePorts, Bool()))
  val reR = Input(Vec(numReadPorts, Bool()))
}
/** Client-side bundle for a single read/write SRAM port.
  *
  * Directions are from the point of view of the requester: `we`, `din` and
  * `addr` are driven out, `dout` comes back in.
  *
  * @param dataWidth width in bits of `din` and `dout`
  * @param sramDepth number of addressable rows; `addr` is `log2Up(sramDepth)`
  * bits wide
  */
class SRAMSinglePortInterface(
  val dataWidth: Int,
  val sramDepth: Int
) extends Bundle {
  // Must rebuild this same class. Cloning into the dual-port bundle would
  // yield two-element Vec fields that do not match the scalar fields below.
  override def cloneType = new SRAMSinglePortInterface(
    dataWidth = dataWidth,
    sramDepth = sramDepth).asInstanceOf[this.type]
  val we = Output(Bool())
  val din = Output(UInt(dataWidth.W))
  val addr = Output(UInt(log2Up(sramDepth).W))
  val dout = Input(UInt(dataWidth.W))
}
/** Port bundle for an SRAM that supports element-granular writes.
  *
  * Extends the plain variant interface with a per-port element write value
  * and a wider address.
  *
  * @param dataWidth width in bits of one SRAM row
  * @param sramDepth number of rows
  * @param numPorts number of ports
  * @param elementWidth width in bits of one element within a row
  */
class SRAMElementInterface (
  override val dataWidth: Int,
  override val sramDepth: Int,
  override val numPorts: Int,
  val elementWidth: Int
) extends SRAMVariantInterface(dataWidth, sramDepth, numPorts) {
  // Rebuild with the same constructor arguments when the bundle is cloned
  override def cloneType = new SRAMElementInterface(
    dataWidth = dataWidth,
    sramDepth = sramDepth,
    numPorts = numPorts,
    elementWidth = elementWidth).asInstanceOf[this.type]
  // Element-wide write data, one per port
  val dinElement = Input(Vec(numPorts, UInt(elementWidth.W)))
  // Address widened by log2Up(dataWidth / elementWidth) bits beyond the row
  // address, so that it can also select an element within the row
  override val addr = Input(Vec(numPorts,
    UInt((log2Up(sramDepth) + log2Up(dataWidth / elementWidth)).W)))
}
/** SRAM variant whose writes replace a single element inside a row.
  *
  * A write takes two steps. In the first, the addressed row is read and the
  * request is latched in `writePending`. In the second, the element is
  * patched into the row that was read, and the row is written back.
  *
  * If a new write to the same row arrives while a write is pending, it is
  * forwarded: its element is merged into the row being written back, at the
  * incoming write's own element position, and no new pending entry is made.
  *
  * The address is split into `addrHi` (row) and `addrLo` (element in row).
  *
  * @param dataWidth width in bits of one SRAM row
  * @param sramDepth number of rows
  * @param numPorts number of ports
  * @param elementWidth width in bits of one element
  */
class SRAMElement (
  override val dataWidth: Int = 32,
  override val sramDepth: Int = 64,
  override val numPorts: Int = 1,
  val elementWidth: Int = 8
) extends SRAMVariant(
  // Named arguments: the parent's first positional parameter is `id`, so
  // passing these positionally would shift every value by one slot.
  dataWidth = dataWidth,
  sramDepth = sramDepth,
  numPorts = numPorts) {
  override lazy val io = IO(new SRAMElementInterface(
    dataWidth = dataWidth,
    sramDepth = sramDepth,
    numPorts = numPorts,
    elementWidth = elementWidth
  ))

  val elementsPerBlock = divUp(dataWidth, elementWidth)

  // (high bit, low bit) of element j within a row
  def index(j: Int): (Int, Int) = (elementWidth*(j+1) - 1, elementWidth * j)

  val addr = Vec(numPorts, new Bundle {
    val addrHi = Wire(UInt(log2Up(sramDepth).W))
    val addrLo = Wire(UInt(log2Up(elementsPerBlock).W))})

  val writePending = Reg(Vec(numPorts, new WritePendingBundle(
    elementWidth = elementWidth,
    dataWidth = dataWidth,
    sramDepth = sramDepth)))

  // Row read from the SRAM, split into elements and patched before write-back
  val tmp = Wire(Vec(numPorts, Vec(elementsPerBlock, UInt(elementWidth.W) )))
  val forwarding = Wire(Vec(numPorts, Bool()))

  // Combinational Logic
  for (i <- 0 until numPorts) {
    // Assign the addresses
    addr(i).addrHi := io.addr(i).asUInt()(
      log2Up(sramDepth * elementsPerBlock) - 1, log2Up(elementsPerBlock))
    addr(i).addrLo := io.addr(i)(log2Up(elementsPerBlock) - 1, 0)

    // A new write hits the row that is being written back this cycle
    val fwd = (io.we(i) && writePending(i).valid &&
      addr(i).addrHi === writePending(i).addrHi)

    // Connections to the sram
    sram.weW(i) := writePending(i).valid
    sram.dinW(i) := tmp(i)
    sram.addrW(i) := writePending(i).addrHi
    sram.addrR(i) := addr(i).addrHi
    // A forwarded write needs no row read; it reuses the row in flight
    sram.reR(i) := io.re(i) || (io.we(i) && !fwd)
    io.dout(i) := sram.doutR(i)

    // Defaults: every element keeps the value read from the SRAM
    val doutRTupled = (((x: Int, y: Int) => sram.doutR(i)(x, y)) tupled)
    (0 until elementsPerBlock).foreach(j => tmp(i)(j) := doutRTupled(index(j)))
    forwarding(i) := fwd

    when (writePending(i).valid) {
      // Write the element
      tmp(i)(writePending(i).addrLo) := writePending(i).data
      // Write the forwarded element if needed. It belongs at the incoming
      // write's element position, not the pending write's. Connected last so
      // that it wins if both target the same element.
      when (forwarding(i)) {
        tmp(i)(addr(i).addrLo) := io.dinElement(i) }}}

  // Sequential Logic
  for (i <- 0 until numPorts) {
    // Assign the pending write data
    writePending(i).valid := false.B
    // Forwarded writes are completed above and never become pending
    when (io.we(i) && (forwarding(i) === false.B)) {
      writePending(i).valid := true.B
      writePending(i).data := io.dinElement(i)
      writePending(i).addrHi := addr(i).addrHi
      writePending(i).addrLo := addr(i).addrLo }}
}
numPorts = numPorts)) 44 | 45 | val blockSize = Dimension(min(32, dataWidth), min(1024, sramDepth)) 46 | val rows = divUp(sramDepth, blockSize.height) 47 | val cols = divUp(dataWidth, blockSize.width) 48 | require(dataWidth % blockSize.width == 0) 49 | 50 | val blockRows = Seq.fill(rows)(Wire(new SRAMInterface(dataWidth = dataWidth, 51 | numReadPorts = numPorts, 52 | numWritePorts = numPorts, 53 | numReadWritePorts = 0, 54 | sramDepth = blockSize.height))) 55 | for (r <- 0 until rows) { 56 | val srams = Seq.fill(cols)(Module(new SRAM( 57 | id = id, 58 | dataWidth = blockSize.width, 59 | sramDepth = blockSize.height, 60 | numReadPorts = numPorts, 61 | numWritePorts = numPorts, 62 | numReadWritePorts = 0))) 63 | for (i <- 0 until numPorts) { 64 | for (c <- 0 until cols) { 65 | srams(c).io.weW(i) := blockRows(r).weW(i) 66 | srams(c).io.dinW(i) := blockRows(r).dinW(i)((c + 1) * blockSize.width - 1, c * blockSize.width) 67 | srams(c).io.addrW(i) := blockRows(r).addrW(i) 68 | srams(c).io.reR(i) := blockRows(r).reR(i) 69 | srams(c).io.addrR(i) := blockRows(r).addrR(i) 70 | } 71 | blockRows(r).doutR(i) := srams.map(a => a.io.doutR(i)).reverse.reduce((a, b) => a ## b) 72 | } 73 | } 74 | val sram = Wire(new SRAMInterface(dataWidth = dataWidth, 75 | numReadPorts = numPorts, 76 | numWritePorts = numPorts, 77 | numReadWritePorts = 0, 78 | sramDepth = sramDepth)) 79 | for (i <- 0 until numPorts) { 80 | sram.weW(i) := io.we(i) 81 | sram.addrW(i) := io.addr(i) 82 | sram.dinW(i) := io.din(i) 83 | sram.reR(i) := io.re(i) 84 | sram.addrR(i) := io.addr(i) 85 | io.dout(i) := sram.doutR(i) 86 | 87 | val (bankR, bankW) = rows compare 1 match { 88 | case 0 => (0.U, 0.U) 89 | case 1 => ( 90 | sram.addrR(i)(log2Up(sramDepth) - 1, log2Up(blockSize.height)), 91 | sram.addrW(i)(log2Up(sramDepth) - 1, log2Up(blockSize.height))) 92 | case _ => 93 | throw new Exception(s"Unable to determine banking for rows: ${rows}") 94 | } 95 | blockRows.zipWithIndex.map { case(row, rowIdx) => { 96 | row.weW(i) 
:= sram.weW(i) && bankW === rowIdx.U; 97 | row.addrW(i) := sram.addrW(i)(log2Up(blockSize.height) - 1, 0) 98 | row.dinW(i) := sram.dinW(i) 99 | 100 | row.reR(i) := sram.reR(i) && bankR === rowIdx.U; 101 | row.addrR(i) := sram.addrR(i)(log2Up(blockSize.height) - 1, 0) 102 | }} 103 | sram.doutR(i) := MuxLookup(RegNext(bankR), blockRows(0).doutR(i), (0 until rows). 104 | map (r => (r.U -> blockRows(r).doutR(i)))) 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/scala/xfiles/Backend.scala: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | // See LICENSE.IBM for license details. 3 | 4 | package xfiles 5 | 6 | import chisel3._ 7 | import chisel3.util._ 8 | import rocket.{RoCCInterface, HasCoreParameters, HellaCacheReq} 9 | import cde._ 10 | import _root_.util.ParameterizedBundle 11 | 12 | case object BuildXFilesBackend extends Field[XFilesBackendParameters] 13 | case class XFilesBackendParameters( 14 | generator: Parameters => XFilesBackend, 15 | csrFile_gen: Parameters => CSRFile, 16 | csrStatus_gen: Parameters => XFStatus, 17 | csrProbes_gen: Parameters => BackendProbes, 18 | info: Long = 0 19 | ) 20 | 21 | class XFilesBackendReq(implicit p: Parameters) extends XFilesBundle()(p) { 22 | val tidx = Decoupled(UInt(log2Up(transactionTableNumEntries).W)) 23 | } 24 | 25 | class XFilesBackendResp(implicit p: Parameters) extends XFilesBundle()(p) { 26 | val tidx = Valid(UInt(log2Up(transactionTableNumEntries).W)) 27 | val flags = Output(new Bundle with FlagsVDIO) 28 | } 29 | 30 | class XFilesRs1Rs2Funct(implicit val p: Parameters) 31 | extends ParameterizedBundle()(p) with HasCoreParameters { 32 | val rs1 = UInt(xLen.W) 33 | val rs2 = UInt(xLen.W) 34 | val funct = UInt(7.W) 35 | } 36 | 37 | class XFilesQueueInterface(implicit p: Parameters) extends XFilesBundle()(p) { 38 | val tidxIn = Output(UInt(log2Up(transactionTableNumEntries).W)) 
39 | val tidxOut = Output(UInt(log2Up(transactionTableNumEntries).W)) 40 | // The naming here follows what is connected to the XF TTable Input 41 | // and Output queues. Alternatively, this is from the perspective of 42 | // data flowing into (in) and out of (out) the backend 43 | val in = Flipped(Decoupled(new XFilesRs1Rs2Funct)) 44 | val out = Decoupled(UInt(xLen.W)) 45 | } 46 | 47 | class XFilesBackendInterface(implicit p: Parameters) extends XFilesBundle()(p) { 48 | val rocc = new RoCCInterface 49 | val xfReq = Flipped(new XFilesBackendReq) 50 | val xfResp = new XFilesBackendResp 51 | val xfQueue = new XFilesQueueInterface 52 | val status = Input(p(BuildXFilesBackend).csrStatus_gen(p)) 53 | lazy val probes_backend = Output(p(BuildXFilesBackend).csrProbes_gen(p)) 54 | } 55 | 56 | trait AsicFlowSafety extends XFilesBackend { 57 | io.rocc.mem.req.bits := (new HellaCacheReq).fromBits(0.U) 58 | } 59 | 60 | trait UserSafety extends XFilesBackend { 61 | io.rocc.mem.req.valid := false.B 62 | io.rocc.mem.invalidate_lr := false.B 63 | io.rocc.mem.req.bits.phys := true.B 64 | 65 | io.rocc.busy := false.B 66 | io.rocc.cmd.ready := true.B 67 | io.rocc.resp.valid := false.B 68 | 69 | io.probes_backend.interrupt := false.B 70 | } 71 | 72 | class XFilesBackend(implicit p: Parameters) extends XFilesModule()(p) { 73 | lazy val io = IO(new XFilesBackendInterface) 74 | } 75 | -------------------------------------------------------------------------------- /src/main/scala/xfiles/Configs.scala: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | package xfiles 4 | 5 | import chisel3._ 6 | import cde._ 7 | import dana.DefaultDanaConfig 8 | 9 | class DefaultXFilesConfig extends Config ( topDefinitions = { 10 | (pname,site,here) => 11 | pname match { 12 | case TidWidth => 16 13 | case AsidWidth => 16 14 | case TableDebug => true 15 | case TransactionTableNumEntries => 2 16 | case TransactionTableQueueSize => 32 17 | case EnablePrintfs => true 18 | case EnableAsserts => true 19 | case _ => throw new CDEMatchError 20 | }} 21 | ) 22 | -------------------------------------------------------------------------------- /src/main/verilog/standalone.v: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | module top 4 | (input clock, 5 | input reset); 6 | 7 | wire io_cmd_ready; 8 | wire io_cmd_valid; 9 | wire [6:0] io_cmd_bits_inst_funct; 10 | wire [4:0] io_cmd_bits_inst_rs2; 11 | wire [4:0] io_cmd_bits_inst_rs1; 12 | wire io_cmd_bits_inst_xd; 13 | wire io_cmd_bits_inst_xs1; 14 | wire io_cmd_bits_inst_xs2; 15 | wire [4:0] io_cmd_bits_inst_rd; 16 | wire [6:0] io_cmd_bits_inst_opcode; 17 | wire [63:0] io_cmd_bits_rs1; 18 | wire [63:0] io_cmd_bits_rs2; 19 | wire io_cmd_bits_status_debug; 20 | wire [31:0] io_cmd_bits_status_isa; 21 | wire [1:0] io_cmd_bits_status_prv; 22 | wire io_cmd_bits_status_sd; 23 | wire [30:0] io_cmd_bits_status_zero3; 24 | wire io_cmd_bits_status_sd_rv32; 25 | wire [1:0] io_cmd_bits_status_zero2; 26 | wire [4:0] io_cmd_bits_status_vm; 27 | wire [3:0] io_cmd_bits_status_zero1; 28 | wire io_cmd_bits_status_mxr; 29 | wire io_cmd_bits_status_pum; 30 | wire io_cmd_bits_status_mprv; 31 | wire [1:0] io_cmd_bits_status_xs; 32 | wire [1:0] io_cmd_bits_status_fs; 33 | wire [1:0] io_cmd_bits_status_mpp; 34 | wire [1:0] io_cmd_bits_status_hpp; 35 | wire io_cmd_bits_status_spp; 36 | wire io_cmd_bits_status_mpie; 37 | wire io_cmd_bits_status_hpie; 38 | wire io_cmd_bits_status_spie; 39 | wire io_cmd_bits_status_upie; 40 | wire 
io_cmd_bits_status_mie; 41 | wire io_cmd_bits_status_hie; 42 | wire io_cmd_bits_status_sie; 43 | wire io_cmd_bits_status_uie; 44 | wire io_resp_ready; 45 | wire io_resp_valid; 46 | wire [4:0] io_resp_bits_rd; 47 | wire [63:0] io_resp_bits_dat; 48 | 49 | ROCC rocc 50 | (.clock(clock), 51 | .reset(reset), 52 | .io_cmd_ready(io_cmd_ready), 53 | .io_cmd_valid(io_cmd_valid), 54 | .io_cmd_bits_inst_funct(io_cmd_bits_inst_funct), 55 | .io_cmd_bits_inst_rs2(io_cmd_bits_inst_rs2), 56 | .io_cmd_bits_inst_rs1(io_cmd_bits_inst_rs1), 57 | .io_cmd_bits_inst_xd(io_cmd_bits_inst_xd), 58 | .io_cmd_bits_inst_xs1(io_cmd_bits_inst_xs1), 59 | .io_cmd_bits_inst_xs2(io_cmd_bits_inst_xs2), 60 | .io_cmd_bits_inst_rd(io_cmd_bits_inst_rd), 61 | .io_cmd_bits_inst_opcode(io_cmd_bits_inst_opcode), 62 | .io_cmd_bits_rs1(io_cmd_bits_rs1), 63 | .io_cmd_bits_rs2(io_cmd_bits_rs2), 64 | .io_cmd_bits_status_debug(io_cmd_bits_status_debug), 65 | .io_cmd_bits_status_isa(io_cmd_bits_status_isa), 66 | .io_cmd_bits_status_prv(io_cmd_bits_status_prv), 67 | .io_cmd_bits_status_sd(io_cmd_bits_status_sd), 68 | .io_cmd_bits_status_zero3(io_cmd_bits_status_zero3), 69 | .io_cmd_bits_status_sd_rv32(io_cmd_bits_status_sd_rv32), 70 | .io_cmd_bits_status_zero2(io_cmd_bits_status_zero2), 71 | .io_cmd_bits_status_vm(io_cmd_bits_status_vm), 72 | .io_cmd_bits_status_zero1(io_cmd_bits_status_zero1), 73 | .io_cmd_bits_status_mxr(io_cmd_bits_status_mxr), 74 | .io_cmd_bits_status_pum(io_cmd_bits_status_pum), 75 | .io_cmd_bits_status_mprv(io_cmd_bits_status_mprv), 76 | .io_cmd_bits_status_xs(io_cmd_bits_status_xs), 77 | .io_cmd_bits_status_fs(io_cmd_bits_status_fs), 78 | .io_cmd_bits_status_mpp(io_cmd_bits_status_mpp), 79 | .io_cmd_bits_status_hpp(io_cmd_bits_status_hpp), 80 | .io_cmd_bits_status_spp(io_cmd_bits_status_spp), 81 | .io_cmd_bits_status_mpie(io_cmd_bits_status_mpie), 82 | .io_cmd_bits_status_hpie(io_cmd_bits_status_hpie), 83 | .io_cmd_bits_status_spie(io_cmd_bits_status_spie), 84 | 
.io_cmd_bits_status_upie(io_cmd_bits_status_upie), 85 | .io_cmd_bits_status_mie(io_cmd_bits_status_mie), 86 | .io_cmd_bits_status_hie(io_cmd_bits_status_hie), 87 | .io_cmd_bits_status_sie(io_cmd_bits_status_sie), 88 | .io_cmd_bits_status_uie(io_cmd_bits_status_uie), 89 | .io_resp_ready(io_resp_ready), 90 | .io_resp_valid(io_resp_valid), 91 | .io_resp_bits_rd(io_resp_bits_rd), 92 | .funct(funct), 93 | .rd(rd), 94 | .rs1(rs1), 95 | .rs2(rs2)); 96 | 97 | function xcustom 98 | (input int funct, 99 | input int rd, 100 | input longint rs1, 101 | input longint rs2 102 | ); 103 | $display("[INFO] hello_from_verilog"); 104 | case (funct) 105 | // 0:; 106 | // 1:; 107 | // 2:; 108 | // 3:; 109 | endcase 110 | assert(funct >=0 && funct <= 4); 111 | 112 | endfunction 113 | export "DPI-C" function hello_from_verilog; 114 | import "DPI-C" context function void hello_from_c(); 115 | initial begin 116 | $display("[INFO] In initial block"); 117 | hello_from_c(); 118 | end 119 | 120 | endmodule 121 | -------------------------------------------------------------------------------- /src/test/cpp/rocc_test.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef SRC_TEST_CPP_ROCC_TEST_H_ 4 | #define SRC_TEST_CPP_ROCC_TEST_H_ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #if VM_TRACE 12 | #include 13 | #endif 14 | 15 | #include "src/test/cpp/xcustom.h" 16 | 17 | typedef struct { 18 | bool verbose; 19 | char * filename_vcd; 20 | char * filename_mem; 21 | long timeout; 22 | int exit_code; 23 | int nofail; 24 | int resolution; 25 | char * argv0; 26 | } t_options; 27 | 28 | class RoccTest { 29 | private: 30 | TOP_TYPE * t_; 31 | vluint64_t * main_time_; 32 | std::queue resp_; 33 | unsigned int half_; 34 | t_options opts_; 35 | #if VM_TRACE 36 | VerilatedVcdC * tfp_; 37 | #endif 38 | 39 | public: 40 | RoccTest(TOP_TYPE * top); 41 | ~RoccTest(); 42 | 43 | // Utility functions 44 | int parseOptions(int argc, char ** argv); 45 | 46 | // Low-level operations 47 | int tick(unsigned int num_cycles = 1, bool reset = false, bool debug = false); 48 | int reset(unsigned int num_cycles = 1); 49 | int finish(unsigned int drain_cycles = 1); 50 | int loadMemory(bool safe = false); 51 | RoccResp * popResp(); 52 | 53 | // RoCC Command-level functions 54 | int inst(const RoccCmd & cmd); 55 | bool instAndCheck(const RoccCmd & cmd, const RoccResp & resp); 56 | bool instAndCheck(const std::tuple & t); 57 | bool instAndCheck(const std::vector> & t); 58 | 59 | // Testcase functions 60 | int run(std::vector &); 61 | int run(void *); 62 | 63 | // Blindly run until the end 64 | int run(unsigned int num_cycles = -1); 65 | 66 | // Accessor functions 67 | bool isVerbose() { return opts_.verbose; } 68 | int numResp() { return resp_.size(); } 69 | vluint64_t getTime() { return *main_time_; } 70 | int exit_code() { return opts_.exit_code; } 71 | 72 | private: 73 | void usage(const char * name, const char * extra = NULL); 74 | }; 75 | 76 | #endif // SRC_TEST_CPP_ROCC_TEST_H_ 77 | -------------------------------------------------------------------------------- /src/test/cpp/t_debug.cpp: 
-------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | #include 4 | #include 5 | 6 | #include "src/test/cpp/rocc_test.h" 7 | #include "src/test/cpp/xfiles_debug.h" 8 | 9 | int main(int argc, char** argv) { 10 | Verilated::commandArgs(argc, argv); 11 | 12 | RoccTest test = RoccTest(new TOP_TYPE); 13 | if (test.parseOptions(argc, argv)) return test.finish(); 14 | if (test.isVerbose()) std::cout << "[INFO] Starting simulation!\n"; 15 | 16 | // Apply reset 17 | test.reset(1); 18 | done_reset = true; 19 | 20 | // Create all the instructions 21 | XFilesDebug g(0); 22 | std::vector> tests = { 23 | std::make_tuple(g.DebugEchoViaReg(0xdead), g.RespVal(0xdead)), 24 | std::make_tuple(g.DebugWriteUtl(0xf00d, 0x20), g.RespVal(0x0)), 25 | std::make_tuple(g.DebugReadUtl(0x20), g.RespVal(0xf00d)) 26 | }; 27 | 28 | // Run the tests 29 | test.instAndCheck(tests); 30 | if (test.exit_code() != 0) 31 | std::cerr << "[ERROR] Tests failed (count: " << test.exit_code() << ")\n"; 32 | else 33 | if(test.isVerbose()) std::cout << "[INFO] Test passed\n"; 34 | 35 | // Let the simulation run for a few more cycles 36 | for (int i = 0; i < tests.size(); ++i) { 37 | delete std::get<0>(tests[i]); 38 | delete std::get<1>(tests[i]); 39 | } 40 | return test.finish(); 41 | } 42 | -------------------------------------------------------------------------------- /src/test/cpp/transaction.cpp: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
2 | 3 | #include "transaction.h" 4 | 5 | transaction::transaction(fann * _ann, fann_type * _inputs, 6 | uint16_t _asid, uint32_t _nnid, 7 | unsigned int _decimal_point) { 8 | ann = _ann; 9 | asid = _asid; 10 | nnid = _nnid; 11 | num_input = fann_get_num_input(ann); 12 | num_output = fann_get_num_output(ann); 13 | count_in = 0; 14 | count_out = 0; 15 | count_reads = 0; 16 | decimal_point = _decimal_point; 17 | inputs.resize(num_input); 18 | outputs_fann.resize(num_output); 19 | for (int i = 0; i < num_input; i++) 20 | inputs[i] = (int32_t) (_inputs[i] * pow(2, decimal_point)); 21 | fann_type * tmp = fann_run(ann, _inputs); 22 | for (int i = 0; i < num_output; i++) 23 | outputs_fann[i] = tmp[i]; 24 | }; 25 | 26 | bool transaction::new_read() { 27 | count_reads++; 28 | return count_reads == num_output; 29 | }; 30 | 31 | int32_t transaction::get_input() { 32 | return inputs[count_in++]; 33 | }; 34 | 35 | bool transaction::done_in() { 36 | return count_in == num_input - 1; 37 | }; 38 | 39 | bool transaction::done_out() { 40 | return outputs.size() == num_output; 41 | }; 42 | 43 | void transaction::update_error(double bound) { 44 | error = 0; 45 | error_squared = 0; 46 | bound_failures = 0; 47 | bit_failures = 0; 48 | assert(outputs.size() == num_output); 49 | double err; 50 | for (int i = 0; i < num_output; i++) { 51 | err = outputs_fann[i] - (double) outputs[i] / pow(2.0, decimal_point); 52 | error += err; 53 | error_squared += err * err; 54 | // Check to see if we're violating an error bound 55 | if (fabs(err) > bound) { 56 | printf("[ERROR] ABS Err (%f) > %0.5f on [TID: 0x%x, %d], found %d (%f), should be %f\n", 57 | fabs(err), bound, tid, i, 58 | outputs[i], 59 | (double) outputs[i] / pow(2.0, decimal_point), 60 | outputs_fann[i]); 61 | bound_failures++; 62 | } 63 | // Check to see if this results in a bit flip 64 | output_fann_th = outputs_fann[i] > 0.5 ? 1 : 0; 65 | output_dana_th = outputs[i] > (1 << (decimal_point - 1)) ? 
1 : 0; 66 | if (output_fann_th != output_dana_th) { 67 | printf("[ERROR] Bit flip on [TID: 0x%x, %d], found %d (%f), should be %f\n", 68 | tid, i, outputs[i], 69 | (double) outputs[i] / pow(2.0, decimal_point), 70 | outputs_fann[i]); 71 | bit_failures++; 72 | } 73 | } 74 | }; 75 | -------------------------------------------------------------------------------- /src/test/cpp/transaction.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "fann.h" 4 | 5 | // Transaction class that encapsulates a single transaction, i.e., a 6 | // request by a thread to compute the output of neural network 7 | // (specified by an NNID) for a given input vector. 8 | 9 | class transaction { 10 | private: 11 | unsigned int count_in, count_out, count_reads, decimal_point; 12 | int output_fann_th, output_dana_th; 13 | 14 | public: 15 | struct fann * ann; 16 | unsigned int num_input, num_output; 17 | std::vector inputs; 18 | std::vector outputs; 19 | std::vector outputs_fann; 20 | uint16_t asid; 21 | uint16_t tid; 22 | uint16_t num_rounds; 23 | uint32_t nnid; 24 | double error, error_squared; 25 | int bound_failures; 26 | int bit_failures; 27 | 28 | transaction(fann *, fann_type *, uint16_t, uint32_t, unsigned int); 29 | int32_t get_input(); 30 | bool done_in(); 31 | bool done_out(); 32 | bool new_read(); 33 | void update_error(double); 34 | }; 35 | -------------------------------------------------------------------------------- /src/test/cpp/xcustom.cpp: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #include "src/test/cpp/xcustom.h" 4 | 5 | XCustom::XCustom(int x, privilegeMode prv) { 6 | if (x < 0 || x > 3) 7 | throw std::domain_error("XCustom x must be on range [0, 3]"); 8 | x_ = x; 9 | prv_ = prv; 10 | } 11 | 12 | privilegeMode XCustom::ChangePrv(privilegeMode prv) { 13 | privilegeMode prv_old = prv_; 14 | prv_ = prv; 15 | return prv_old; 16 | } 17 | 18 | RoccCmd * XCustom::Instruction(int funct, uint64_t rs1, uint64_t rs2, int rs1_d, 19 | int rs2_d, int rd) { 20 | roccInsnUnion r; 21 | r.rocc.funct = funct; 22 | r.rocc.rs1 = rs1_d; 23 | r.rocc.rs2 = rs2_d; 24 | r.rocc.rd = rd; 25 | 26 | r.rocc.xs1 = 1; 27 | r.rocc.xs2 = 1; 28 | r.rocc.xd = rd != 0; 29 | switch (x_) { 30 | case (0): r.rocc.opcode = 0b0001011; break; 31 | case (1): r.rocc.opcode = 0b0101011; break; 32 | case (2): r.rocc.opcode = 0b1011011; break; 33 | case (3): r.rocc.opcode = 0b1111011; break; 34 | } 35 | 36 | return new RoccCmd(r, rs1, rs2); 37 | } 38 | 39 | RoccCmd * XCustom::Unimplemented() { 40 | throw std::logic_error("Unimplemented function"); 41 | } 42 | 43 | RoccResp * XCustom::RespVal(int data, int rd) { 44 | return new RoccResp(rd, data); 45 | } 46 | -------------------------------------------------------------------------------- /src/test/cpp/xcustom.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef SRC_TEST_CPP_XCUSTOM_H_ 4 | #define SRC_TEST_CPP_XCUSTOM_H_ 5 | 6 | #include 7 | 8 | enum privilegeMode {kUser, kSupervisor, kHypervisor, kMachine}; 9 | 10 | struct roccInsn { 11 | unsigned opcode : 7; 12 | unsigned rd : 5; 13 | unsigned xs2 : 1; 14 | unsigned xs1 : 1; 15 | unsigned xd : 1; 16 | unsigned rs1 : 5; 17 | unsigned rs2 : 5; 18 | unsigned funct : 7; 19 | }; 20 | 21 | union roccInsnUnion { 22 | roccInsn rocc; 23 | uint32_t raw; 24 | }; 25 | 26 | class RoccCmd { 27 | public: 28 | RoccCmd(roccInsnUnion inst, uint64_t rs1, uint64_t rs2) { 29 | inst_ = inst; 30 | rs1_ = rs1; 31 | rs2_ = rs2; 32 | } 33 | public: 34 | roccInsnUnion inst_; 35 | uint64_t rs1_; 36 | uint64_t rs2_; 37 | }; 38 | 39 | class RoccResp { 40 | public: 41 | RoccResp(unsigned rd, uint64_t data) { 42 | rd_ = rd; 43 | data_ = data; 44 | } 45 | bool operator== (const RoccResp &b) const { 46 | bool same = this->rd_ == b.rd_; 47 | same &= (this->data_ == b.data_); 48 | return same; 49 | } 50 | bool operator!= (const RoccResp&b) const { 51 | bool same = this->rd_ != b.rd_; 52 | same |= (this->data_ != b.data_); 53 | return same; 54 | } 55 | public: 56 | unsigned rd_; 57 | uint64_t data_; 58 | }; 59 | 60 | class XCustom { 61 | private: 62 | int x_; 63 | privilegeMode prv_; 64 | 65 | public: 66 | XCustom(int x, privilegeMode prv = kUser); 67 | privilegeMode ChangePrv(privilegeMode prv); 68 | RoccCmd * Instruction(int funct, uint64_t rs1, uint64_t rs2, int rs1_d = 1, 69 | int rs2_d = 2, int rd = 0); 70 | RoccCmd * Unimplemented(); 71 | RoccResp * RespVal(int d, int rd = 1); 72 | }; 73 | 74 | #endif // SRC_TEST_CPP_XCUSTOM_H_ 75 | -------------------------------------------------------------------------------- /src/test/cpp/xfiles_dana.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
2 | 3 | #ifndef __XFILES_DANA_H__ 4 | #define __XFILES_DANA_H__ 5 | 6 | #include 7 | #include "emulator.h" 8 | 9 | class xfiles_dana_helper : public Top_api_t { 10 | private: 11 | struct { 12 | uint64_t num_pes; 13 | uint64_t cache_num_entries; 14 | uint64_t elements_per_block; 15 | uint64_t transaction_table_num_entries; 16 | uint64_t transaction_table_sram_elements; 17 | uint64_t register_file_num_elements; 18 | uint64_t asid_width; 19 | uint64_t tid_width; 20 | uint64_t nnid_width; 21 | uint64_t feedback_width; 22 | uint64_t element_width; 23 | uint64_t num_cores; 24 | uint64_t decimal_point_offset; 25 | uint64_t decimal_point_width; 26 | } parameters; 27 | 28 | public: 29 | xfiles_dana_helper(); 30 | ~xfiles_dana_helper(); 31 | 32 | // Load the cache so that memory requests aren't necessary 33 | int cache_load(int, uint32_t, const char *, bool); 34 | 35 | // Read a parameter file and populate the local parameters 36 | int read_parameters(const string); 37 | }; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/test/cpp/xfiles_debug.cpp: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #include "src/test/cpp/xfiles_debug.h" 4 | 5 | XFilesDebug::XFilesDebug(int x) : XCustom(x) {} 6 | 7 | RoccCmd * XFilesDebug::DebugTest(xfiles_debug_action_t action, int data, int rd, 8 | uint64_t addr) { 9 | uint64_t action_and_data = ((uint64_t) action << 32) | data; 10 | return Instruction(t_USR_XFILES_DEBUG, action_and_data, addr, 1, 2, 1); 11 | } 12 | 13 | RoccCmd * XFilesDebug::DebugEchoViaReg(int d) { 14 | return DebugTest(a_REG, d); 15 | } 16 | 17 | RoccCmd * XFilesDebug::DebugReadMem(uint64_t a) { 18 | return DebugTest(a_MEM_READ, 0, a); 19 | } 20 | 21 | RoccCmd * XFilesDebug::DebugWriteMem(int d, uint64_t a) { 22 | return DebugTest(a_MEM_WRITE, d, a); 23 | } 24 | 25 | RoccCmd * XFilesDebug::DebugVirtToPhys(uint64_t a) { 26 | return DebugTest(a_VIRT_TO_PHYS, 0, a); 27 | } 28 | 29 | RoccCmd * XFilesDebug::DebugReadUtl(uint64_t a) { 30 | return DebugTest(a_UTL_READ, 0, a); 31 | } 32 | 33 | RoccCmd * XFilesDebug::DebugWriteUtl(int d, uint64_t a) { 34 | return DebugTest(a_UTL_WRITE, d, a); 35 | } 36 | -------------------------------------------------------------------------------- /src/test/cpp/xfiles_debug.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef SRC_TEST_CPP_XFILES_DEBUG_H_ 4 | #define SRC_TEST_CPP_XFILES_DEBUG_H_ 5 | 6 | #include "src/test/cpp/xcustom.h" 7 | #include "src/main/c/xfiles-debug.h" 8 | 9 | class XFilesDebug : public XCustom { 10 | public: 11 | XFilesDebug(int x = 0); 12 | RoccCmd * DebugEchoViaReg(int data); 13 | RoccCmd * DebugReadMem(uint64_t address); 14 | RoccCmd * DebugWriteMem(int data, uint64_t address); 15 | RoccCmd * DebugVirtToPhys(uint64_t address); 16 | RoccCmd * DebugReadUtl(uint64_t address); 17 | RoccCmd * DebugWriteUtl(int d, uint64_t a); 18 | private: 19 | RoccCmd * DebugTest(xfiles_debug_action_t action, int data = 0, int rd = 1, 20 | uint64_t addr = 0); 21 | }; 22 | 23 | #endif // SRC_TEST_CPP_XFILES_DEBUG_H_ 24 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *# 3 | *.#* 4 | build 5 | autom4te.cache 6 | configure 7 | -------------------------------------------------------------------------------- /tests/Makefile.in: -------------------------------------------------------------------------------- 1 | prefix := @prefix@ 2 | abs_top_srcdir := @abs_top_srcdir@ 3 | XLEN := @XLEN@ 4 | ROCC := xfiles-dana 5 | 6 | test_smoke_dir := $(abs_top_srcdir)/smoke 7 | test_nets_dir := $(abs_top_srcdir)/nets 8 | test_pk_dir := $(abs_top_srcdir)/pk 9 | 10 | .PHONY: all smoke nets pk clean 11 | 12 | all: smoke nets pk 13 | 14 | vars = \ 15 | abs_top_srcdir=$(abs_top_srcdir) \ 16 | XLEN=$(XLEN) \ 17 | PREFIX=$(ROCC)-$@ \ 18 | src_dir=$(abs_top_srcdir)/$@ 19 | 20 | smoke: 21 | mkdir -p $@ 22 | $(MAKE) -C $@ -f $(test_smoke_dir)/Makefile $(vars) 23 | 24 | nets: 25 | mkdir -p $@ 26 | $(MAKE) -C $@ -f $(test_nets_dir)/Makefile $(vars) 27 | 28 | pk: 29 | mkdir -p $@ 30 | $(MAKE) -C $@ -f $(abs_top_srcdir)/$@/Makefile $(vars) 31 | 32 | clean: 33 | $(MAKE) -C smoke -f $(test_smoke_dir)/Makefile abs_top_srcdir=$(abs_top_srcdir) 
PREFIX=$(ROCC)-smoke src_dir=$(abs_top_srcdir)/smoke clean 34 | $(MAKE) -C nets -f $(test_nets_dir)/Makefile abs_top_srcdir=$(abs_top_srcdir) PREFIX=$(ROCC)-nets src_dir=$(abs_top_srcdir)/nets clean 35 | $(MAKE) -C pk -f $(test_pk_dir)/Makefile abs_top_srcdir=$(abs_top_srcdir) PREFIX=$(ROCC)-pk src_dir=$(abs_top_srcdir)/pk clean 36 | -------------------------------------------------------------------------------- /tests/Makefrag: --------------------------------------------------------------------------------
# Shared settings for the test Makefiles. abs_top_srcdir and (optionally)
# HEADERS, LFLAGS and junk are expected to be supplied by the includer or
# on the make command line.
XLEN ?= 64

# Cross toolchain: every tool is named $(TARGET)-<tool>.
TARGET ?= riscv$(XLEN)-unknown-elf
CC := $(TARGET)-gcc
LD := $(CC)

# The dash after $(TARGET) is required, as for CC above; without it the
# command expands to e.g. riscv64-unknown-elfobjdump.
OBJDUMP ?= \
  $(TARGET)-objdump \
  --disassemble-all \
  --disassemble-zeroes \
  --section=.text \
  --section=.text.startup \
  --section=.text.init \
  --section=.data

# Add the per-target library build directory to the link search path.
LFLAGS := $(LFLAGS) \
  -L$(abs_top_srcdir)/libs/build/$(TARGET)

# riscv-tests environment directories: env/p and env/v.
ENV_P = $(abs_top_srcdir)/../../riscv-tools/riscv-tests/env/p
ENV_V = $(abs_top_srcdir)/../../riscv-tools/riscv-tests/env/v

# Header prerequisites per environment. HEADERS_V must search ENV_V; it
# previously searched ENV_P, so changes under env/v were never noticed.
HEADERS_P := $(HEADERS) $(shell find $(ENV_P))
HEADERS_V := $(HEADERS) $(shell find $(ENV_V))
top_build_dir = $(abs_top_srcdir)/../build/nets

# Compute the ID String: the four parameters below are combined into a
# single hexadecimal value with bc.
TTABLE_ENTRIES ?= 2
EPB ?= 4
NUM_PES ?= 4
CACHE_ENTRIES ?= 1
ID_STRING ?= 0x$(shell echo "obase=16; $(CACHE_ENTRIES) + ($(NUM_PES) * (2 ^ 4)) + ($(EPB) * (2 ^ 10)) + ($(TTABLE_ENTRIES) * (2 ^ 48))" | bc)

default: all
src_dir = .
35 | 36 | .PRECIOUS: $(abs_top_srcdir)/libs/build/$(TARGET)/lib%.a 37 | 38 | $(abs_top_srcdir)/libs/build/$(TARGET)/lib%.a: 39 | make -C $(abs_top_srcdir)/libs 40 | 41 | clean: 42 | rm -rf $(junk) 43 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | ## Software Testing Environment for DANA 2 | This contains a Rocket-attached suite of tests for verifying DANA's functionality based off of [`riscv-tests`](https://github.com/riscv/riscv-tests). 3 | 4 | ### Organization 5 | Tests are organized into the following categories: 6 | 7 | * [Smoke Tests](smoke) -- Bare metal verification of all instructions. These are not intended to be comprehensive. 8 | * [Neural Network Tests](nets) -- Bare metal inference, learning, and simultaneously multi-processed (SMP) inference tests 9 | 10 | Like with `riscv-tests`, the tests are intended to be built in varieties that use physical (`-p`) or virtual memory (`-v`). Currently, only the `-p` variants are built. 11 | 12 | ### Usage 13 | 14 | This requires that certain submodules in the Rocket Chip hierarchy are provided: 15 | ``` 16 | cd $ROCKETCHIP_DIR 17 | git submodule update --init --recursive 18 | ``` 19 | 20 | Build neural networks for DANA: 21 | ``` 22 | cd $DANA_DIR 23 | make 24 | ``` 25 | 26 | Build all tests: 27 | ``` 28 | cd $DANA_DIR/tests 29 | autoconf 30 | mkdir build 31 | cd build 32 | ../configure 33 | make 34 | ``` 35 | 36 | You can then run one of these tests if you have the emulator: 37 | 38 | ``` 39 | $ROCKETCHIP_DIR/emulator/emulator-rocketchip-XFilesDanaCppPe1Epb4Config smoke/-p 40 | ``` 41 | 42 | Because these tests are bare-metal, the output is not terrifically interesting (it's either pass or fail). Running with Chisel `printf` commands enabled, or generating a waveform, produces more verbose output. 
43 | 44 | You can run these through `spike`, but these will naturally fail as `spike` does not have an attached accelerator. 45 | -------------------------------------------------------------------------------- /tests/configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT(xfiles-dana-tests, 1.0) 2 | 3 | cross_compiling=yes 4 | AC_PROG_CC 5 | 6 | AC_ARG_WITH(xlen, 7 | [AS_HELP_STRING([--with-xlen=XLEN], 8 | [Set XLEN, the X-register bit width (default is 64)])], 9 | AC_SUBST(XLEN, $withval), 10 | AC_SUBST(XLEN, 64) 11 | ) 12 | 13 | AC_OUTPUT( 14 | Makefile 15 | ) 16 | -------------------------------------------------------------------------------- /tests/libs/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | *~ 3 | *.#* -------------------------------------------------------------------------------- /tests/libs/Makefile: -------------------------------------------------------------------------------- 1 | TARGET ?= riscv64-unknown-elf 2 | DIR_TOP ?= $(abspath .) 3 | DIR_BUILD ?= $(DIR_TOP)/build/$(TARGET) 4 | 5 | # RISC-V related options 6 | ifeq "$(TARGET)" "host" 7 | CFLAGS = -DNO_VM=1 8 | libs = \ 9 | xfiles-ant 10 | else 11 | TARGET_DASH = $(TARGET)- 12 | libs = \ 13 | xfiles-user \ 14 | xfiles-supervisor \ 15 | xfiles-debug \ 16 | xfiles-user-pk \ 17 | xfiles-ant 18 | endif 19 | CC = $(TARGET_DASH)gcc 20 | CXX = $(TARGET_DASH)g++ 21 | AR = $(TARGET_DASH)ar 22 | OBJDUMP = $(TARGET_DASH)objdump 23 | 24 | 25 | CFLAGS += \ 26 | -Wall \ 27 | -Werror \ 28 | -O3 \ 29 | -static \ 30 | --std=gnu11 \ 31 | -I$(DIR_TOP)/../.. 
32 | LFLAGS = -L$(DIR_BUILD)/$(TARGET) -L$(DIR_BUILD)/fann/$(TARGET) 33 | 34 | vpath %.h $(DIR_TOP)/src/include 35 | vpath %.c $(DIR_TOP)/src 36 | 37 | .PHONY: all clean 38 | .SUFFIXES: 39 | 40 | all: $(libs:%=$(DIR_BUILD)/lib%.a) 41 | 42 | LIBS = \ 43 | $(DIR_BUILD)/libxfiles-user.a \ 44 | $(DIR_BUILD)/libxfiles-supervisor.a \ 45 | $(DIR_BUILD)/libxfiles-user-pk.a \ 46 | $(DIR_BUILD)/libxfiles-debug.a \ 47 | $(DIR_BUILD)/libxfiles-ant.a 48 | $(DIR_BUILD)/libxfiles-user.a: \ 49 | $(DIR_BUILD)/xfiles-user.o \ 50 | $(DIR_BUILD)/xfiles-debug.o 51 | $(DIR_BUILD)/libxfiles-supervisor.a: \ 52 | $(DIR_BUILD)/xfiles-supervisor.o \ 53 | $(DIR_BUILD)/xfiles-asid-nnid-table.o 54 | $(DIR_BUILD)/libxfiles-user-pk.a: \ 55 | $(DIR_BUILD)/xfiles-user.o \ 56 | $(DIR_BUILD)/xfiles-user-pk.o \ 57 | $(DIR_BUILD)/xfiles-supervisor.o \ 58 | $(DIR_BUILD)/xfiles-asid-nnid-table.o \ 59 | $(DIR_BUILD)/xfiles-debug.o 60 | $(DIR_BUILD)/libxfiles-debug.a: \ 61 | $(DIR_BUILD)/xfiles-debug.o 62 | $(DIR_BUILD)/libxfiles-ant.a: \ 63 | $(DIR_BUILD)/xfiles-asid-nnid-table.o 64 | 65 | $(LIBS): | $(DIR_BUILD) 66 | $(AR) rcs $@ $^ 67 | 68 | $(DIR_BUILD)/%.o: %.c %.h | $(DIR_BUILD) 69 | $(CC) $(CFLAGS) -c $< -o $@ 70 | 71 | $(DIR_BUILD): 72 | mkdir -p $@ 73 | 74 | clean: 75 | rm -rf $(DIR_BUILD) 76 | 77 | mrproper: 78 | rm -rf $(DIR_TOP)/build 79 | -------------------------------------------------------------------------------- /tests/libs/src/include/xfiles-asid-nnid-table.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_ASID_NNID_TABLE_H_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_ASID_NNID_TABLE_H_ 5 | 6 | #include "tests/libs/src/include/xfiles-supervisor-types.h" 7 | #ifndef NO_VM 8 | #include "tests/libs/src/include/xfiles-debug.h" 9 | #endif 10 | 11 | // Print a visual organization of a specific ASID--NNID Table 12 | void asid_nnid_table_info(ant * table); 13 | 14 | // Constructor and destructor for the ASID--NNID Table data structure 15 | void asid_nnid_table_create(ant ** table, size_t num_asids, 16 | size_t nn_configurations_per_asid); 17 | void asid_nnid_table_destroy(ant **); 18 | 19 | // Constructor and destructor for the Queue structure 20 | void construct_queue(queue **, int); 21 | void destroy_queue(queue **); 22 | 23 | // Append the NN configuration contained in a binary file to the ASID 24 | // of the specified ASID--NNID Table. **NOTE** This is currently 25 | // unsupported with the proxy kernel as it doesn't support file 26 | // operation system calls. 27 | int attach_nn_configuration(ant ** table, asid_type asid, 28 | const char * nn_configuration_binary_file); 29 | 30 | // Attach an NN configuration that points to NULL. This is useful for 31 | // testing purposes to place a specific NN configuration in a specific 32 | // location and generate traps that will cause us to fail fast on an 33 | // invalid read. 34 | int attach_garbage(ant ** table, asid_type asid); 35 | 36 | // Append the NN configuration contained in an XLen-sized (64-bit or 37 | // 32-bit depending on RISC-V architecture) array and of a certain 38 | // size to the ASID of a specific ASID--NNID Table. 39 | int attach_nn_configuration_array(ant ** table, uint16_t asid, 40 | const xlen_t * nn_configuration_array, 41 | size_t size); 42 | 43 | // Bytes of data per beat of Tilelink L2 response. This is the value 44 | // of tlDataBeats in uncore/src/main/scala/tilelink.scala. 
45 | #define TILELINK_BYTES_PER_BEAT 16 46 | #define TILELINK_LG_BYTES_PER_BEAT 4 47 | #define TILELINK_L2_BYTES 128 48 | #define TILELINK_L2_ADDR_BITS 7 49 | // Do an allocation that is aligned on an L2 cache line 50 | int alloc_config_aligned(xlen_t ** raw, xlen_t ** aligned, size_t size); 51 | 52 | #endif // XFILES_DANA_LIBS_SRC_XFILES_ASID_NNID_TABLE_H_ 53 | -------------------------------------------------------------------------------- /tests/libs/src/include/xfiles-debug.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_MAIN_C_XFILES_DEBUG_H_ 4 | #define XFILES_DANA_LIBS_SRC_MAIN_C_XFILES_DEBUG_H_ 5 | 6 | #include "tests/libs/src/include/xfiles.h" 7 | #include "tests/libs/src/xfiles-debug.S" 8 | 9 | //-------------------------------------- Interactions with the Debug Unit 10 | 11 | // Function that accesses the per-core Debug Unit. This can be used 12 | // manually or the functions below act as aliases to this function. 
13 | xlen_t debug_test(unsigned action, uint32_t data, void * addr); 14 | 15 | // Write data to the accelerator and have the accelerator return it: 16 | // data = data 17 | xlen_t debug_echo_via_reg(uint32_t data); 18 | 19 | // Read from a specific address using the L1 port: 20 | // data = [addr] 21 | xlen_t debug_read_mem(void * addr); 22 | 23 | // Write to a specific address using the L1 port: 24 | // [addr] = data 25 | xlen_t debug_write_mem(uint32_t data, void * addr); 26 | 27 | // Do virtual to physical address translation: 28 | // addr_phys = virt_to_phys(addr_virt) 29 | void * debug_virt_to_phys(void * addr_v); 30 | 31 | // Read a specific memory address using the L2 uncached tilelink port: 32 | // data = [addr] 33 | xlen_t debug_read_utl(void * addr); 34 | 35 | // Write to a specific memory address using the L2 uncached tilelink port: 36 | // [addr] = data 37 | xlen_t debug_write_utl(uint32_t data, void * addr); 38 | 39 | #endif // XFILES_DANA_LIBS_SRC_MAIN_C_XFILES_DEBUG_H_ 40 | -------------------------------------------------------------------------------- /tests/libs/src/include/xfiles-supervisor.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_SUPERVISOR_H_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_SUPERVISOR_H_ 5 | 6 | #include "tests/libs/src/include/xfiles.h" 7 | #include "tests/libs/src/include/xfiles-supervisor-types.h" 8 | #include "tests/libs/src/xfiles-supervisor.S" 9 | 10 | // Set the ASID to a new value 11 | asid_type set_asid(asid_type * asid, tid_type * tid); 12 | 13 | // Set the ASID--NNID Table Pointer (ANTP) 14 | ant_entry * set_antp(ant_entry * antp, size_t * size); 15 | 16 | // Read a csr from XFiles 17 | xlen_t xf_read_csr(xlen_t csr); 18 | 19 | // Write (swap) a csr from XFiles 20 | xlen_t xf_write_csr(xlen_t csr, xlen_t val); 21 | 22 | #endif // XFILES_DANA_LIBS_SRC_XFILES_SUPERVISOR_H_ 23 | -------------------------------------------------------------------------------- /tests/libs/src/include/xfiles-user-pk.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_USER_PK_H_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_USER_PK_H_ 5 | 6 | #include "tests/libs/src/include/xfiles.h" 7 | #include "tests/libs/src/include/xfiles-supervisor.h" 8 | #include "tests/libs/src/include/xfiles-user.h" 9 | 10 | // Set the ASID to a new value 11 | xlen_t pk_syscall_set_asid(asid_type asid); 12 | 13 | // Set the ASID--NNID Table Pointer (ANTP) 14 | xlen_t pk_syscall_set_antp(ant * os_antp); 15 | 16 | // Do a debug echo using a system call 17 | xlen_t pk_syscall_debug_echo(uint32_t data); 18 | 19 | #endif // XFILES_DANA_LIBS_SRC_XFILES_USER_PK_H_ 20 | -------------------------------------------------------------------------------- /tests/libs/src/include/xfiles-user.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details.
2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_USER_H_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_USER_H_ 5 | 6 | #include 7 | #include "tests/libs/src/include/xfiles.h" 8 | 9 | //-------------------------------------- Userland 10 | 11 | // Read the X-Files ID string 12 | xlen_t xfiles_dana_id(); 13 | 14 | // Initiate a new Transaction for a specific NNID. The X-Files Arbiter 15 | // will then assign and return a TID necessary for other userland 16 | // functions. The third parameter, "num_train_outputs", when set to 17 | // zero indicates that this is a feedforward computation. If non-zero, 18 | // this is a learning request. 19 | tid_type new_write_request(nnid_type nnid, learning_type_t learning_type, 20 | element_type num_train_outputs); 21 | 22 | // Function to write a specific register inside of the X-Files 23 | // Arbiter. The value is passed as a 32-bit unsigned, but only the 24 | // LSBs will be used if the destination register has fewer than 32 25 | // bits. 26 | xlen_t write_register(tid_type tid, xfiles_reg reg, uint32_t value); 27 | 28 | // Write the contents of an input array of some size to the X-Files 29 | // Arbiter. After completing this function, the transaction is deemed 30 | // valid and will start executing on Dana. 31 | xlen_t write_data(tid_type tid, 32 | element_type * input_data_array, 33 | size_t count); 34 | 35 | // Writes an input array to the X-Files Arbiter, but does not write 36 | // the last array element. This, coupled with `write_data_last` can be 37 | // used to start transactions nearly simultaneously. 38 | xlen_t write_data_except_last(tid_type tid, 39 | element_type * input_data_array, 40 | size_t count); 41 | 42 | // Writes the last element of an input array to the X-Files Arbiter. 43 | // This will implicitly start a transaction. 44 | xlen_t write_data_last(tid_type tid, 45 | element_type * input_data_array, 46 | size_t count); 47 | 48 | // A special write data request used for incremental training.
Here, 49 | // an input and an expected output vector are passed. The 50 | // configuration cache is updated inside the Configuration Cache. 51 | xlen_t write_data_train_incremental(tid_type tid, 52 | element_type * input_data_array, 53 | element_type * output_data_array, 54 | size_t count_input, 55 | size_t count_output); 56 | 57 | xlen_t transaction_learn(nnid_type nnid, element_type * addr_i, element_type * addr_o, 58 | element_type * addr_e, size_t num_inputs, 59 | size_t num_outputs); 60 | 61 | // Read all the output data for a specific transaction. This throws 62 | // the CPU into a spinlock repeatedly checking the validity of the 63 | // X-Files response. 64 | uint64_t read_data_spinlock(tid_type tid, 65 | element_type * output_data_array, 66 | size_t count); 67 | 68 | // Forcibly kill a running transaction 69 | xlen_t kill_transaction(tid_type tid); 70 | 71 | // Run feedforward inference on one input--output pair 72 | xlen_t transaction_feedforward(nnid_type nnid, 73 | element_type * addr_i, 74 | element_type * addr_o, 75 | int num_inputs, 76 | int num_outputs); 77 | 78 | // Run over an input--output dataset for a given NNID, returning the 79 | // number of differences with the expected output 80 | xlen_t xfiles_fann_run_compare( 81 | nnid_type nnid, 82 | element_type * addr_i, 83 | element_type * addr_o, 84 | element_type * addr_e, 85 | int num_inputs, 86 | int num_outputs, 87 | int num_data); 88 | 89 | xlen_t xfiles_fann_run_no_compare( 90 | nnid_type nnid, 91 | element_type * addr_i, 92 | element_type * addr_o, 93 | int num_inputs, 94 | int num_outputs, 95 | int num_data); 96 | 97 | xlen_t xfiles_fann_run_smp_no_compare( 98 | nnid_type nnid, 99 | element_type * addr_i, 100 | element_type * addr_o, 101 | int num_inputs, 102 | int num_outputs, 103 | int num_data); 104 | 105 | xlen_t xfiles_fann_run_smp_compare( 106 | nnid_type nnid, 107 | element_type * addr_i, 108 | element_type * addr_o, 109 | element_type * addr_e, 110 | int num_inputs, 111 | int 
num_outputs, 112 | int num_data); 113 | 114 | xlen_t xfiles_fann_run_infer( 115 | nnid_type nnid, 116 | element_type * addr_i, 117 | element_type * addr_o, 118 | int num_inputs, 119 | int num_outputs); 120 | 121 | #endif // XFILES_DANA_LIBS_SRC_XFILES_USER_H_ 122 | -------------------------------------------------------------------------------- /tests/libs/src/include/xfiles.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_H_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_H_ 5 | 6 | #include 7 | #include 8 | 9 | #include "tests/libs/src/xfiles.S" 10 | #include "tests/rocc-software/src/xcustom.h" 11 | 12 | // [TODO] Any changes to these types need to occur in conjunction with 13 | // the Chisel code and with the TID extraction part of 14 | // new_write_request. 15 | typedef int32_t nnid_type; 16 | typedef int16_t tid_type; 17 | typedef int32_t element_type; 18 | typedef uint64_t xlen_t; 19 | 20 | typedef enum { 21 | xfiles_reg_batch_items = 0, 22 | xfiles_reg_learning_rate, 23 | xfiles_reg_weight_decay_lambda 24 | } xfiles_reg; 25 | 26 | typedef enum { 27 | FEEDFORWARD = 0, 28 | TRAIN_INCREMENTAL = 1, 29 | TRAIN_BATCH = 2 30 | } learning_type_t; 31 | 32 | typedef enum { 33 | err_XFILES_UNKNOWN = 0, 34 | err_XFILES_NOASID, 35 | err_XFILES_TTABLEFULL, 36 | err_XFILES_INVALIDTID 37 | } xfiles_err_t; 38 | 39 | typedef enum { 40 | resp_OK = 0, 41 | resp_TID, 42 | resp_READ, 43 | resp_NOT_DONE, 44 | resp_QUEUE_ERR, 45 | resp_XFILES 46 | } xfiles_resp_t; 47 | 48 | typedef enum { 49 | err_UNKNOWN = 0, 50 | err_DANA_NOANTP = 1, 51 | err_INVASID = 2, 52 | err_INVNNID = 3, 53 | err_ZEROSIZE = 4, 54 | err_INVEPB = 5 55 | } dana_err_t; 56 | 57 | typedef enum { 58 | int_INVREQ = 0, 59 | int_DANA_NOANTP = 1, 60 | int_INVASID = 2, 61 | int_INVNNID = 3, 62 | int_NULLREAD = 4, 63 | int_ZEROSIZE = 5, 64 | int_INVEPB = 6, 65 | int_MISALIGNED = 7, 66 | int_UNKNOWN = -1 67 
| } xfiles_causes_t; 68 | 69 | #define OPCODE 0 70 | // Standard macro that passes rd_, rs1_, and rs2_ via registers 71 | #define XFILES_INSTRUCTION(rd, rs1, rs2, funct) \ 72 | ROCC_INSTRUCTION_R_R_R(OPCODE, rd, rs1, rs2, funct, 5, 6, 7) 73 | 74 | #define XFILES_INSTRUCTION_R_R_R(rd, rs1, rs2, funct) \ 75 | ROCC_INSTRUCTION_R_R_R(OPCODE, rd, rs1, rs2, funct, 5, 6, 7) 76 | 77 | // Macro to pass rs2_ as an immediate 78 | #define XFILES_INSTRUCTION_R_R_I(rd, rs1, rs2, funct) \ 79 | ROCC_INSTRUCTION_R_R_R(OPCODE, rd, rs1, rs2, funct, 5, 6, 7) 80 | 81 | // Macro to pass rs1_ and rs2_ as immediates 82 | #define XFILES_INSTRUCTION_R_I_I(rd, rs1, rs2, funct) \ 83 | ROCC_INSTRUCTION_R_R_R(OPCODE, rd, rs1, rs2, funct, 5, 6, 7) 84 | 85 | #endif // XFILES_DANA_LIBS_SRC_XFILES_H_ 86 | -------------------------------------------------------------------------------- /tests/libs/src/xfiles-debug.S: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_DEBUG_S_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_DEBUG_S_ 5 | 6 | #include "tests/libs/src/xfiles.S" 7 | 8 | // Enumerated type that defines the action taken by the Debug Unit 9 | #define a_REG 0 // Return a value written using the cmd interface 10 | #define a_MEM_READ 1 // Read data from the L1 cache and return it 11 | #define a_MEM_WRITE 2 // Write data to the L1 cache 12 | #define a_VIRT_TO_PHYS 3 // Do address translation via the PTW port 13 | #define a_UTL_READ 4 // Read data from the L2 cache and return it 14 | #define a_UTL_WRITE 5 // Write data to the L2 cache 15 | 16 | #define DEBUG_TEST(action, data, addr, rd, rs1, rs2) \ 17 | li x ## rs1, action; \ 18 | slli x ## rs1, x ## rs1, 32; \ 19 | li x ## rs2, data; \ 20 | or x ## rs1, x ## rs1, x ## rs2; \ 21 | la x ## rs2, addr; \ 22 | ROCC_INSTRUCTION_RAW_R_R_R(CUSTOM_X, rd, rs1, rs2, t_USR_XFILES_DEBUG); 23 | 24 | #define DEBUG_ECHO_VIA_REG(data) DEBUG_TEST(a_REG, data, tdat, 10, 10, 11) 25 | #define DEBUG_READ_MEM(addr) DEBUG_TEST(a_MEM_READ, 0, addr, 10, 10, 11); 26 | #define DEBUG_WRITE_MEM(data, addr) DEBUG_TEST(a_MEM_WRITE, data, addr, 10, 10, 11); 27 | #define DEBUG_VIRT_TO_PHYS(vaddr, paddr) DEBUG_TEST(a_VIRT_TO_PHYS, 0, vaddr, paddr, 10, 11); 28 | #define DEBUG_READ_UTL(addr) DEBUG_TEST(a_UTL_READ, 0, addr, 10, 10, 11); 29 | #define DEBUG_WRITE_UTL(data, addr) DEBUG_TEST(a_UTL_WRITE, data, addr, 10, 10, 11); 30 | 31 | #endif // XFILES_DANA_LIBS_SRC_XFILES_DEBUG_S_ 32 | -------------------------------------------------------------------------------- /tests/libs/src/xfiles-debug.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #include 4 | 5 | #include "tests/libs/src/include/xfiles-debug.h" 6 | 7 | xlen_t debug_test(unsigned action, uint32_t data, void * addr) { 8 | xlen_t out, action_and_data = ((uint64_t)action << 32) | (uint32_t)data; 9 | XFILES_INSTRUCTION(out, action_and_data, addr, t_USR_XFILES_DEBUG); 10 | return out; 11 | } 12 | 13 | xlen_t debug_echo_via_reg(uint32_t data) { 14 | return debug_test(a_REG, data, 0); 15 | } 16 | 17 | xlen_t debug_read_mem(void * addr) { 18 | return debug_test(a_MEM_READ, 0, addr); 19 | } 20 | 21 | xlen_t debug_write_mem(uint32_t data, void * addr) { 22 | return debug_test(a_MEM_WRITE, data, addr); 23 | } 24 | 25 | void * debug_virt_to_phys(void * addr_v) { 26 | return (void *) debug_test(a_VIRT_TO_PHYS, 0, addr_v); 27 | } 28 | 29 | xlen_t debug_read_utl(void * addr) { 30 | return debug_test(a_UTL_READ, 0, addr); 31 | } 32 | 33 | xlen_t debug_write_utl(uint32_t data, void * addr) { 34 | return debug_test(a_UTL_WRITE, data, addr); 35 | } 36 | -------------------------------------------------------------------------------- /tests/libs/src/xfiles-supervisor.S: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_SUPERVISOR_S_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_SUPERVISOR_S_ 5 | 6 | #define t_SUP_UPDATE_ASID 0 7 | #define t_SUP_WRITE_REG 1 8 | #define t_SUP_READ_CSR 2 9 | #define t_SUP_WRITE_CSR 3 10 | 11 | #define XF_READ_CSR(csr) \ 12 | li a0, csr; \ 13 | jal xf_read_csr; 14 | 15 | #define XF_WRITE_CSR_A(csr, addr) \ 16 | li a0, csr; \ 17 | la a1, addr; \ 18 | jal xf_write_csr; 19 | 20 | #define XF_WRITE_CSR_I(csr, imm) \ 21 | li a0, csr; \ 22 | li a1, imm; \ 23 | jal xf_write_csr; 24 | 25 | #define XF_WRITE_CSR_R(csr, reg) \ 26 | li a0, csr; \ 27 | lw a1, reg; \ 28 | jal xf_write_csr; 29 | 30 | #define XF_WRITE_CSR(csr, val) XF_WRITE_CSR_I(csr, val); 31 | 32 | #define DANA_FENCE(nnid) XF_WRITE_CSR(CSRs_fence, (1<<16) | (nnid & ~(~0<<16))); 33 | #define DANA_SYNC(nnid) XF_WRITE_CSR(CSRs_fence, (0 << 16) | (nnid & ~(~0<<16))); 34 | 35 | #define SET_ASID(asid) \ 36 | XF_WRITE_CSR_I(CSRs_asid, asid); \ 37 | XF_WRITE_CSR_I(CSRs_tid, 0); 38 | #define SET_ANTP(antp, size) \ 39 | XF_WRITE_CSR_A(CSRs_antp, antp); \ 40 | XF_WRITE_CSR_R(CSRs_num_asids, size); 41 | 42 | // #define XF_READ_CSR(csr) 43 | 44 | #endif // XFILES_DANA_LIBS_SRC_XFILES_SUPERVISOR_S_ 45 | -------------------------------------------------------------------------------- /tests/libs/src/xfiles-supervisor.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #include "tests/libs/src/include/xfiles-supervisor.h" 4 | 5 | asid_type set_asid(asid_type * asid, tid_type * tid) { 6 | *asid = xf_write_csr(CSRs_asid, *asid); 7 | *tid = xf_write_csr(CSRs_tid, *tid); 8 | return *asid; 9 | } 10 | 11 | ant_entry * set_antp(ant_entry * antp, size_t * size) { 12 | antp = (ant_entry *) xf_write_csr(CSRs_antp, (xlen_t) antp); 13 | *size = (size_t) xf_write_csr(CSRs_num_asids, *size); // swap: hand the previous size back through the pointer, as set_asid does 14 | return antp; 15 | } 16 | 17 | xlen_t xf_read_csr(xlen_t csr) { 18 | xlen_t csr_value; 19 | XFILES_INSTRUCTION_R_R_I(csr_value, csr, 0, t_SUP_READ_CSR); 20 | return csr_value; 21 | } 22 | 23 | xlen_t xf_write_csr(xlen_t csr, xlen_t val) { 24 | xlen_t csr_value; 25 | XFILES_INSTRUCTION_R_R_R(csr_value, csr, val, t_SUP_WRITE_CSR); 26 | return csr_value; 27 | } 28 | -------------------------------------------------------------------------------- /tests/libs/src/xfiles-user-pk.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | 5 | xlen_t pk_syscall_set_asid(asid_type asid) { 6 | // This currently depends on a backing OS system call supported by 7 | // the Proxy Kernel (a basic RISC-V OS). Using the RISC-V function 8 | // calling convention, the asid is placed into register a0, the 9 | // syscall ID (#512) in register a7, and we generate a syscall. The 10 | // Proxy Kernel will then generate a special custom0 instruction 11 | // that sets the ASID. No output is expected, so we just return 12 | // whenever the OS returns control.
13 | xlen_t old_asid; 14 | asm volatile ("mv a0, %[asid]\n\t" 15 | "li a7, %[syscall]\n\t" 16 | "ecall\n\t" 17 | "mv %[old_asid], a0" 18 | : [old_asid] "=r" (old_asid) 19 | : [asid] "r" (asid), [syscall] "i" (SYSCALL_SET_ASID) 20 | : "a0", "a7"); 21 | return old_asid; 22 | } 23 | 24 | xlen_t pk_syscall_set_antp(ant * os_antp) { 25 | // As with `set_asid`, this relies on the Proxy Kernel to handle 26 | // this system call. This passes a pointer to the first ASID--NNID 27 | // table entry and the size (i.e., the number of ASIDs). 28 | xlen_t old_antp; 29 | asm volatile ("mv a0, %[antp]\n\t" 30 | "mv a1, %[size]\n\t" 31 | "li a7, %[syscall]\n\t" 32 | "ecall\n\t" 33 | "mv %[old_antp], a0" 34 | : [old_antp] "=r" (old_antp) 35 | : [antp] "r" (os_antp->entry_p), [size] "r" (os_antp->size), 36 | [syscall] "i" (SYSCALL_SET_ANTP) 37 | : "a0", "a1", "a7", "memory"); // a1 is overwritten above; "memory" keeps table stores ahead of the ecall 38 | return old_antp; 39 | } 40 | 41 | xlen_t pk_syscall_debug_echo(uint32_t data) { 42 | xlen_t out; 43 | asm volatile ("mv a0, %[data]\n\t" 44 | "li a7, %[syscall]\n\t" 45 | "ecall\n\t" 46 | "mv %[out], a0" 47 | : [out] "=r" (out) 48 | : [data] "r" (data), [syscall] "i" (SYSCALL_DEBUG_ECHO) 49 | : "a0", "a7"); 50 | return out; 51 | } 52 | -------------------------------------------------------------------------------- /tests/libs/src/xfiles.S: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details.
2 | 3 | #ifndef XFILES_DANA_LIBS_SRC_XFILES_S_ 4 | #define XFILES_DANA_LIBS_SRC_XFILES_S_ 5 | 6 | #define t_USR_READ_DATA 4 7 | #define t_USR_WRITE_DATA 5 8 | #define t_USR_NEW_REQUEST 6 9 | #define t_USR_WRITE_DATA_LAST 7 10 | #define t_USR_WRITE_REGISTER 8 11 | #define t_USR_XFILES_DEBUG 9 12 | 13 | // User CSRs read/write 14 | #define CSRs_fence 0x080 // Dana 15 | #define CSRs_learn_rate 0x081 16 | #define CSRs_weight_decay 0x081 // NOTE(review): same address as CSRs_learn_rate -- likely a copy-paste slip; confirm against the hardware CSR map 17 | 18 | // User CSRs read-only 19 | #define CSRs_u_xfid 0xC00 20 | 21 | // Supervisor CSRs read/write 22 | #define CSRs_cause 0x100 // X-Files 23 | #define CSRs_ttable_size 0x101 24 | #define CSRs_asid 0x102 25 | #define CSRs_tid 0x103 26 | 27 | #define CSRs_pe_size 0x180 // Dana 28 | #define CSRs_cache_size 0x181 29 | #define CSRs_pe_cooldown 0x182 30 | #define CSRs_antp 0x183 31 | #define CSRs_num_asids 0x184 32 | #define CSRs_pe_governor 0x185 33 | 34 | // Supervisor CSRs read-only 35 | #define CSRs_xfid 0xD00 // X-Files 36 | #define CSRs_xfid_current 0xD01 37 | 38 | #define RESP_CODE_WIDTH 3 39 | 40 | #endif // XFILES_DANA_LIBS_SRC_XFILES_S_ 41 | -------------------------------------------------------------------------------- /tests/nets/.gitignore: -------------------------------------------------------------------------------- 1 | *.ant.h -------------------------------------------------------------------------------- /tests/nets/Makefile: -------------------------------------------------------------------------------- 1 | include $(abs_top_srcdir)/Makefrag 2 | 3 | _tests = $(notdir $(wildcard $(src_dir)/../../build/nets/*-fixed.ant.h)) 4 | tests = \ 5 | $(patsubst %-fixed.ant.h,%, $(_tests)) \ 6 | $(patsubst %-fixed.ant.h,%-smp, $(_tests)) \ 7 | $(patsubst %-fixed.ant.h,%-learn, $(_tests)) 8 | 9 | tests_p = $(addprefix $(PREFIX)-p-, $(tests)) 10 | 11 | CFLAGS := $(CFLAGS) \ 12 | -static \ 13 | -mcmodel=medany \ 14 | -fvisibility=hidden \ 15 | -nostdlib \ 16 | -nostartfiles \ 17 | -I$(abs_top_srcdir)/..
\ 18 | -DID_STRING=$(ID_STRING) 19 | LIBS := $(LIBS) \ 20 | -lxfiles-user \ 21 | -lxfiles-supervisor \ 22 | 23 | HEADERS = $(shell find \ 24 | $(abs_top_srcdir)/rocc-software/src \ 25 | $(abs_top_srcdir)/libs/src) 26 | 27 | all: $(tests_p) 28 | 29 | vpath %.S $(src_dir) 30 | vpath %.h $(src_dir) 31 | vpath %.h $(src_dir)/../../build/nets 32 | 33 | $(PREFIX)-p-%: genericNetTest.S $(top_build_dir)/%-fixed.ant.h $(HEADERS_P) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-supervisor.a 34 | $(CC) $(CFLAGS) -I$(ENV_P) -include $(top_build_dir)/$*-fixed.ant.h $< $(LFLAGS) -T$(ENV_P)/link.ld -o $@ $(LIBS) 35 | 36 | $(PREFIX)-p-%-smp: genericNetTest.S $(top_build_dir)/%-fixed.ant.h $(HEADERS_P) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-supervisor.a 37 | $(CC) $(CFLAGS) -DSMP -I$(ENV_P) -include $(top_build_dir)/$*-fixed.ant.h $< $(LFLAGS) -T$(ENV_P)/link.ld -o $@ $(LIBS) 38 | 39 | $(PREFIX)-p-%-learn: genericLearnTest.S $(top_build_dir)/%-fixed.ant.h $(HEADERS_P) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-supervisor.a 40 | $(CC) $(CFLAGS) -I$(ENV_P) -include $(top_build_dir)/$*-fixed.ant.h $< $(LFLAGS) -T$(ENV_P)/link.ld -o $@ $(LIBS) 41 | 42 | $(PREFIX)-v-%: %.S $(HEADERS_V) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-supervisor.a 43 | $(CC) $(CFLAGS) -I$(ENV_V) $(RISCV_LD_OPTS) -T$(ENV_V)/link.ld $(LFLAGS) $< -o $@ $(LIBS) 44 | 45 | junk += $(tests_p) 46 | -------------------------------------------------------------------------------- /tests/nets/genericLearnTest.S: -------------------------------------------------------------------------------- 1 | # See LICENSE.IBM for license details. 
2 | 3 | #***************************************************************************** 4 | # genericLearnTest.S 5 | #----------------------------------------------------------------------------- 6 | # 7 | # Generic assembly test of learning using a FANN dataset 8 | # 9 | 10 | #include "riscv_test.h" 11 | #include "../riscv-tools/riscv-tests/isa/macros/scalar/test_macros.h" 12 | #include "tests/rocc-software/src/xcustom.h" 13 | #include "tests/rocc-software/src/riscv_test_rocc.h" 14 | #include "tests/libs/src/xfiles-supervisor.S" 15 | #include "tests/libs/src/xfiles-user.S" 16 | 17 | #define STACK_TOP (_end + 4096) 18 | 19 | // RVTEST_CODE_BEGIN includes the EXTRA_INIT macro before its final 20 | // `mret` and the resulting drop to user mode. We use this to setup 21 | // the ASID and ANTP for a single transaction test. 22 | #undef EXTRA_INIT 23 | #define EXTRA_INIT \ 24 | SET_ASID(1); \ 25 | SET_ANTP(antp_dana, antp_os); \ 26 | la sp, _end + 1024; 27 | 28 | RVTEST_WITH_ROCC 29 | 30 | start: 31 | 32 | RVTEST_CODE_BEGIN 33 | 34 | // Decrease the learning rate to 0.125 35 | XF_WRITE_CSR(CSRs_learn_rate, 0b00000000000000000000100000000000); 36 | FANN_TEST_LEARN(0, NUM_INPUTS, NUM_OUTPUTS, NUM_DATAPOINTS, data_in, data_expected, data_out); 37 | DANA_FENCE(0) 38 | FANN_TEST_NO_COMPARE_CC(0, NUM_INPUTS, NUM_OUTPUTS, NUM_DATAPOINTS, data_in, data_out); 39 | 40 | TEST_PASSFAIL 41 | 42 | RVTEST_CODE_END 43 | 44 | .data 45 | RVTEST_DATA_BEGIN 46 | 47 | TEST_DATA 48 | 49 | DANA_TEST_DATA 50 | DANA_ANT_DATA 51 | 52 | RVTEST_DATA_END 53 | -------------------------------------------------------------------------------- /tests/nets/genericNetTest.S: -------------------------------------------------------------------------------- 1 | # See LICENSE.IBM for license details. 
2 | 3 | #***************************************************************************** 4 | # genericNetTest.S 5 | #----------------------------------------------------------------------------- 6 | # 7 | # Generic assembly test that runs on a FANN dataset 8 | # 9 | 10 | #include "riscv_test.h" 11 | #include "../riscv-tools/riscv-tests/isa/macros/scalar/test_macros.h" 12 | #include "tests/rocc-software/src/xcustom.h" 13 | #include "tests/rocc-software/src/riscv_test_rocc.h" 14 | #include "tests/libs/src/xfiles-supervisor.S" 15 | #include "tests/libs/src/xfiles-user.S" 16 | 17 | #define STACK_TOP (_end + 4096) 18 | 19 | // RVTEST_CODE_BEGIN includes the EXTRA_INIT macro before its final 20 | // `mret` and the resulting drop to user mode. We use this to setup 21 | // the ASID and ANTP for a single transaction test. 22 | #undef EXTRA_INIT 23 | #define EXTRA_INIT \ 24 | SET_ASID(1); \ 25 | SET_ANTP(antp_dana, antp_os); \ 26 | la sp, _end + 1024; 27 | 28 | RVTEST_WITH_ROCC 29 | 30 | start: 31 | 32 | RVTEST_CODE_BEGIN 33 | 34 | #ifndef SMP 35 | FANN_TEST_NO_COMPARE_CC(0, NUM_INPUTS, NUM_OUTPUTS, NUM_DATAPOINTS, data_in, data_out); 36 | #else 37 | FANN_TEST_SMP_NO_COMPARE_CC(0, NUM_INPUTS, NUM_OUTPUTS, NUM_DATAPOINTS, data_in, data_out); 38 | #endif // SMP 39 | 40 | TEST_PASSFAIL 41 | 42 | RVTEST_CODE_END 43 | 44 | .data 45 | RVTEST_DATA_BEGIN 46 | 47 | TEST_DATA 48 | 49 | DANA_TEST_DATA 50 | DANA_ANT_DATA 51 | 52 | RVTEST_DATA_END 53 | -------------------------------------------------------------------------------- /tests/nets/inference.S: -------------------------------------------------------------------------------- 1 | # See LICENSE.IBM for license details. 
2 | 3 | #include "riscv_test.h" 4 | #include "../riscv-tools/riscv-tests/isa/macros/scalar/test_macros.h" 5 | #include "tests/rocc-software/src/xcustom.h" 6 | #include "tests/rocc-software/src/riscv_test_rocc.h" 7 | #include "tests/libs/src/xfiles-supervisor.S" 8 | #include "tests/libs/src/xfiles-user.S" 9 | 10 | #define CUSTOM_X 0 11 | 12 | #define STACK_TOP (_end + 4096) 13 | 14 | #undef EXTRA_INIT 15 | #define EXTRA_INIT \ 16 | SET_ASID(1); \ 17 | SET_ANTP(antp_dana, antp_os); \ 18 | la sp, _end + 1024; 19 | 20 | RVTEST_WITH_ROCC 21 | 22 | start: 23 | 24 | RVTEST_CODE_BEGIN 25 | 26 | FANN_TEST_INFER_CC(0, NUM_INPUTS, NUM_OUTPUTS, data_in, data_out) 27 | 28 | TEST_PASSFAIL 29 | 30 | RVTEST_CODE_END 31 | 32 | .data 33 | RVTEST_DATA_BEGIN 34 | 35 | TEST_DATA 36 | 37 | DANA_TEST_DATA 38 | DANA_ANT_DATA 39 | 40 | RVTEST_DATA_END 41 | -------------------------------------------------------------------------------- /tests/pk/Makefile: -------------------------------------------------------------------------------- 1 | include $(abs_top_srcdir)/Makefrag 2 | 3 | tests = \ 4 | hello \ 5 | dana-benchmark \ 6 | debug-test \ 7 | id \ 8 | trap-00-new-request-no-asid \ 9 | trap-00-supervisor-req-as-user \ 10 | trap-00-write-register-no-asid \ 11 | trap-01-request-antp-not-set \ 12 | trap-02-request-oob-asid \ 13 | trap-03-request-oob-nnid \ 14 | trap-05-request-nn-config-zero-size \ 15 | trap-06-request-invalid-epb 16 | 17 | CFLAGS := $(CFLAGS) \ 18 | -Wall \ 19 | -Werror \ 20 | -static \ 21 | -I$(abs_top_srcdir)/.. 
22 | 23 | tests_pk = $(addprefix $(PREFIX)-, $(tests)) 24 | 25 | all: $(tests_pk) 26 | 27 | vpath %.c $(src_dir) 28 | vpath %.h $(src_dir) 29 | 30 | $(PREFIX)-trap-%: trap-%.c $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user-pk.a 31 | $(CC) $(CFLAGS) $< -o $@ $(LFLAGS) -lxfiles-user-pk 32 | $(PREFIX)-dana-benchmark: dana-benchmark.c $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user-pk.a 33 | $(CC) $(CFLAGS) $< -o $@ $(LFLAGS) -lxfiles-user-pk 34 | $(PREFIX)-id: id.c $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user-pk.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-supervisor.a 35 | $(CC) $(CFLAGS) $< -o $@ $(LFLAGS) -lxfiles-user-pk -lxfiles-supervisor 36 | $(PREFIX)-%: %.c $(XFILES_LIBRARIES) $(libfann_dep) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a 37 | $(CC) $(CFLAGS) $< -o $@ $(LFLAGS) -lxfiles-user 38 | $(PREFIX)-%.S: $(dir_build)/%.rv 39 | $(OBJDUMP) -S $< > $@ 40 | 41 | junk += $(tests_pk) 42 | -------------------------------------------------------------------------------- /tests/pk/antw-config.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 
2 | 3 | #include 4 | 5 | #include "tests/libs/src/include/xfiles-supervisor.h" 6 | 7 | int main() { 8 | ant_entry asidEntry; 9 | nn_config nnidEntry; 10 | 11 | printf("(sizeAntStruct,%ld)\n" 12 | "(sizeAsidStruct,%ld)\n" 13 | "(sizeNnidStruct,%ld)\n" 14 | "(sizeIoStruct,%ld)\n" 15 | "(sizeQueueStruct,%ld)\n" 16 | "(offsetNnidPtr,%ld)\n" 17 | "(offsetEpb,%ld)\n" 18 | "(offsetConfig,%ld)\n", 19 | sizeof(ant), 20 | sizeof(ant_entry), 21 | sizeof(nn_config), 22 | sizeof(io), 23 | sizeof(queue), 24 | (uint64_t) &asidEntry.asid_nnid_p - (uint64_t) &asidEntry, 25 | (uint64_t) &nnidEntry.elements_per_block - (uint64_t) &nnidEntry, 26 | (uint64_t) &nnidEntry.config_p - (uint64_t) &nnidEntry); 27 | // sizeof()); 28 | 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /tests/pk/dana-benchmark.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | /********** 4 | * Author: Craig Einstein 5 | * 6 | * File: DANA_BENCHMARK.h 7 | * 8 | * Description: Header file for DANA_BENCHMARK 9 | * DANA_BENCHMARK runs transactional testing on the XFILES/DANA System 10 | * 11 | **********/ 12 | #ifndef __SRC_TEST_RV_DANA_BENCHMARK__ 13 | #define __SRC_TEST_RV_DANA_BENCHMARK__ 14 | 15 | /*Struct that contains transactional information 16 | Each struct contains: 17 | - A neural network 18 | - The number of inputs 19 | - The number of outputs 20 | - The input array 21 | - The output array 22 | */ 23 | typedef struct Transaction{ 24 | char net[100]; 25 | int input; 26 | int output; 27 | element_type * input_array; 28 | element_type * output_array; 29 | } Transaction; 30 | 31 | /* 32 | The main method of DANA_BENCHMARK.c does the following: 33 | - Parses the command line input (format defined in the README) 34 | - Creates the ASID_NNID Table, ASID, and ANTP 35 | - Queries the XFILES/DANA system with the desired number of concurrent transactions 36 | - Waits for 
the output of the desired number of concurrent outputs. 37 | */ 38 | 39 | /*Transaction Function 40 | This function completes the following: 41 | - Attaches the transaction's neural network to the asid_nnid table 42 | - Creates write request (and gets the tid) 43 | - Creates an input array 44 | - Creates an output array 45 | - Writes the input array to the transaction 46 | - Returns the tid 47 | Takes an asid_nnid table, an asid, and a transaction as input 48 | */ 49 | tid_type create_transaction( 50 | ant * table, 51 | asid_type asid, 52 | Transaction * transaction); 53 | 54 | //Auxiliary Functions 55 | 56 | //Function that prints out debug messages if the user chooses 57 | void debug(const char * output, ...); 58 | 59 | //Copies a character array into another character array 60 | void strcopy(char *file, char str[]); 61 | 62 | //Nicely prints an array 63 | void printArr(int * arr, int size); 64 | 65 | //Nicely prints a transaction (if debugging is on) 66 | void printTransaction(Transaction * transaction); 67 | 68 | #endif // __SRC_TEST_RV_DANA_BENCHMARK__ 69 | -------------------------------------------------------------------------------- /tests/pk/debug-test.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details.
2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "tests/libs/src/include/xfiles-debug.h" 8 | 9 | int main(int argc, char **argv) { 10 | 11 | uint64_t data[4]; 12 | data[0] = 0xaaaa; 13 | data[1] = 0xbbbb; 14 | data[2] = 0xcccc; 15 | data[3] = 0xdddd; 16 | 17 | xlen_t out; 18 | printf("[test] Testing register interface (action 0x%x)...\n", a_REG); 19 | printf("[test] - sent: 0x%lx\n", data[0]); 20 | out = debug_echo_via_reg(data[0]); 21 | printf("[test] received: 0x%lx\n", out); 22 | assert(out == data[0]); 23 | 24 | printf("[test] Testing L1 read (action 0x%x)...\n", a_MEM_READ); 25 | for (size_t i = 0; i < 4; ++i) { 26 | printf("[test] - virtual address: 0x%p\n", &data[i]); 27 | xlen_t * data_p = debug_virt_to_phys(&data[i]); 28 | printf("[test] physical address: 0x%p\n", data_p); 29 | out = debug_read_mem(data_p); 30 | printf("[test] data: 0x%lx\n", out); 31 | } 32 | 33 | printf("[test] Testing L1 write (action 0x%x)...\n", a_MEM_WRITE); 34 | xlen_t copy_l1[4]; 35 | for (size_t i = 0; i < 4; ++i) { 36 | xlen_t * copy_p = (xlen_t *) debug_virt_to_phys(&copy_l1[i]); 37 | printf("[test] Write %p\n", copy_p); 38 | out = debug_write_mem(data[i], copy_p); 39 | assert(out == 0); 40 | assert(data[i] == copy_l1[i]); 41 | } 42 | 43 | printf("[test] Testing translation (action 0x%x)...\n", a_VIRT_TO_PHYS); 44 | out = (xlen_t) debug_virt_to_phys(&data); 45 | assert(out != -1); 46 | 47 | printf("[test] Testing L2 read (action 0x%x)...\n", a_UTL_READ); 48 | for (size_t i = 0; i < 4; ++i) { 49 | xlen_t * data_p = (xlen_t *) debug_virt_to_phys(&data[i]); 50 | printf("[test] Read PHYS %p (VIRT: %p)\n", data_p, &data[i]); 51 | out = debug_read_utl(data_p); 52 | printf("[test] Got 0x%lx\n", out); 53 | assert(out == data[i]); 54 | } 55 | 56 | printf("[test] Testing L2 write (action 0x%x)...\n", a_UTL_WRITE); 57 | xlen_t copy_l2[4]; 58 | for (size_t i = 0; i < 4; ++i) { 59 | xlen_t * copy_p = (xlen_t *) debug_virt_to_phys(&copy_l2[i]); 60 | printf("[test] Write 0x%lx to PHYS %p
(VIRT: %p)\n", 61 | data[i], copy_p, &copy_l2[i]); 62 | out = debug_write_utl(data[i], copy_p); 63 | printf("[test] Got 0x%lx\n", copy_l2[i]); 64 | assert(out == 0); 65 | assert(copy_l2[i] == data[i]); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /tests/pk/hello.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include 4 | #include 5 | 6 | int main() { 7 | 8 | printf("\n === ===\n" 9 | " \\\\ //\n" 10 | " --- \\ /\n" 11 | "T H E | X | F I L E S\n" 12 | " --- / \\ \n" 13 | " // \\\\ \n" 14 | " === ===\n" 15 | "\n" 16 | " GILLIAN\n" 17 | " ANDERSON as DANA\n"); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /tests/pk/id.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user.h" 4 | 5 | int main (int argc, char ** argv) { 6 | xlen_t id = xfiles_dana_id(1); 7 | printf("[info] got id: 0x%lx\n", id); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /tests/pk/mt19937ar.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | /* 4 | A C-program for MT19937, with initialization improved 2002/1/26. 5 | Coded by Takuji Nishimura and Makoto Matsumoto. 6 | 7 | Before using, initialize the state by using init_genrand(seed) 8 | or init_by_array(init_key, key_length). 9 | 10 | Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, 11 | All rights reserved. 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions 15 | are met: 16 | 17 | 1.
Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 20 | 2. Redistributions in binary form must reproduce the above copyright 21 | notice, this list of conditions and the following disclaimer in the 22 | documentation and/or other materials provided with the distribution. 23 | 24 | 3. The names of its contributors may not be used to endorse or promote 25 | products derived from this software without specific prior written 26 | permission. 27 | 28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 32 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 33 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 34 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 35 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 36 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 37 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 38 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 39 | 40 | 41 | Any feedback is very welcome. 
42 | http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html 43 | email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) 44 | */ 45 | 46 | #include "mt19937ar.h" 47 | 48 | int main(void) 49 | { 50 | int i; 51 | unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4; 52 | init_by_array(init, length); 53 | printf("1000 outputs of genrand_int32()\n"); 54 | for (i=0; i<1000; i++) { 55 | printf("%10lu ", genrand_int32()); 56 | if (i%5==4) printf("\n"); 57 | } 58 | printf("\n1000 outputs of genrand_real2()\n"); 59 | for (i=0; i<1000; i++) { 60 | printf("%10.8f ", genrand_real2()); 61 | if (i%5==4) printf("\n"); 62 | } 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /tests/pk/trap-00-new-request-no-asid.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user.h" 4 | 5 | int main() { 6 | new_write_request(0, 0, 0); 7 | while(1) {}; 8 | } 9 | -------------------------------------------------------------------------------- /tests/pk/trap-00-supervisor-req-as-user.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | 5 | int main() { 6 | asid_type asid = 2; 7 | tid_type tid = 0; 8 | pk_syscall_set_asid(1); 9 | set_asid(&asid, &tid); 10 | while(1) {}; 11 | } 12 | -------------------------------------------------------------------------------- /tests/pk/trap-00-write-register-no-asid.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
2 | 3 | #include "tests/libs/src/include/xfiles-user.h" 4 | 5 | int main() { 6 | write_register(0, 0, 0); 7 | while(1) {}; 8 | } 9 | -------------------------------------------------------------------------------- /tests/pk/trap-01-request-antp-not-set.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | 5 | int main() { 6 | pk_syscall_set_asid(1); 7 | tid_type tid = new_write_request(0, 0, 0); 8 | element_type junk = 0; 9 | write_data(tid, &junk, 1); 10 | while(1) {}; 11 | } 12 | -------------------------------------------------------------------------------- /tests/pk/trap-02-request-oob-asid.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | #include "tests/libs/src/include/xfiles-asid-nnid-table.h" 5 | 6 | int main() { 7 | pk_syscall_set_asid(2); 8 | 9 | ant * ant; 10 | asid_nnid_table_create(&ant, 2, 4); 11 | pk_syscall_set_antp(ant); 12 | 13 | tid_type tid = new_write_request(0, 0, 0); 14 | element_type junk = 0; 15 | write_data(tid, &junk, 1); 16 | while(1) {}; 17 | } 18 | -------------------------------------------------------------------------------- /tests/pk/trap-03-request-oob-nnid.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | #include "tests/libs/src/include/xfiles-asid-nnid-table.h" 5 | 6 | int main() { 7 | pk_syscall_set_asid(1); 8 | 9 | ant * ant; 10 | asid_nnid_table_create(&ant, 2, 4); 11 | attach_garbage(&ant, 1); 12 | attach_garbage(&ant, 1); 13 | attach_garbage(&ant, 1); 14 | attach_garbage(&ant, 1); 15 | pk_syscall_set_antp(ant); 16 | 17 | tid_type tid = new_write_request(4, 0, 0); 18 | element_type junk = 0; 19 | write_data(tid, &junk, 1); 20 | while (1) {}; 21 | } 22 | -------------------------------------------------------------------------------- /tests/pk/trap-05-request-nn-config-zero-size.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | #include "tests/libs/src/include/xfiles-asid-nnid-table.h" 5 | 6 | int main() { 7 | pk_syscall_set_asid(1); 8 | 9 | ant * ant; 10 | asid_nnid_table_create(&ant, 2, 4); 11 | attach_garbage(&ant, 1); 12 | attach_garbage(&ant, 1); 13 | attach_garbage(&ant, 1); 14 | attach_garbage(&ant, 1); 15 | pk_syscall_set_antp(ant); 16 | 17 | tid_type tid = new_write_request(0, 0, 0); 18 | element_type junk = 0; 19 | write_data(tid, &junk, 1); 20 | while (1) {}; 21 | } 22 | -------------------------------------------------------------------------------- /tests/pk/trap-06-request-invalid-epb.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
2 | 3 | #include "tests/libs/src/include/xfiles-user-pk.h" 4 | #include "tests/libs/src/include/xfiles-asid-nnid-table.h" 5 | 6 | int main() { 7 | pk_syscall_set_asid(1); 8 | 9 | ant * ant; 10 | asid_nnid_table_create(&ant, 2, 4); 11 | attach_nn_configuration(&ant, 1, "../xfiles-dana/build/nets/xorSigmoidSymmetric-fixed.128bin"); 12 | pk_syscall_set_antp(ant); 13 | 14 | tid_type tid = new_write_request(0, 0, 0); 15 | element_type junk = 0; 16 | write_data(tid, &junk, 1); 17 | while (1) {}; 18 | } 19 | -------------------------------------------------------------------------------- /tests/smoke/Makefile: -------------------------------------------------------------------------------- 1 | include $(abs_top_srcdir)/Makefrag 2 | 3 | tests = \ 4 | debug \ 5 | id \ 6 | csr 7 | 8 | tests_p = $(addprefix $(PREFIX)-p-, $(tests)) 9 | 10 | CFLAGS := $(CFLAGS) \ 11 | -static \ 12 | -mcmodel=medany \ 13 | -fvisibility=hidden \ 14 | -nostdlib \ 15 | -nostartfiles \ 16 | -I$(abs_top_srcdir)/.. \ 17 | -DID_STRING=$(ID_STRING) 18 | LIBS := $(LIBS) \ 19 | -lxfiles-user \ 20 | -lxfiles-supervisor \ 21 | 22 | HEADERS = $(shell find \ 23 | $(abs_top_srcdir)/rocc-software/src \ 24 | $(abs_top_srcdir)/libs/src) 25 | 26 | all: $(tests_p) 27 | 28 | vpath %.S $(src_dir) 29 | 30 | $(PREFIX)-p-%: %.S $(HEADERS_P) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-debug.a 31 | $(CC) $(CFLAGS) -I$(ENV_P) -T$(ENV_P)/link.ld $< $(LFLAGS) -o $@ $(LIBS) 32 | 33 | $(PREFIX)-v-%: %.S $(HEADERS_V) $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-user.a $(abs_top_srcdir)/libs/build/$(TARGET)/libxfiles-debug.a 34 | $(CC) $(CFLAGS) -I$(ENV_V) -T$(ENV_V)/link.ld $(LFLAGS) $< -o $@ $(LIBS) 35 | 36 | junk += $(tests_p) 37 | -------------------------------------------------------------------------------- /tests/smoke/csr.S: -------------------------------------------------------------------------------- 1 | # See LICENSE.IBM for license details. 
2 | 3 | #***************************************************************************** 4 | # csr.S 5 | #----------------------------------------------------------------------------- 6 | # 7 | # Check that we can read and write CSRs 8 | # 9 | 10 | #include "riscv_test.h" 11 | #include "../riscv-tools/riscv-tests/isa/macros/scalar/test_macros.h" 12 | #include "tests/rocc-software/src/xcustom.h" 13 | #include "tests/rocc-software/src/riscv_test_rocc.h" 14 | #include "tests/libs/src/xfiles-supervisor.S" 15 | #include "tests/libs/src/xfiles-user.S" 16 | 17 | #define CUSTOM_X 0 18 | #define STACK_TOP (_end + 4096) 19 | #ifndef ID_STRING 20 | #define ID_STRING 0x0002000000001012 21 | // * Transaction Table Entries: 2 [63:48] 22 | // * Elements Per Block: 4 [13:10] 23 | // * Processing Elements: 1 [ 9: 4] 24 | // * Cache Entries: 2 [ 3: 0] 25 | #endif 26 | #define ID_STRING_CURRENT 0x0001000000001001 27 | 28 | // RVTEST_CODE_BEGIN includes the EXTRA_INIT macro before its final 29 | // `mret` and the resulting drop to user mode. We use this to setup 30 | // the ASID and ANTP for a single transaction test. 
31 | #undef EXTRA_INIT 32 | #define EXTRA_INIT \ 33 | XF_WRITE_CSR(CSRs_ttable_size, 0x1); \ 34 | XF_WRITE_CSR(CSRs_asid, 0x14); \ 35 | XF_WRITE_CSR(CSRs_tid, 0x15); \ 36 | XF_WRITE_CSR(CSRs_pe_size, 0x0); \ 37 | XF_WRITE_CSR(CSRs_cache_size, 0x1); \ 38 | XF_WRITE_CSR(CSRs_pe_cooldown, 0x7); \ 39 | XF_WRITE_CSR(CSRs_antp, 0x103); \ 40 | XF_WRITE_CSR(CSRs_num_asids, 0x104); \ 41 | XF_WRITE_CSR(CSRs_pe_governor, 0x1); \ 42 | TEST_CASE( 1, a0, 0x1, XF_READ_CSR(CSRs_ttable_size) ); \ 43 | TEST_CASE( 2, a0, ID_STRING, XF_READ_CSR(CSRs_xfid) ); \ 44 | TEST_CASE( 3, a0, ID_STRING_CURRENT, XF_READ_CSR(CSRs_xfid_current) ); \ 45 | TEST_CASE( 4, a0, 0x14, XF_READ_CSR(CSRs_asid) ); \ 46 | TEST_CASE( 5, a0, 0x15, XF_READ_CSR(CSRs_tid) ); \ 47 | TEST_CASE( 10, a0, 0x0, XF_READ_CSR(CSRs_pe_size) ); \ 48 | TEST_CASE( 11, a0, 0x1, XF_READ_CSR(CSRs_cache_size) ); \ 49 | TEST_CASE( 12, a0, 0x7, XF_READ_CSR(CSRs_pe_cooldown) ); \ 50 | TEST_CASE( 13, a0, 0x103, XF_READ_CSR(CSRs_antp) ); \ 51 | TEST_CASE( 14, a0, 0x104, XF_READ_CSR(CSRs_num_asids) ); \ 52 | TEST_CASE( 15, a0, 0x1, XF_READ_CSR(CSRs_pe_governor) ); \ 53 | la sp, _end + 1024; 54 | 55 | RVTEST_WITH_ROCC 56 | 57 | start: 58 | 59 | RVTEST_CODE_BEGIN 60 | TEST_CASE( 20, a0, 0x0, XF_READ_CSR(CSRs_fence); \ 61 | srli a0, a0, 17); 62 | TEST_CASE( 21, a0, 0x0, DANA_SYNC(0xdead)); 63 | TEST_CASE( 22, a0, 0x0, DANA_FENCE(0xdead)); 64 | 65 | TEST_PASSFAIL 66 | 67 | RVTEST_CODE_END 68 | 69 | .data 70 | RVTEST_DATA_BEGIN 71 | 72 | TEST_DATA 73 | 74 | RVTEST_DATA_END 75 | -------------------------------------------------------------------------------- /tests/smoke/debug.S: -------------------------------------------------------------------------------- 1 | # See LICENSE.IBM for license details. 
2 | 3 | #***************************************************************************** 4 | # debug.S 5 | #----------------------------------------------------------------------------- 6 | # 7 | # Exercise the debug unit 8 | # 9 | 10 | #include "riscv_test.h" 11 | #include "../riscv-tools/riscv-tests/isa/macros/scalar/test_macros.h" 12 | #include "tests/rocc-software/src/xcustom.h" 13 | #include "tests/rocc-software/src/riscv_test_rocc.h" 14 | #include "tests/libs/src/xfiles-debug.S" 15 | 16 | 17 | #define CUSTOM_X 0 18 | 19 | RVTEST_WITH_ROCC 20 | 21 | RVTEST_CODE_BEGIN 22 | TEST_CASE( 1, x10, 0xaaaa, DEBUG_ECHO_VIA_REG(0xaaaa) ); 23 | TEST_CASE( 2, x10, 0x0, DEBUG_WRITE_MEM(0xbbbb, tdat2) ); 24 | TEST_CASE( 3, x10, 0xbbbb, DEBUG_READ_MEM(tdat3) ); 25 | TEST_CASE( 4, x10, 0, DEBUG_WRITE_UTL(0xcccc, tdat4) ); 26 | TEST_CASE( 5, x10, 0xcccc, DEBUG_READ_UTL(tdat5) ); 27 | 28 | TEST_PASSFAIL 29 | 30 | RVTEST_CODE_END 31 | 32 | .data 33 | RVTEST_DATA_BEGIN 34 | 35 | TEST_DATA 36 | 37 | tdat: 38 | tdat2: 39 | tdat3: .dword 0x0 40 | tdat4: 41 | tdat5: .dword 0x0 42 | 43 | RVTEST_DATA_END 44 | -------------------------------------------------------------------------------- /tests/smoke/id.S: -------------------------------------------------------------------------------- 1 | # See LICENSE.IBM for license details. 
2 | 3 | #***************************************************************************** 4 | # id.S 5 | #----------------------------------------------------------------------------- 6 | # 7 | # Check that the ID matches what we expect 8 | # 9 | 10 | #include "riscv_test.h" 11 | #include "../riscv-tools/riscv-tests/isa/macros/scalar/test_macros.h" 12 | #include "tests/rocc-software/src/xcustom.h" 13 | #include "tests/rocc-software/src/riscv_test_rocc.h" 14 | #include "tests/libs/src/xfiles-user.S" 15 | 16 | #define CUSTOM_X 0 17 | 18 | // ID string parameters 19 | #ifndef ID_STRING 20 | #define ID_STRING 0x0001000000001012 21 | // * Transaction Table Entries: 1 [63:48] 22 | // * Elements Per Block: 4 [13:10] 23 | // * Processing Elements: 1 [ 9: 4] 24 | // * Cache Entries: 2 [ 3: 0] 25 | #endif 26 | 27 | RVTEST_WITH_ROCC 28 | 29 | RVTEST_CODE_BEGIN 30 | TEST_CASE( 1, a0, ID_STRING, XFILES_DANA_ID ) 31 | 32 | TEST_PASSFAIL 33 | 34 | RVTEST_CODE_END 35 | 36 | .data 37 | RVTEST_DATA_BEGIN 38 | 39 | TEST_DATA 40 | 41 | RVTEST_DATA_END 42 | -------------------------------------------------------------------------------- /tools/.gitignore: -------------------------------------------------------------------------------- 1 | bin -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | DIR_TOP = $(abspath ..) 
2 | TARGET = host 3 | 4 | include ../Makefrag 5 | 6 | DIR_SRC = $(DIR_TOP)/tools/src 7 | DIR_BIN = $(DIR_TOP)/tools/bin 8 | DIR_INC = $(DIR_SRC)/include 9 | COMMA = , 10 | 11 | TOOLS = \ 12 | fann-float-to-fixed \ 13 | write-fann-config-for-accelerator \ 14 | bin-config-to-c-header \ 15 | fann-train-to-c-header \ 16 | fann-train-to-c-header-fixed \ 17 | fann-random \ 18 | fann-train \ 19 | fann-eval \ 20 | fann-eval-fixed \ 21 | fann-image 22 | BINS = $(addprefix $(DIR_BIN)/, $(TOOLS)) 23 | 24 | vpath %.c src 25 | 26 | .PHONY: all clean 27 | 28 | all: $(BINS) 29 | 30 | include common/Makefrag-rv 31 | include common/Makefrag-submodule 32 | 33 | INCLUDE_PATHS = $(DIR_TOP) 34 | 35 | LIB_PATHS = \ 36 | $(DIR_BUILD)/$(TARGET) \ 37 | $(DIR_BUILD)/fann/$(TARGET) \ 38 | $(DIR_TOP)/tests/libs/build/$(TARGET) 39 | LDIRS = \ 40 | $(addprefix -Wl$(COMMA)-R, $(shell echo $(LIB_PATHS) | xargs -n1 readlink -f)) \ 41 | $(addprefix -L, $(LIB_PATHS)) 42 | 43 | .INTERMEDIATE: $(DIR_BIN)/fann-train-to-c-header.o \ 44 | $(DIR_BIN)/fann-eval-fixed.o \ 45 | $(DIR_BIN)/write-fann-config-for-accelerator.o \ 46 | $(DIR_BIN)/fann-train-to-c-header.o \ 47 | $(DIR_BIN)/fann-eval.o \ 48 | $(DIR_BIN)/fann-train.o \ 49 | $(DIR_BIN)/fann-random.o \ 50 | $(DIR_BIN)/fann-float-to-fixed.o \ 51 | $(DIR_BIN)/generate-ant.o \ 52 | $(DIR_BIN)/fann-image.o 53 | 54 | $(DIR_BIN)/generate-ant: $(DIR_BIN)/generate-ant.o $(DIR_TOP)/tests/libs/build/$(TARGET)/libxfiles-ant.a $(libfann_dep) 55 | $(CC) $(CFLAGS) $< $(LDIRS) -lxfiles-ant -o $@ 56 | 57 | # No pattern rules as I need to be explicit about what is linking 58 | # against FANN since it's LGPLv2 59 | 60 | # Fixed FANN 61 | $(DIR_BIN)/fann-train-to-c-header-fixed: $(DIR_BIN)/fann-train-to-c-header.o $(libfann_dep) 62 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfixedfann -fopenmp -o $@ 63 | $(DIR_BIN)/fann-eval-fixed: $(DIR_BIN)/fann-eval-fixed.o $(libfann_dep) 64 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfixedfann -fopenmp -o $@ 65 | 
$(DIR_BIN)/write-fann-config-for-accelerator: $(DIR_BIN)/write-fann-config-for-accelerator.o $(libfann_dep) 66 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfixedfann -fopenmp -o $@ 67 | 68 | # FANN 69 | $(DIR_BIN)/fann-train-to-c-header: $(DIR_BIN)/fann-train-to-c-header.o $(libfann_dep) 70 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfann -fopenmp -o $@ 71 | $(DIR_BIN)/fann-eval: $(DIR_BIN)/fann-eval.o $(libfann_dep) 72 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfann -fopenmp -o $@ 73 | $(DIR_BIN)/fann-image: $(DIR_BIN)/fann-image.o $(libfann_dep) 74 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfann -lpng -fopenmp -o $@ 75 | $(DIR_BIN)/fann-train: $(DIR_BIN)/fann-train.o $(libfann_dep) 76 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfann -fopenmp -o $@ 77 | $(DIR_BIN)/fann-random: $(DIR_BIN)/fann-random.o $(libfann_dep) 78 | $(CC) $(CFLAGS) $< $(LDIRS) -lm -lfann -fopenmp -o $@ 79 | $(DIR_BIN)/fann-float-to-fixed: $(DIR_BIN)/fann-float-to-fixed.o $(libfann_dep) 80 | $(CC) $(CFLAGS) $< $(LDIRS) -lfann -o $@ 81 | 82 | $(DIR_BIN)/fann-eval-fixed.o: fann-eval.c | $(DIR_BIN) 83 | $(CC) $(CFLAGS) -DFIXEDFANN $< -c -o $@ 84 | $(DIR_BIN)/%.o: %.c | $(DIR_BIN) 85 | $(CC) $(CFLAGS) $< -c -o $@ 86 | 87 | $(DIR_BIN): 88 | mkdir -p $@ 89 | 90 | clean: 91 | rm -rf $(DIR_BIN) 92 | -------------------------------------------------------------------------------- /tools/common/Makefrag-rv: -------------------------------------------------------------------------------- 1 | #-*- mode: makefile-*- 2 | 3 | # RISC-V related options 4 | ifeq "$(TARGET)" "host" 5 | CFLAGS := $(CFLAGS) -DNO_VM=1 6 | else 7 | TARGET_DASH = $(TARGET)- 8 | endif 9 | CC = $(TARGET_DASH)gcc 10 | CXX = $(TARGET_DASH)g++ 11 | AR = $(TARGET_DASH)ar 12 | OBJDUMP = $(TARGET_DASH)objdump 13 | dir_build = $(DIR_BUILD)/$(TARGET) 14 | 15 | CFLAGS += \ 16 | -Wall \ 17 | -Werror \ 18 | --std=gnu11 \ 19 | -I$(DIR_TOP) \ 20 | -I$(DIR_TOP)/tests/libs \ 21 | -I$(DIR_BUILD)/nets 22 | CFLAGS_RV += \ 23 | $(CFLAGS) \ 24 | -static 25 | LFLAGS = \ 26 | 
-L$(DIR_TOP)/tests/libs/build/$(TARGET) \ 27 | -L$(DIR_BUILD)/fann/$(TARGET) 28 | -------------------------------------------------------------------------------- /tools/common/Makefrag-submodule: -------------------------------------------------------------------------------- 1 | #-*- mode: makefile-*- 2 | 3 | SUBMODULE_FANN=$(DIR_TOP)/fann/.git 4 | SUBMODULE_HDL_SCRIPTS=$(DIR_TOP)/util/hdl-scripts/.git 5 | 6 | .PRECIOUS: \ 7 | $(SUBMODULE_FANN) \ 8 | $(SUBMODULE_HDL_SCRIPTS) 9 | 10 | # Grab submodules 11 | $(DIR_TOP)/fann/.git: 12 | git submodule update --init $(DIR_TOP)/fann 13 | $(DIR_TOP)/util/hdl-scripts/.git: 14 | git submodule update --init $(DIR_TOP)/util/hdl-scripts 15 | 16 | # Makefile for building host and RISC-V targets of FANN 17 | dir_host = $(DIR_BUILD)/fann/host 18 | cmake_flags_host = -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=$(dir_host) \ 19 | -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=$(dir_host) \ 20 | -DCMAKE_C_FLAGS="-DFANN_NO_SEED" \ 21 | -DCMAKE_CXX_FLAGS="-DFANN_NO_SEED" 22 | $(dir_host)/lib%: | $(dir_host) $(SUBMODULE_FANN) 23 | cd $(dir_host) && \ 24 | cmake $(cmake_flags_host) $(DIR_TOP)/fann &&\ 25 | $(MAKE) 26 | 27 | # Newer versions of FANN include additional tests which newlib cannot 28 | # build. Hence, this target is set to ignore all build errors. This is 29 | # dangerous, but I don't see a way around it without me putting a fix 30 | # in FANN. 
31 | dir_newlib = $(DIR_BUILD)/fann/riscv64-unknown-elf 32 | cmake_flags_newlib = -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=$(dir_newlib) \ 33 | -DPKGCONFIG_INSTALL_DIR=$(dir_newlib) \ 34 | -DINCLUDE_INSTALL_DIR=$(dir_newlib) \ 35 | -DLIB_INSTALL_DIR=$(dir_newlib) \ 36 | -DCMAKE_CONFIG_DIR=$(dir_newlib) \ 37 | -DCMAKE_CURRENT_BINARY_DIR=$(dir_newlib) \ 38 | -DCMAKE_C_COMPILER=$(CC) \ 39 | -DCMAKE_CXX_COMPILER=$(CXX) \ 40 | -DCMAKE_SYSTEM_NAME=Generic \ 41 | -DDISABLE_PARALLEL_FANN=1 \ 42 | -DBUILD_SHARED_LIBS=OFF 43 | $(dir_newlib)/libfann.a $(dir_newlib)/libfixedfann.a $(dir_newlib)/libdoublefann.a $(dir_newlib)/libfloatfann.a: | $(dir_newlib) $(SUBMODULE_FANN) 44 | cd $(dir_newlib) && \ 45 | cmake $(cmake_flags_newlib) $(DIR_TOP)/fann &&\ 46 | $(MAKE) -k || true 47 | 48 | dir_linux = $(DIR_BUILD)/fann/riscv64-unknown-linux-gnu 49 | cmake_flags_linux = -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=$(dir_linux) \ 50 | -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=$(dir_linux) \ 51 | -DPKGCONFIG_INSTALL_DIR=$(dir_linux) \ 52 | -DINCLUDE_INSTALL_DIR=$(dir_linux) \ 53 | -DLIB_INSTALL_DIR=$(dir_linux) \ 54 | -DCMAKE_CONFIG_DIR=$(dir_linux) \ 55 | -DCMAKE_CURRENT_BINARY_DIR=$(dir_linux) \ 56 | -DCMAKE_C_COMPILER=$(CC) \ 57 | -DCMAKE_CXX_COMPILER=$(CXX) \ 58 | -DDISABLE_PARALLEL_FANN=1 59 | $(dir_linux)/libfann.a $(dir_linux)/libfixedfann.a $(dir_linux)/libdoublefann.a $(dir_linux)/libfloatfann.a: | $(dir_linux) $(SUBMODULE_FANN) 60 | cd $(dir_linux) && \ 61 | cmake $(cmake_flags_linux) $(DIR_TOP)/fann && \ 62 | $(MAKE) 63 | 64 | $(dir_host) $(dir_newlib) $(dir_linux): 65 | mkdir -p $@ 66 | -------------------------------------------------------------------------------- /tools/common/Makefrag-tools: -------------------------------------------------------------------------------- 1 | #-*- mode: makefile-*- 2 | 3 | # Compiled tools 4 | BIN_TO_C_HEADER = $(DIR_TOP)/tools/bin/bin-config-to-c-header 5 | FLOAT_TO_FIXED = $(DIR_TOP)/tools/bin/fann-float-to-fixed 6 | FANN_RANDOM = $(DIR_TOP)/tools/bin/fann-random 
7 | FANN_TRAIN = $(DIR_TOP)/tools/bin/fann-train 8 | TRAIN_TO_C_HEADER = $(DIR_TOP)/tools/bin/fann-train-to-c-header 9 | TRAIN_TO_C_HEADER_FIXED = $(DIR_TOP)/tools/bin/fann-train-to-c-header-fixed 10 | WRITE_FANN_CONFIG = $(DIR_TOP)/tools/bin/write-fann-config-for-accelerator 11 | FANN_EVAL = $(DIR_TOP)/tools/bin/fann-eval 12 | FANN_EVAL_FIXED = $(DIR_TOP)/tools/bin/fann-eval-fixed 13 | # Scripts 14 | FANN_CHANGE_FIXED_POINT = $(DIR_TOP)/tools/scripts/fann-change-fixed-point 15 | FANN_TRAIN_TO_FIXED = $(DIR_TOP)/tools/scripts/fann-data-to-fixed 16 | GEN_BOOLEAN_DATA = $(DIR_TOP)/tools/scripts/gen-boolean-data 17 | GEN_MATH_DATA = $(DIR_TOP)/tools/scripts/gen-math-data 18 | GEN_VIDEO = $(DIR_TOP)/tools/scripts/gen-trace-video 19 | GEN_TEST_MEM = $(DIR_TOP)/tools/scripts/generate_test_mem.py 20 | 21 | NETS_TOOLS = \ 22 | $(FLOAT_TO_FIXED) \ 23 | $(WRITE_FANN_CONFIG) \ 24 | $(BIN_TO_C_HEADER) \ 25 | $(TRAIN_TO_C_HEADER) \ 26 | $(FANN_RANDOM) \ 27 | $(FANN_TRAIN) \ 28 | $(FANN_EVAL) \ 29 | $(FANN_EVAL_FIXED) 30 | 31 | $(DIR_TOP)/tools/bin/%: $(DIR_BUILD)/fann/host/libfann.so 32 | $(MAKE) -C $(DIR_TOP)/tools $(DIR_TOP)/tools/bin/$* 33 | -------------------------------------------------------------------------------- /tools/common/Makefrag-video: -------------------------------------------------------------------------------- 1 | #-*- mode: makefile-*- 2 | 3 | DIR_VIDEO=$(DIR_BUILD)/video 4 | 5 | VID_DAT=$(addprefix $(DIR_VIDEO)/, $(addsuffix -float-x0.dat, $(TRAIN_SIN))) \ 6 | $(addprefix $(DIR_VIDEO)/, $(addsuffix -float-x1.dat, $(TRAIN_SIN))) 7 | VID_SIN=$(addprefix $(DIR_VIDEO)/, $(addsuffix -float-x0.mp4, $(TRAIN_SIN))) \ 8 | $(addprefix $(DIR_VIDEO)/, $(addsuffix -float-x1.mp4, $(TRAIN_SIN))) 9 | 10 | VIDEOS=$(VID_SIN) 11 | 12 | video: tools nets $(DIR_VIDEO) $(VIDEOS) 13 | 14 | $(DIR_VIDEO)/sin-%-float-x0.dat: $(DIR_BUILD_NETS)/sin-float.net $(DIR_BUILD_NETS)/sin-%-float.train 15 | $(FANN_TRAIN) -n $< -t $(DIR_BUILD_NETS)/sin-$*-float.train -e500 -b $@ -x0 16 | 17 
| $(DIR_VIDEO)/sin-%-float-x1.dat: $(DIR_BUILD_NETS)/sin-float.net $(DIR_BUILD_NETS)/sin-%-float.train 18 | $(FANN_TRAIN) -n $< -t $(DIR_BUILD_NETS)/sin-$*-float.train -e500 -b $@ -x1 19 | 20 | $(DIR_VIDEO)/sin-%-float-x0.mp4: $(DIR_VIDEO)/sin-%-float-x0.dat 21 | $(GEN_VIDEO) -t $(DIR_BUILD_NETS)/sin-$*-float.train -l $< -s $@ 22 | 23 | $(DIR_VIDEO)/sin-%-float-x1.mp4: $(DIR_VIDEO)/sin-%-float-x1.dat 24 | $(GEN_VIDEO) -t $(DIR_BUILD_NETS)/sin-$*-float.train -l $< -s $@ 25 | 26 | $(DIR_VIDEO): 27 | mkdir -p $@ 28 | -------------------------------------------------------------------------------- /tools/common/nets.txt: -------------------------------------------------------------------------------- 1 | # Shared NN configurations used by fann-random to generate networks 2 | andSigmoidSymmetric,-l2 -l2 -l1 -a5 -o5 -r0.7 3 | orSigmoidSymmetric,-l2 -l2 -l1 -a5 -o5 -r0.7 4 | xorSigmoid,-r0.7 -l2 -l4 -l1 -a5 -o3 5 | xorSigmoidSymmetric,-l2 -l8 -l1 -a5 -o5 -r0.7 6 | xorSigmoidSymmetricThreeLayer,-l2 -l8 -l9 -l1 -a5 -o5 -r0.7 7 | # XOR variants 8 | xor-sigmoid-4i,-l4 -l4 -l1 -a5 -o3 -r0.7 9 | xor-sigmoid-8i,-l8 -l4 -l1 -a5 -o3 -r.07 10 | xor-sigmoid-16i,-l16 -l4 -l1 -a5 -o3 -r.07 11 | xor-sigmoid-32i,-l32 -l4 -l1 -a5 -o3 -r.07 12 | xor-sigmoid-64i,-l64 -l4 -l1 -a5 -o3 -r.07 13 | xor-sigmoid-128i,-l128 -l4 -l1 -a5 -o3 -r.07 14 | xor-sigmoid-256i,-l256 -l4 -l1 -a5 -o3 -r.07 15 | xor-sigmoid-512i,-l512 -l4 -l1 -a5 -o3 -r.07 16 | xor-sigmoid-1024i,-l1024 -l4 -l1 -a5 -o3 -r.07 17 | xor-sigmoid-4o,-l2 -l4 -l4 -a5 -o3 -r0.7 18 | xor-sigmoid-8o,-l2 -l4 -l8 -a5 -o3 -r.07 19 | xor-sigmoid-16o,-l2 -l4 -l16 -a5 -o3 -r.07 20 | xor-sigmoid-32o,-l2 -l4 -l32 -a5 -o3 -r.07 21 | xor-sigmoid-64o,-l2 -l4 -l64 -a5 -o3 -r.07 22 | xor-sigmoid-128o,-l2 -l4 -l128 -a5 -o3 -r.07 23 | xor-sigmoid-256o,-l2 -l4 -l256 -a5 -o3 -r.07 24 | xor-sigmoid-512o,-l2 -l4 -l512 -a5 -o3 -r.07 25 | xor-sigmoid-1024o,-l2 -l4 -l1024 -a5 -o3 -r.07 26 | # FANN dataset NNs 27 | abelone,-l10 -l8 -l1 -a5 -o3 -r0.7 28 | 
bank32fm,-l32 -l16 -l1 -a5 -o3 -r0.7 29 | bank32nh,-l32 -l16 -l1 -a5 -o3 -r0.7 30 | building,-l14 -l8 -l3 -a5 -o3 -r0.7 31 | census-house,-l16 -l8 -l4 -l1 -a5 -o3 -r0.7 32 | diabetes,-l8 -l10 -l2 -a5 -o3 -r0.7 33 | gene,-l120 -l20 -l3 -a5 -o3 -r0.7 34 | kin32fm,-l32 -l20 -l1 -a5 -o3 35 | mushroom,-l125 -l32 -l2 -a5 -o3 36 | pumadyn-32fm,-l32 -l16 -l8 -l4 -l1 -a5 -o3 -r0.7 37 | robot,-l48 -l16 -l3 -a5 -o3 -r0.7 38 | soybean,-l82 -l32 -l19 -a5 -o3 -r0.7 39 | thyroid,-l21 -l10 -l3 -a5 -o3 40 | two-spiral,-l2 -l10 -l30 -l3 -l1 -a5 -o3 41 | # Parity configurations 42 | parity-1,-l1 -l1 -l1 -a5 -o5 43 | parity-2,-l2 -l4 -l1 -a5 -o5 -r0.2 44 | parity-3,-l3 -l8 -l1 -a5 -o5 -r0.2 45 | parity-4,-l4 -l9 -l1 -a5 -o5 -r0.7 46 | parity-5,-l5 -l9 -l1 -a5 -o5 -r0.7 47 | parity-6,-l6 -l10 -l1 -a5 -o5 -r0.7 48 | parity-7,-l7 -l12 -l1 -a5 -o5 -r0.7 49 | parity-8,-l8 -l14 -l1 -a5 -o5 -r0.7 50 | parity-9,-l9 -l16 -l1 -a5 -o5 -r0.7 51 | # Parity configurations with the same hidden topology (-f0.5) 52 | parity-same-1,-l1 -l8 -l1 -a5 -o5 -r0.7 53 | parity-same-2,-l2 -l8 -l1 -a5 -o5 -r0.7 54 | parity-same-3,-l3 -l8 -l1 -a5 -o5 -r0.7 55 | parity-same-4,-l4 -l8 -l1 -a5 -o5 -r0.7 56 | parity-same-5,-l5 -l8 -l1 -a5 -o5 -r0.7 57 | parity-same-6,-l6 -l8 -l1 -a5 -o5 -r0.7 58 | parity-same-7,-l7 -l8 -l1 -a5 -o5 -r0.7 59 | parity-same-8,-l8 -l8 -l1 -a5 -o5 -r0.7 60 | parity-same-9,-l9 -l8 -l1 -a5 -o5 -r0.7 61 | # Sine tests 62 | sin,-l1 -l8 -l1 -a3 -o5 -r0.7 63 | sin-scale-0.25,-l1 -l8 -l1 -a3 -o5 -r0.7 64 | sin-scale-0.50,-l1 -l8 -l1 -a3 -o5 -r0.7 65 | sin-scale-0.75,-l1 -l8 -l1 -a3 -o5 -r0.7 66 | sin-scale-1.00,-l1 -l8 -l1 -a3 -o5 -r0.7 67 | sin-scale-1.25,-l1 -l8 -l1 -a3 -o5 -r0.7 68 | sin-scale-1.50,-l1 -l8 -l1 -a3 -o5 -r0.7 69 | sin-scale-1.75,-l1 -l8 -l1 -a3 -o5 -r0.7 70 | sin-scale-2.00,-l1 -l8 -l1 -a3 -o5 -r0.7 71 | sin-scale-2.25,-l1 -l8 -l1 -a3 -o5 -r0.7 72 | sin-scale-2.50,-l1 -l8 -l1 -a3 -o5 -r0.7 73 | sin-scale-2.75,-l1 -l8 -l1 -a3 -o5 -r0.7 74 | sin-scale-3.00,-l1 -l8 -l1 -a3 -o5 
-r0.7 75 | sin-scale-3.25,-l1 -l8 -l1 -a3 -o5 -r0.7 76 | sin-scale-3.50,-l1 -l8 -l1 -a3 -o5 -r0.7 77 | sin-scale-3.75,-l1 -l8 -l1 -a3 -o5 -r0.7 78 | sin-scale-4.00,-l1 -l8 -l1 -a3 -o5 -r0.7 79 | sinc,-l1 -l8 -l1 -a3 -o5 -r0.7 80 | gaussian,-l1 -l8 -l1 -a3 -o5 -r0.7 81 | # MNIST 82 | mnist-300,-l784 -l300 -l10 -a5 -o0 --steepness-hidden 1 --steepness-output 1 83 | mnist-1000,-l784 -l1000 -l10 -a5 -o0 --steepness-hidden 1 --steepness-output 1 84 | mnist-300-100,-l784 -l300 -l100 -l10 -a5 -o0 --steepness-hidden 1 --steepness-output 1 85 | mnist-500-150,-l784 -l500 -l150 -l10 -a5 -o0 --steepness-hidden 1 --steepness-output 1 86 | mnist-500-500-2000-30,-l784 -l500 -l500 -l2000 -l30 -l10 -a5 -o0 --steepness-hidden 1 --steepness-output 1 87 | mnist-2500-2000-1500-1000-500,-l784 -l2500 -l2000 -l1500 -l1000 -l500 -l10 -a5 -o0 --steepness-hidden 1 --steepness-output 1 88 | -------------------------------------------------------------------------------- /tools/scripts/binary-to-ram-init: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | use Getopt::Long; 6 | 7 | sub usage { 8 | print << "END" 9 | Usage: binary-to-ram-init [OPTIONS] 10 | Convert a FANN binary configuration to a Verilog init structure 11 | 12 | Options: 13 | -h, -?, --help print this help and exit 14 | -i, --input input FANN configuration 15 | -w, --block-width the width of one block in bytes 16 | --format output format (see below) 17 | 18 | Output formats: 19 | verilog verilog for initializing a memory (default) 20 | readmemh suitable for the Verilog \$readmemh command 21 | END 22 | } 23 | 24 | sub header { 25 | my $format = shift; 26 | my $filename = shift; 27 | if ($format =~ /^verilog$/) { 28 | return << "END"; 29 | // Initial RAM state for file: 30 | // $filename 31 | initial begin 32 | END 33 | } 34 | if ($format =~ /^readmemh$/) { 35 | return << "END"; 36 | // Initial RAM state for file: 37 | // $filename 38 
| END 39 | } 40 | die "[ERROR] Unexpected format in header: $format\n"; 41 | } 42 | 43 | sub line { 44 | my $format = shift; 45 | my $i = shift; 46 | my $width_in_bits = shift; 47 | my $little_endian = shift; 48 | if ($format =~ /^verilog$/) { 49 | return " ram[$i] = $width_in_bits\'h$little_endian;\n"; 50 | } 51 | if ($format =~ /^readmemh$/) { 52 | return "$little_endian\n"; 53 | } 54 | die "[ERROR] Unexpected format in line: $format\n"; 55 | } 56 | 57 | sub footer { 58 | my $format = shift; 59 | if ($format =~ /^verilog$/) { 60 | return "end\n"; 61 | } 62 | if ($format =~ /^readmemh$/) { 63 | return ""; 64 | } 65 | die "[ERROR] Unexpected format in footer: $format\n"; 66 | } 67 | 68 | my ($opt_config_input, 69 | $opt_block_width, 70 | $opt_help); 71 | my $opt_format = "verilog"; 72 | GetOptions ("format=s" => \$opt_format, 73 | "h|?|help" => \$opt_help, 74 | "i|input=s" => \$opt_config_input, 75 | "w|block-width=i" => \$opt_block_width) 76 | or usage() and die "Bad options"; 77 | 78 | usage() and exit if ($opt_help); 79 | usage() and die "[ERROR] Unspecified required options" 80 | if not $opt_config_input or not $opt_block_width; 81 | usage() and die "[ERROR] Bad format \"$opt_format\"" 82 | if $opt_format !~ /^(?:verilog|readmemh)$/; 83 | 84 | my $width_in_bits = $opt_block_width * 8; 85 | 86 | # Open the configuration file and read all of it into a buffer. 
# Make sure the external converters are on PATH.  Prints an error naming the
# command that is actually missing and exits otherwise.
verify()
{
    local cmd
    for cmd in "$FLOATTOFIXEDCMD" "$WRITECONFIGCMD"; do
        if ! type -a "$cmd" > /dev/null
        then
            echo "ERROR: can't find $cmd in your path" > /dev/stderr
            exit -1
        fi
    done
}

# Print the arguments with all leading and trailing whitespace removed.
# The arguments are joined with single spaces first, so the result is the
# same whether the caller quotes the value or lets it word-split.
trim()
{
    local trimmed="$*"
    # strip the leading run of whitespace, then the trailing run
    trimmed="${trimmed#"${trimmed%%[![:space:]]*}"}"
    trimmed="${trimmed%"${trimmed##*[![:space:]]}"}"

    printf '%s\n' "$trimmed"
}

# Read the layer_sizes= entry of $cache/$net and set the globals input_size
# and output_size from its first and last fields.
function calc_sizes()
{
    local sizes
    sizes=$(grep '^layer_sizes=' "$cache/$net")
    sizes=${sizes##layer_sizes=}
    # Quote the value: unquoted, only the first field reached trim and the
    # output size came out equal to the input size.
    sizes=$(trim "$sizes")

    if [[ -z $sizes ]]; then
        echo "ERROR: no layer_sizes entry in $cache/$net" > /dev/stderr
        exit -1
    fi

    input_size=${sizes%% *}
    output_size=${sizes##* }

    # fann has bias node that needs to be removed
    (( input_size-- ))
    (( output_size-- ))
}

# Read the decimal_point= entry of $netfixed into the global decimal_bits.
function calc_decimal_bits()
{
    local str
    str=$(grep '^decimal_point=' "$netfixed")
    str=${str##decimal_point=}
    decimal_bits=$(trim "$str")
}
# For every width in $BLOCKWIDTHS run $WRITECONFIGCMD to produce
# <netfixed>.<width>bin and $BIN2RAMINITCMD to turn that file into
# <netfixed>-<width>.v.
function create_entrydata()
{
    local c bfile
    for c in $BLOCKWIDTHS; do
        bfile=$netfixed.${c}bin
        # The undefined $bw that used to be glued in front of the network
        # path is gone; it would corrupt the path if bw were ever set.
        "$WRITECONFIGCMD" "$c" "$netfixed" "$bfile" "$DECIMAL_POINT_OFFSET" > /dev/null
        "$BIN2RAMINITCMD" -i "$bfile" -w "$c" > "${netfixed}-${c}.v"
    done
}

# Find the first cache entry (for the first block width) whose symlink target
# contains "zero".  Sets the globals freelink (e.g. "entry_3") and freenum
# (e.g. "3"); both are left empty when no such entry exists.
function find_free()
{
    local b=${BLOCKWIDTHS%% *}
    local link
    freelink=
    freenum=
    for link in "$cache"/entry_*-"${b}".v; do
        # Inspect the link target itself instead of grepping `ls -l` output,
        # which also matched "zero" in directory or owner names.
        [[ -L $link && $(readlink "$link") == *zero* ]] || continue
        freelink=$(basename "$link")
        freelink=${freelink%%-*}
        freenum=${freelink##*entry_}
        return
    done
}

# Re-point the entry_<freenum>-<width>.v symlinks in $cache at the .v files
# generated for $netfixed, one per width in $BLOCKWIDTHS.
function assign()
{
    local target b
    target=$(basename "$netfixed")
    for b in $BLOCKWIDTHS; do
        ln -fs "${target}-${b}.v" "$cache/entry_${freenum}-${b}.v"
    done
}
# Collect the comma-separated rows printed on lines containing "DEBUG" and
# render each group as an aligned table with column(1).  A row that starts
# with "," continues the current table; any other row starts a new one.

# Send the pending table to column(1) and clear it.  The text is written to
# the command's stdin, so quotes, "$" and backticks in the log are never seen
# by the shell.
function flush_block(    cmd) {
  if (block == "") return
  cmd = "column -s, -t -o' '"
  print block | cmd
  close(cmd)
  block = ""
}

BEGIN { block = "" }

/DEBUG/ {
  line = $0
  # anchored pattern, so one sub() is all that is ever needed
  sub(/^\[DEBUG\] */, "", line)
  # awk strings are 1-based; a start index of 0 is implementation-dependent
  # and can yield the empty string, which made this test unreliable.
  if (substr(line, 1, 1) == ",") {
    block = block "\n" line
  } else {
    flush_block()
    block = line
  }
}

END { flush_block() }
# Main body: rewrite the FANN network named by the first argument with the
# binary point given by the second argument and print the result on stdout.
if (@ARGV != 2) {
  usage();
  die "[ERROR] Wrong number of inputs";
}

my ($file_net, $binary_point) = @ARGV;
die "[ERROR] binary-point must be a non-negative integer\n"
  if $binary_point !~ /^\d+$/;

open my $fh_in, '<', $file_net
  or die "[ERROR] unable to open <$file_net";

my $old_binary_point = -1;
my $multiplier = 0;
while (my $entry = <$fh_in>) {
  # The decimal_point line fixes the scale factor for everything after it
  if ($entry =~ /^decimal_point=(\d+)/) {
    $old_binary_point = $1;
    $multiplier = 2**($binary_point - $old_binary_point);
    print "decimal_point=$binary_point\n";
    next;
  }

  # Neurons: rescale the third field of every (a, b, c) tuple
  if ($entry =~ /^(neurons.+=)(.+)$/) {
    my ($label, $new_neurons) = ($1, $2);
    # Without a known old binary point the multiplier is still 0 and every
    # value would silently become 0.
    die "[ERROR] no decimal_point line before the neurons in $file_net\n"
      if $old_binary_point < 0;
    $new_neurons =~
      s/\((\d+), (\d+), (\d+)\)/"($1, $2, ".int($3*$multiplier).")"/eg;
    print "$label$new_neurons\n";
    next;
  }

  # Connections: rescale the weight of every (neuron, weight) tuple
  if ($entry =~ /^(connections.+=)(.+)/) {
    my ($label, $new_connections) = ($1, $2);
    die "[ERROR] no decimal_point line before the connections in $file_net\n"
      if $old_binary_point < 0;
    $new_connections =~
      s/\((\d+), (-?\d+)\)/"($1, ".int($2*$multiplier).")"/eg;
    print "$label$new_connections\n";
    next;
  }

  print $entry;
}

close $fh_in;
<$config_input"; 29 | 30 | my (@layers, @layers_new, 31 | @neurons, @neurons_new, 32 | @connections, @connections_new, 33 | $total_layers); 34 | while(my $line = ) { 35 | # Convert the layer sizes line 36 | if ($line =~ /^layer_sizes=(.+)$/) { 37 | @layers = split(/ /, $1); 38 | next; 39 | } 40 | # Convert the neurons line 41 | if ($line =~ /^neurons.+=(.+)$/) { 42 | @neurons = split(/(?<=\)) /, $1); 43 | next; 44 | } 45 | # Convert the connections line 46 | if ($line =~ /^connections.+=(.+)$/) { 47 | @connections = split(/(?<=\)) /, $1); 48 | next 49 | } 50 | print $line; 51 | } 52 | 53 | # Take care of the layers 54 | print "layer_sizes="; 55 | my $count_layer = 0; 56 | foreach (@layers) { 57 | # If we're in the first or last layer, then nothing changes 58 | if ($count_layer == 0 or $count_layer == $#layers) { 59 | print "$_ "; 60 | push @layers_new, $_; 61 | } 62 | # Hidden neurons are all replicated $mr times 63 | else { 64 | print ((($_ - 1) * $mr + 1)." "); 65 | push @layers_new, ($_ - 1) * $mr + 1; 66 | } 67 | $count_layer++; 68 | } 69 | print "\n"; 70 | $total_layers = $count_layer; 71 | 72 | # Take care of the neurons 73 | my ($i, $r, $p, $j, $k, $count_neuron); 74 | $count_neuron = 0; 75 | my $neuron_offset = 0; 76 | my $conn_offset = 0; 77 | my $string_neurons = 78 | "neurons (num_inputs, activation_function, activation_steepness)="; 79 | my $string_connections = 80 | "connections (connected_to_neuron, weight)="; 81 | # Loop over all the layers in the network 82 | for ($i = 0; $i <= $#layers; $i++) { 83 | # Store, in separate variables, the multipliers that we use for neurons, 84 | # inputs, and connections 85 | my $multiplier = ($i == 0 or $i == $#layers) ? 1 : $mr; 86 | my $multiplier_inputs = ($i < 2) ? 
1 : $mr; 87 | # print "######################################## $multiplier, $multiplier_inputs\n"; 88 | my $num_conn_base = 0; 89 | my $num_conn = 0; 90 | # Loop over all the neurons in the network $multiplier times 91 | for ($r = 0; $r < $multiplier; $r++) { 92 | for ($j = 0; $j < $layers[$i] - 1; $j++) { 93 | # print "$i-$j-".($neuron_offset + $j).": "; 94 | $neurons[$neuron_offset + $j] =~ /\((\d+), (.+)\)/; 95 | $num_conn_base = $1; 96 | $num_conn = ($1 - 1) * $multiplier_inputs + 1; 97 | my $tmp_neuron = "($num_conn, $2)"; 98 | # print "$tmp_neuron\n"; 99 | $string_neurons .= "$tmp_neuron "; 100 | my $conn_index = 0; 101 | for ($p = 0; $p < $multiplier_inputs; $p++) { 102 | for ($k = 0; $k < $num_conn_base - 1; $k++) { 103 | $conn_index = $conn_offset + $num_conn_base * $j + $k - 1; 104 | $connections[$conn_index] =~ /\(([-\d]+), ([-\d]+)\)/; 105 | my $index = $1 + ($num_conn_base - 1) * $p; 106 | my $weight = int($2/$multiplier_inputs); 107 | $string_connections .= "($index, $weight) "; 108 | # print "($index, $weight)\n"; 109 | # print $conn_index." "; 110 | # $string_connections .= "$connections[$conn_index] "; 111 | } 112 | # $connections[$conn_index] =~ /\(([-\d]+), ([-\d]+)\)/; 113 | } 114 | $connections[$conn_index + 1] =~ /\(([-\d]+), ([-\d]+)\)/; 115 | my $index = $1 + ($num_conn_base - 1) * ($p - 1); 116 | my $weight = $2; 117 | $string_connections .= "($index, $weight) " if $k; 118 | # print (($conn_index + 1)." ") if $k; 119 | # print "\n" if $k; 120 | # $conn_offset += $num_conn; 121 | } 122 | } 123 | $conn_offset += $num_conn_base * ($j - 1) + ($k) + 1; 124 | # Handle bias 125 | # print "$i--$j--".($neuron_offset + $j)." 
#!/usr/bin/env perl

use strict;
use warnings;

# Print a short synopsis, including the three positional arguments.
sub usage {
  my $usage = <<'END';
Usage:
  ./fann-data-to-fixed INPUT OUTPUT DECIMAL-POINT
END
  print $usage;
}

if (@ARGV != 3) {
  usage();
  die "[ERROR] Wrong number of inputs";
}

my ($file_input, $file_output, $decimal_point) = @ARGV;

open my $fh_in, '<', $file_input or
  die "[ERROR] Unable to open <$file_input";
open my $fh_out, '>', $file_output or
  die "[ERROR] Unable to open >$file_output";

# The first line is copied through unchanged
my $first_line = <$fh_in>;
print {$fh_out} $first_line if defined $first_line;

# Every remaining value is scaled by 2^decimal_point and truncated by int()
my $scale = 2 ** $decimal_point;
while (my $row = <$fh_in>) {
  foreach my $value (split(" ", $row)) {
    print {$fh_out} int($value * $scale)." ";
  }
  print {$fh_out} "\n";
}

# A failed close is how a full disk shows up for buffered output
close $fh_out or die "[ERROR] Unable to finish writing >$file_output";
close $fh_in;
#!/usr/bin/env perl

use strict;
use warnings;
use Getopt::Long;

# Print the command-line synopsis.
sub usage() {
  print << "END"
Usage: gen-boolean-data -n [number of bits] -f [boolean function] [OPTIONS]

Required parameters:
  -n, --num-bits             Number of input bits
  -f, --boolean-function     Boolean function to use

Optional parameters:
  -s, --symmetric            Print -1/1 for inputs and outputs instead of 0/1
  --replicate-input          Replicate the inputs a number of times
  --replicate-output         Replicate the outputs a number of times

Supported boolean Functions:
  and
  or
  xor
END
}

# Two-input reductions, applied left to right over the bits of every row
my %combine = (
  and => sub { $_[0] & $_[1] },
  or  => sub { $_[0] | $_[1] },
  xor => sub { $_[0] ^ $_[1] },
);

my $number_of_bits;
my $boolean_function;
my $flag_symmetric;
my $replicate_input = 1;
my $replicate_output = 1;
GetOptions ("n|num-bits=i"         => \$number_of_bits,
            "f|boolean-function=s" => \$boolean_function,
            "s|symmetric"          => \$flag_symmetric,
            "replicate-input=i"    => \$replicate_input,
            "replicate-output=i"   => \$replicate_output)
  or usage() and die "Invalid option";

usage() and die "Missing required options" if
  not $number_of_bits or not $boolean_function;

# A negative width would make 2 ** n fractional and print a bogus table
die "Number of bits must be >= 1" if $number_of_bits < 1;

usage() and die "Unknown boolean function $boolean_function" if
  not exists $combine{$boolean_function};

die "Input/output replication must be >= 1" if
  ($replicate_input < 1 or $replicate_output < 1);

# Header: number of rows, inputs per row, outputs per row
my $num_rows = 2 ** $number_of_bits;
print $num_rows . " " .
  $number_of_bits * $replicate_input . " " .
  1 * $replicate_output . "\n";

for (my $i = 0; $i < $num_rows; $i++) {
  # Bits of $i, most significant first
  my @bits = map { ($i >> $_) & 1 } reverse(0 .. $number_of_bits - 1);

  my $output = $bits[0];
  $output = $combine{$boolean_function}->($output, $_)
    foreach @bits[1 .. $#bits];

  # In symmetric mode every 0 is printed as -1
  my @shown = $flag_symmetric ? (map { $_ ? 1 : -1 } @bits) : @bits;
  my $shown_output = $flag_symmetric ? ($output ? 1 : -1) : $output;

  # Every value is followed by one space, as before
  print join(" ", (@shown) x $replicate_input), " \n";
  print join(" ", ($shown_output) x $replicate_output), " \n";
}
11 | 12 | -d, --decimal-point Generate fixed-point data with `decimal-point` bits 13 | of fractional precision 14 | -i, --number-of-inputs The network has this many inputs 15 | -o, --number-of-outputs The network has this many outputs 16 | -n, --number-to-generate The number of random input/output pairs to generate 17 | END 18 | } 19 | 20 | my $decimal_point = 0; 21 | my $number_of_inputs; 22 | my $number_of_outputs; 23 | my $number_to_generate; 24 | GetOptions ("d|decimal-point=i" => \$decimal_point, 25 | "i|number-of-inputs=i" => \$number_of_inputs, 26 | "o|number-of-outputs=i" => \$number_of_outputs, 27 | "n|number-to-generate=i" => \$number_to_generate) 28 | or usage() and die "Bad options"; 29 | 30 | usage() and die "Insufficient options" if 31 | not $number_of_inputs or not $number_of_outputs or not $number_to_generate; 32 | 33 | print "$number_to_generate $number_of_inputs $number_of_outputs\n"; 34 | for (my $i = 0; $i < $number_to_generate; $i++) { 35 | for (my $j = 0; $j < $number_of_inputs; $j++) { 36 | print int(rand(2)) << $decimal_point," "; 37 | } 38 | print "\n"; 39 | for (my $j = 0; $j < $number_of_outputs; $j++) { 40 | print int(rand(2)) << $decimal_point," "; 41 | 42 | } 43 | print "\n"; 44 | } 45 | -------------------------------------------------------------------------------- /tools/scripts/gen-trace-video: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy 4 | import math 5 | import matplotlib.pyplot as plt 6 | import matplotlib.animation as animation 7 | import argparse 8 | import re 9 | 10 | def parse_arguments(): 11 | parser = argparse.ArgumentParser( 12 | description='Generate a video of how a FANN network evolves') 13 | parser.add_argument( 14 | '-t', '--train-file', dest='file_train', type=str, required=True, 15 | help='The FANN training file') 16 | parser.add_argument( 17 | '-l', '--log-file', dest='file_log', type=str, 18 | nargs='+', 19 | help='The log of 
def file_len(filename):
    """Return the number of lines in ``filename``.

    An empty file yields 0; the previous loop left its counter unbound in
    that case and raised UnboundLocalError.
    """
    with open(filename) as f:
        return sum(1 for _ in f)
float(m[output]) 80 | 81 | fig = plt.figure() 82 | ax = plt.axes(xlim=(args.xLimits[0], args.xLimits[1]), 83 | ylim=(args.yLimits[0], args.yLimits[1])) 84 | 85 | line_nn = [ax.plot(0, 0) for i in range(num_logs)] 86 | 87 | x, y_correct = zip(*sorted(zip(inputs, outputs_correct))) 88 | line_orig, = ax.plot(x, y_correct) 89 | line_orig.set_label('Correct') 90 | 91 | for log in range(num_logs): 92 | x, y = zip(*sorted(zip(inputs, outputs[log][0]))) 93 | line_nn[log], = ax.plot(x, y, '.') 94 | if args.labels is not None: 95 | line_nn[log].set_label(args.labels[log]) 96 | else: 97 | line_nn[log].set_label(args.file_log[log]) 98 | plt.legend(loc='best') 99 | 100 | def update(i): 101 | for log in range(num_logs): 102 | x, y = zip(*sorted(zip(inputs, outputs[log][i]))) 103 | line_nn[log].set_data(x, y) 104 | plt.title("Epoch {0}".format(i)) 105 | return line_nn, 106 | 107 | def init(): 108 | for log in range(num_logs): 109 | line_nn[log].set_data([], []) 110 | return line_nn, 111 | 112 | ani = animation.FuncAnimation(fig, update, init_func=init, 113 | frames=len(outputs[0]), interval=1) 114 | if (args.file_save is not None): 115 | ani.save(args.file_save, fps=30, extra_args=['-vcodec', 'libx264']) 116 | else: 117 | plt.show() 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /tools/scripts/gen-video-soft: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is intended to be run from the FPGA 4 | 5 | DIR_ROOT=/home/root 6 | DIR_BUILD=/home/se/research_local/rocket-chip/xfiles-dana/build 7 | DIR_NETS=$DIR_BUILD/nets 8 | DIR_VIDEO=$DIR_BUILD/video/soft 9 | 10 | MAX_EPOCHS=1000 11 | MSE_REPORTING=100 12 | 13 | FANN_SOFT=$DIR_BUILD/../usr/bin/fann-train 14 | 15 | TRAIN_SIN=(sin-scale-0.25 sin-scale-0.50 sin-scale-0.75 sin-scale-1.00 \ 16 | sin-scale-1.25 sin-scale-1.50 sin-scale-1.75 sin-scale-2.00 \ 17 | sin-scale-2.25 
def twos_complement(val, width):
    """Format ``val`` as a ``.word`` directive holding its two's complement.

    val   -- integer to encode; negative values wrap modulo 2**width
    width -- word width in bits; the value is printed as width/4 hex digits

    Masking with the requested width replaces the old hard-coded 0xffffffff,
    so widths other than 32 work and no minus sign can reach the output.
    """
    format_string = " .word 0x{0:0" + str(width >> 2) + "x}"
    return format_string.format(val & ((1 << width) - 1))
'.net') 37 | train_file_path = os.path.join(args.net + '.train') 38 | ant_file_path = os.path.join(args.net + '.ant.h') 39 | sixteen_bin_file_path = os.path.join(args.net + '.16bin') 40 | # If the data_in file is not available or the ant file can't be written, 41 | # then abort 42 | try: 43 | data_in_file = open(train_file_path) 44 | except FileNotFoundError: 45 | print("[ERROR] Unable to open {} (for reading)".format(train_file_path)) 46 | return 47 | 48 | if (args.output): 49 | try: 50 | ant_file = open(args.output, 'w') 51 | except FileNotFoundError: 52 | print("[ERROR] Unable to open {} (for writing)".format(ant_file_path)) 53 | return 54 | else: 55 | ant_file = sys.stdout 56 | 57 | with open(train_file_path, 'rb') as train_file: 58 | num_datapoints, num_inputs, num_outputs = [int(s) for s in train_file.readline()[:-1].decode('utf-8').split(" ")] 59 | 60 | # Collect input data 61 | data_in_list = [] 62 | # Advance file pointer 63 | data_in_file.readline() 64 | for line_num, line in enumerate(data_in_file): 65 | if line_num % 2 == 0: 66 | # Inputs 67 | data_in_list.extend([int(x) for x in line.split()]) 68 | else: 69 | # Outputs 70 | continue 71 | 72 | # Collect expected outs 73 | fann_eval_fixed_output = subprocess.Popen([path_fann_eval_fixed, '--verbose', '-n', net_file_path, '-t', train_file_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0] 74 | expected_out_list = [] 75 | for line in fann_eval_fixed_output.splitlines(): 76 | expected_out_list.extend(line.split(b'->')[-1].split()) 77 | 78 | ant_file.write('''#define NUM_DATAPOINTS {} 79 | #define NUM_INPUTS {} 80 | #define NUM_OUTPUTS {} 81 | 82 | #define DANA_TEST_DATA \\ 83 | '''.format(num_datapoints, num_inputs, num_outputs)) 84 | 85 | # Append data_in 86 | ant_file.write('data_in:; \\\n') 87 | for v in [twos_complement(i, 32) for i in data_in_list]: 88 | ant_file.write(v + '; \\\n') 89 | # Create blank data_out region for each expected out 90 | ant_file.write('data_out:; \\\n') 91 | 
for _ in range(len(expected_out_list)): 92 | ant_file.write(' .word 0x00000000; \\\n') 93 | # Append data_expected 94 | ant_file.write('data_expected:;') 95 | for v in expected_out_list: 96 | ant_file.write(' \\\n .word 0x' + v.decode('utf-8') + ";") 97 | ant_file.write('\n\n') 98 | 99 | # Append ANT region 100 | ant_region = subprocess.Popen([path_generate_ant, '-a', str(args.asid) + ',' + sixteen_bin_file_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0] 101 | ant_file.write('#define DANA_ANT_DATA ') 102 | for line in ant_region.splitlines(): 103 | if line.decode("utf-8")[0] == '.': 104 | ant_file.write('; \\\n ' + line.decode('utf-8')) 105 | else: 106 | ant_file.write('; \\\n' + line.decode('utf-8')) 107 | ant_file.write('\n') 108 | 109 | ant_file.close() 110 | 111 | if __name__ == "__main__": 112 | args = parse_arguments() 113 | write_ant_file(args) 114 | -------------------------------------------------------------------------------- /tools/scripts/instrument_dpi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Getopt::Long; 5 | 6 | sub usage() { 7 | print << "END" 8 | Usage: instrument-dpi -m [module] -s [signal] [file] 9 | END 10 | } 11 | 12 | my ($opt_module, 13 | $opt_signal, 14 | $opt_help, 15 | $opt_file); 16 | GetOptions("m|module=s" => \$opt_module, 17 | "s|signal=s" => \$opt_signal, 18 | "h|?|help" => \$opt_help) 19 | or usage() and die "[ERROR] Bad option"; 20 | 21 | usage() and exit if $opt_help; 22 | usage() and die "[ERROR] Mising required option" if not $opt_module or not $opt_signal; 23 | usage() and die "[ERROR] File not specified" if $#ARGV != 0; 24 | 25 | $opt_file = $ARGV[0]; 26 | open (my $fh, "<", $opt_file) or die "[ERROR] Unable to open $opt_file"; 27 | 28 | my $instrument_dpi_readmemh = << "END"; 29 | function dpi_readmemh; 30 | input string file; 31 | \$display("[INFO] Loading memory from file:"); 32 | 
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Long;

# Print the command-line synopsis.
sub usage() {
  print << "END"
Usage: instrument_vpi -m [module] [file]
Make all the IO of a Verilog file public via Verilator pragmas
END
}

my ($opt_module,
    $opt_help,
    $opt_file);
GetOptions("m|module=s" => \$opt_module,
           "h|?|help"   => \$opt_help)
  or usage() and die "[ERROR] Bad option";

usage() and exit if $opt_help;
usage() and die "[ERROR] Missing required option" if not $opt_module;
usage() and die "[ERROR] File not specified" if $#ARGV != 0;

$opt_file = $ARGV[0];
open (my $fh, "<", $opt_file) or die "[ERROR] Unable to open $opt_file";

# True while we are between the "module NAME(" line and the first line that
# contains a ";"
my $found_it = 0;
while (my $text = <$fh>) {
  # \Q..\E: the module name is matched literally, not as a regex
  if ($text =~ m/^module \Q$opt_module\E\(/) {
    print $text;
    $found_it++;
    next;
  }
  if (not $found_it) {
    print $text;
    next;
  }

  # Port declaration: put the pragma before the optional trailing comma
  if ($text =~ m/^(\s*(input|output|inout).*?\w+)(,?)$/) {
    print "$1 /*verilator public*/$3\n";
    next;
  }

  # Everything else is passed through; such lines used to be dropped unless
  # they contained the ";" that ends the header.
  print $text;
  $found_it = 0 if $text =~ m/;/;
}

close $fh;
#!/bin/bash

# Figure out how many processors are in the system: one "processor" entry
# per CPU in /proc/cpuinfo
NUM_CPUS=$(grep -c '^processor' /proc/cpuinfo)

# Find the worst (highest) load that we've seen over the past 3 reported
# load average intervals.  The old `sort -n | head -n1` picked the lowest.
WORST_LOAD=$(awk '{ worst = $1
                    if ($2 > worst) worst = $2
                    if ($3 > worst) worst = $3
                    print worst }' /proc/loadavg)

# Grab the number of idle CPUs that we see, and, if the machine is
# heavily loaded, just grab one.  int() drops the fraction and copes with
# negative results, which the dc/sed pipeline did not.
COMMANDEERED_CPUS=$(awk -v cpus="$NUM_CPUS" -v load="$WORST_LOAD" \
                        'BEGIN { print int(cpus - load) }')
if [[ $COMMANDEERED_CPUS -lt 1 ]]; then
    COMMANDEERED_CPUS=1;
fi
echo $COMMANDEERED_CPUS
"; 41 | } 42 | } 43 | print "\n"; 44 | } 45 | -------------------------------------------------------------------------------- /tools/scripts/parse-data-python: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Completely generic data parser that adheres to a specific output log 4 | # format. All lines that we care about look like: 5 | # 6 | # |------------------------------------- start of line, i.e., will match m/^/ 7 | # | |-------------------------------- independent variable label and value 8 | # | | |--------------------------- first dependent variable 9 | # | | | |---------------------- second dependent variable 10 | # | | | | ... 11 | # | | | | |------------- last dependent variable 12 | # | v v v v 13 | # v |-| |--| |--| |--| 14 | # [STAT] x 0 y1 1 y2 2 ... yn n 15 | # 16 | # Variable labels are camelcase words that can include numbers. 17 | # Variable values are signed floats. 18 | 19 | import argparse 20 | import re 21 | import numpy 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('-k', '--key') 26 | parser.add_argument('-l', '--label') 27 | parser.add_argument('-v', dest='verbose', action='store_true') 28 | parser.add_argument('file_in', action='store') 29 | args = parser.parse_args() 30 | 31 | f = open(args.file_in, 'r') 32 | line_stat = re.compile('^\[STAT\]') 33 | 34 | array = numpy.array([]) 35 | for line in f: 36 | if not line_stat.match(line): 37 | continue 38 | 39 | 40 | f.close() 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /tools/scripts/regression.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | # Script to handle regression testing of xfiles-dana. 
This script is 4 | # intended to work in concert with `rocket-chip-setup.sh` which deals 5 | # with grabbing all the submodules of the current rocket-chip master 6 | # and building the RISC-V toolchain. The directory structure will look 7 | # like: 8 | # 9 | # /home/jenkins/ 10 | # |--> jobs/ 11 | # |--> rocket-chip/workspace/ 12 | # | |--> riscv/ 13 | # | |--> rocket-chip/ 14 | # | |--> xfiles-dana 15 | # |--> xfiles-dana/workspace/ 16 | # |--> [EMPTY] 17 | # 18 | # The workspace of this build is _technically_ in 19 | # xfiles-dana/workspace, but all the actual work will be done in the 20 | # workspace of the rocket-chip build, rocket-chip/workspace. Hence, we 21 | # need to deal with everything being relative to that directory. This 22 | # script, however, will be called _after_ a Jenkins moves us to the 23 | # rocket-chip/workspace/xfiles-dana directory. 24 | 25 | # Define a relative path to the RISC-V Toolchain 26 | DIR_RISCV=../../riscv 27 | 28 | # Setup the RISCV environment variable and add its binary directory to 29 | # the path. 30 | export RISCV=`readlink -f $DIR_RISCV` 31 | echo RISCV ENV VAR is $RISCV 32 | export PATH=$PATH:$RISCV/bin 33 | echo PATH is $PATH 34 | 35 | # Jenkins will recursively update xfiles-dana submodules, so we 36 | # shouldn't have to do any setup there. Just create the symlinks 37 | # inside rocket-chip (assuming they don't already exist), run the 38 | # normal `make rv` target to see if anything related to generating NNs 39 | # or libraries is broken, and then run the regression tests. 
40 | ./install-symlinks 41 | make rv 42 | cd tests 43 | ./regress.sh 44 | -------------------------------------------------------------------------------- /tools/scripts/rocket-chip-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -xe 2 | 3 | # Setup script, intended to be used by Jenkins, that will build the 4 | # RISC-V toolchain specified by the commit ID of the riscv-tools 5 | # submodule. The directory structure used is as follows: 6 | # 7 | # /home/jenkins/ 8 | # |--> jobs/ 9 | # |--> rocket-chip/workspace/ 10 | # |--> riscv/ 11 | # |--> rocket-chip/ 12 | # 13 | # Jenkins uses a non-standard convention of cloning rocket-chip in a 14 | # rocket-chip subdirectory of the workspace. This then allows us to 15 | # define our own riscv directory where the toolchain will be built. 16 | 17 | # Create the riscv directory if it doesn't already exist and setup the 18 | # environment variables that we care about. 19 | mkdir -p ../riscv 20 | export RISCV=`readlink -f ../riscv` 21 | echo RISCV ENV VAR is $RISCV 22 | export PATH=$PATH:$RISCV/bin 23 | echo PATH is $PATH 24 | 25 | # Update the riscv-tools submodule. We're more careful about which 26 | # submodules we update here (as opposed to getting Jenkins to do it) 27 | # because we don't want to update everything (i.e., we don't need the 28 | # fpga-zynq images which take a while to get). We then set the number 29 | # of parallel jobs to something sane (the output of the 30 | # `max-processors.sh` script) so that we don't wind up using the 31 | # default value of 16. 32 | git submodule update --init --recursive riscv-tools 33 | cd riscv-tools 34 | ../xfiles-dana/usr/bin/max-processors.sh | \ 35 | xargs -IX sh -c "sed -i 's/JOBS=\([0-9]\+\)/JOBS=X/' build.common" 36 | 37 | # The proxy kernel needs to be patched to enable our special 38 | # supervisor system calls that set the ASID and ASID--NNID Table 39 | # pointer. 40 | cd riscv-pk 41 | git checkout . 
42 | git apply ../../xfiles-dana/patches/riscv-pk-xfiles-syscalls.patch 43 | cd .. 44 | 45 | # Build the toolchain. 46 | ./build.sh; 47 | 48 | # The script will barf if we try to doubly patch the proxy kernel, so 49 | # we remove the patch in preparation for a subsequent build. 50 | cd riscv-pk 51 | git apply -R ../../xfiles-dana/patches/riscv-pk-xfiles-syscalls.patch 52 | cd ../.. 53 | 54 | # Update only the submodules in the top level (e.g., uncore, rocket, 55 | # hardfloat) and then dump out their status so that we can see if 56 | # anything is acting weirdly in the logs. 57 | git submodule update --init 58 | git submodule status --recursive 59 | -------------------------------------------------------------------------------- /tools/scripts/rv-load-fpga: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # User configuration 4 | dir=/opt/etc 5 | dir_zedboard=/mnt/kd/se/research_local/rocket-chip/fpga-zynq/zedboard/fpga-images-zedboard 6 | dir_zedboard_local=/home/se/research_local/rocket-chip/fpga-zynq/zedboard/fpga-images-zedboard 7 | 8 | # Fixed configuration 9 | ssh_key=$dir/fpga-ssh 10 | username=root 11 | 12 | # FPGA info is also in info.txt in a line beginning with "fpga" 13 | fpgas=($(grep ^fpga $dir/info.txt)) 14 | 15 | for fpga in "${fpgas[@]}"; do 16 | name=${fpga%%,*} 17 | ssh -i $ssh_key $username@$name \ 18 | "cp /mnt/boot/boot.bin /mnt/boot/boot.bin.bak" 19 | scp -i $ssh_key $dir_zedboard_local/boot.bin \ 20 | $username@$name:/mnt/boot/boot.bin 21 | checksum_fpga=$(ssh -i $ssh_key $username@$name \ 22 | md5sum /mnt/boot/boot.bin | awk '{print $1}') 23 | checksum_local=$(ssh -i $ssh_key $username@$name \ 24 | md5sum $dir_zedboard/boot.bin | awk '{print $1}') 25 | if [ -z "$checksum_fpga" ] || [ "$checksum_fpga" != "$checksum_local" ]; then # an empty checksum (ssh/md5sum failed) also counts as a mismatch 26 | echo "[ERROR] Checksums after copying do not match! Reverting..." 
27 | # ssh -i $ssh_key $username@$name \ 28 | # "cp /mnt/boot/boot.bin.bak /mnt/boot/boot.bin" 29 | exit 1 30 | fi 31 | echo "[INFO] Updated boot.bin of $name" 32 | done 33 | -------------------------------------------------------------------------------- /tools/scripts/rvcon: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # User configuration 4 | dir=/opt/etc 5 | 6 | show_help() { 7 | cat << EOF 8 | Usage: $0 [OPTION]... [FPGA (optional)] 9 | Connect to a free FPGA or to a specific FPGA 10 | 11 | -h display this help text 12 | 13 | Example usage: 14 | Grab any free FPGA 15 | $0 16 | Grab a specific FPGA 17 | $0 fpga0 18 | EOF 19 | } 20 | 21 | # Check that the user can actually access the TTY 22 | if [ `groups | grep -c dialout` -eq 0 ]; then 23 | echo "[ERROR] User needs to be in group \"dialout\"" 24 | exit 1 25 | fi 26 | 27 | if [ -e $dir/fpga-motd ]; then 28 | cat $dir/fpga-motd 29 | fi 30 | 31 | OPTIND=1 32 | while getopts "hf" opt; do 33 | case "$opt" in 34 | h) 35 | show_help 36 | exit 0 37 | ;; 38 | esac 39 | done 40 | 41 | shift "$((OPTIND-1))" 42 | 43 | if [ "$#" -gt 1 ]; then 44 | echo "[ERROR] Too many arguments" 45 | show_help 46 | exit 1 47 | fi 48 | 49 | # Connect to a specific FPGA 50 | if [ "$#" -eq 1 ]; then 51 | name=$1 52 | fpga=($(grep ^$name $dir/info.txt)) 53 | if [ $? -eq 1 ]; then 54 | echo "[ERROR] Unable to find FPGA \"$name\" in $dir/info.txt" 55 | exit 1 56 | fi 57 | tty=${fpga#*,} 58 | tty=${tty%%,*} 59 | ps -A au | grep -i screen | grep $tty 2>&1 > /dev/null 60 | if [[ $? -eq 1 ]]; then 61 | exec screen $tty 115200,cs8,-parenb,-cstopb 62 | exit 0 63 | else 64 | echo "[ERROR] $name in use. Check ownership with rvstatus/rvwho." 
65 | exit 1 66 | fi 67 | # Grab the next available FPGA 68 | else 69 | fpgas=($(grep ^fpga $dir/info.txt)) 70 | for fpga in "${fpgas[@]}"; do 71 | name=${fpga%%,*} 72 | tty=${fpga#*,} 73 | tty=${tty%%,*} 74 | # Check to see if we get a hit for that tty being in use 75 | ps -A au | grep -i screen | grep $tty 2>&1 > /dev/null 76 | if [[ $? -eq 1 ]]; then 77 | exec screen $tty 115200,cs8,-parenb,-cstopb 78 | exit 0 79 | fi 80 | done 81 | fi 82 | 83 | echo "[ERROR] All FPGAs in use. Check ownership with rvstatus/rvwho." 84 | exit 1 85 | -------------------------------------------------------------------------------- /tools/scripts/rvreboot: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # User configuration 4 | dir=/opt/etc 5 | 6 | show_help() { 7 | cat << EOF 8 | Usage: $0 [OPTION]... [FPGA] 9 | Reboot one of the attached FPGAs. You can normally only reboot an FPGA that 10 | you have an rvcon lock on or is not in use. You can override this with the 11 | force option 12 | 13 | -f forcibly reboot an FPGA in use by another user 14 | -h display this help text 15 | 16 | Example usage: 17 | $0 fpga0 18 | EOF 19 | } 20 | 21 | force=0 22 | OPTIND=1 23 | while getopts "hf" opt; do 24 | case "$opt" in 25 | h) 26 | show_help 27 | exit 0 28 | ;; 29 | f) 30 | force=1 31 | ;; 32 | esac 33 | done 34 | 35 | shift "$((OPTIND-1))" 36 | 37 | # Die if the user didn't specify what FPGA to reset 38 | if [ "$#" -ne 1 ]; then 39 | echo "[ERROR] Missing command line argument" 40 | show_help 41 | exit 1 42 | fi 43 | fpga=$1 44 | 45 | # Lookup the FPGA settings in the info.txt file, barfing if we can't find it 46 | fpga_info=($(grep ^$fpga, $dir/info.txt)) 47 | if [ $? -eq 1 ]; then 48 | echo "[ERROR] Unable to find FPGA \"$fpga\" in $dir/info.txt" 49 | exit 1 50 | fi 51 | 52 | # Parse info.txt to get all the info we need to reset this FPGA, 53 | # including any user that may have an rvcon lock on it. 
54 | tty=${fpga_info#*,} 55 | tty=${tty%%,*} 56 | power_strip=${fpga_info#*,*,} 57 | power_strip=${power_strip%%,*} 58 | plug=${fpga_info##*,} 59 | 60 | if [ ! $plug ]; then 61 | echo "[ERROR] No plug for \"$fpga\" in $dir/info.txt" 62 | echo " This is expected if this is a PCIe board..." 63 | exit 1 64 | fi 65 | 66 | ip=$(grep ip $dir/info.txt | grep ,$power_strip, | awk -F',' '{print $3}') 67 | user=$(ps -A au | grep -i screen | grep $tty | tail -n1 | tail -n1 | \ 68 | awk '{print $1}') 69 | 70 | # Barf if a user is using the FPGA that is not us and we didn't tell 71 | # it to force the reboot. 72 | if [ $user ] && [ $user != $USER ] && [ $force -eq 0 ]; then 73 | echo "[ERROR] Cannot reboot because $fpga is in use by \"$user\"" 74 | exit 1 75 | fi 76 | 77 | if [ -e $dir/fpga-motd ]; then 78 | cat $dir/fpga-motd 79 | fi 80 | 81 | # Get the credentials_apc from info.txt that we need to login to the 82 | # powerstrip 83 | credentials_apc=$(grep ^credentials-apc $dir/info.txt) 84 | credentials_apc=${credentials_apc#*,} 85 | username_apc=${credentials_apc%,*} 86 | password_apc=${credentials_apc#*,} 87 | 88 | # Reboot the FPGA 89 | echo exec fence_apc --action=reboot --ip=$ip --plug=$plug \ 90 | --username=$username_apc --password=$password_apc 91 | -------------------------------------------------------------------------------- /tools/scripts/rvstatus: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # User configuration 4 | dir=/opt/etc 5 | 6 | # Fixed configuration 7 | ssh_key=$dir/fpga-ssh 8 | username=root 9 | 10 | if [ -e $dir/fpga-motd ]; then 11 | cat $dir/fpga-motd 12 | fi 13 | 14 | # The APC username and password are stored in a line beginning with 15 | # "credentials" in info.txt. 
16 | credentials_apc=$(grep ^credentials-apc $dir/info.txt) 17 | credentials_apc=${credentials_apc#*,} 18 | username_apc=${credentials_apc%,*} 19 | password_apc=${credentials_apc#*,} 20 | 21 | # FPGA info is also in info.txt in a line beginning with "fpga" 22 | fpgas=($(grep ^fpga $dir/info.txt)) 23 | 24 | awk 'BEGIN {printf "%-5s %-6s %-32s %-10s\n", 25 | "FPGA", "STATUS", "CONFIG=md5(boot.bin)", "USER"}' 26 | for fpga in "${fpgas[@]}"; do 27 | name=${fpga%%,*} 28 | tty=${fpga#*,} 29 | tty=${tty%%,*} 30 | power_strip=${fpga#*,*,} 31 | power_strip=${power_strip%%,*} 32 | plug=${fpga##*,} 33 | 34 | if [ ! $plug ]; then 35 | status="ON" 36 | else 37 | ip=$(grep ip $dir/info.txt | grep ,$power_strip, | \ 38 | awk -F',' '{print $3}') 39 | status=$(exec fence_apc --action=status --ip=$ip --plug=$plug \ 40 | --username=$username_apc --password=$password_apc | \ 41 | awk '{print $2}') 42 | fi 43 | 44 | if [ $status = "ON" ]; then 45 | user=$(ps -A au | grep -i screen | grep $tty | tail -n1 | tail -n1 | \ 46 | awk '{print $1}') 47 | # config=$(ssh -i $ssh_key -o ConnectTimeout=1 $username@$name \ 48 | # md5sum /mnt/boot/boot.bin 2>/dev/null | awk '{print $1}') 49 | config=$(ssh -i $ssh_key -o ConnectTimeout=1 $username@$name \ 50 | "md5sum /mnt/boot/boot.bin" 2>/dev/null) 51 | return=$? 
52 | if [ $return -ne 0 ]; then 53 | config="ssh-error-$return" 54 | else 55 | config=$(echo $config | awk '{print $1}') 56 | fi 57 | else 58 | config="fpga off no info" 59 | fi 60 | echo "$name $status $config $user" | \ 61 | awk '{printf "%-5s %-6s %-32s %-10s\n", $1, $2, $3, $4}' 62 | done 63 | -------------------------------------------------------------------------------- /tools/scripts/rvwho: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # User configuration 4 | dir=/opt/etc 5 | 6 | if [ -e $dir/fpga-motd ]; then 7 | cat $dir/fpga-motd 8 | fi 9 | 10 | # Get a comma delimited list, one entry per line, of all the screen 11 | # sessions running and tty that they're using. 12 | owners=($(ps -A au | grep -i screen | grep tty | awk '{print $1","$2","$12}')) 13 | 14 | # Loop over the owners and print them out. I could just do this with 15 | # the line above, but I'd like to have the opportunity to do some 16 | # additional processing which this loop can eventually be used for. 
17 | awk 'BEGIN {printf "%-5s %-10s %-5s %-20s\n", \ 18 | "PID", "USER", "FPGA", "DEVICE"}' 19 | for owner in "${owners[@]}"; do 20 | device=${owner#*,*,} 21 | fpga=$(grep $device $dir/info.txt) 22 | fpga=${fpga%,/*} 23 | echo "$owner,$fpga" | awk -F',' '{printf "%-5s %-10s %-5s %-20s\n", \ 24 | $2, $1, $4, $3}' 25 | done 26 | -------------------------------------------------------------------------------- /tools/scripts/travis-before-install: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | echo "RISCV: $RISCV" 4 | echo "ROCKETCHIP: $ROCKETCHIP" 5 | echo "XFILES: $XFILES" 6 | echo "CPP_CONFIG: $CPP_CONFIG" 7 | 8 | # rocket-chip repo setup 9 | git clone --depth 1 --branch xfiles-dana https://github.com/ucb-bar/rocket-chip $ROCKETCHIP 10 | cd $ROCKETCHIP 11 | rm .travis.yml 12 | git submodule update --init 13 | 14 | # riscv-tools setup 15 | cd $ROCKETCHIP/riscv-tools 16 | rm .travis.yml 17 | git submodule update --init --recursive riscv-gnu-toolchain 18 | git submodule update --init --recursive riscv-isa-sim 19 | git submodule update --init --recursive riscv-fesvr 20 | git submodule update --init --recursive riscv-opcodes 21 | git submodule update --init --recursive riscv-pk 22 | git submodule update --init --recursive riscv-tests 23 | cd $ROCKETCHIP/riscv-tools/riscv-pk 24 | git checkout . 
25 | git apply $XFILES/patches/riscv-pk-xfiles-syscalls.patch 26 | 27 | # Grab the submodules for xfiles-dana and create the symlinks 28 | cd $XFILES 29 | git submodule update --init 30 | ./install-symlinks $ROCKETCHIP 31 | -------------------------------------------------------------------------------- /tools/scripts/travis-script: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | echo "RISCV: $RISCV" 4 | echo "ROCKETCHIP: $ROCKETCHIP" 5 | echo "XFILES: $XFILES" 6 | echo "CPP_CONFIG: $CPP_CONFIG" 7 | 8 | # build emulator before RISC-V (to catch failures more quickly) 9 | # cd $ROCKETCHIP/emulator 10 | # make CONFIG=$CPP_CONFIG ROCKETCHIP_ADDONS=xfiles-dana 11 | 12 | # build riscv-tools 13 | # cd $ROCKETCHIP/riscv-tools 14 | # ./build.sh 15 | 16 | # Run regression tests 17 | # cd $XFILES 18 | # make rv 19 | cd $XFILES/tests 20 | ./regress.sh 21 | -------------------------------------------------------------------------------- /tools/src/bin-config-to-c-header.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 
2 | 3 | #include 4 | #include 5 | #include 6 | 7 | int main (int argc, char * argv[]) { 8 | FILE * fp = NULL; 9 | void * data = NULL; 10 | long int file_size; 11 | int i, exit_code = 0; 12 | 13 | // Check that we have three input arguments (argc includes the program name) 14 | if (argc != 4) { 15 | printf("Usage: %s \n", argv[0]); 16 | exit_code = -1; 17 | goto bail; 18 | } 19 | 20 | // Read the complete original binary file, bailing out if it cannot be 21 | // opened (the fseek/ftell calls below would otherwise get a NULL stream) 22 | fp = fopen(argv[1], "rb"); 23 | if (fp == NULL) { 24 | fprintf(stderr, "Failed to open file %s\n", argv[1]); 25 | exit_code = -1; 26 | goto bail; 27 | } 28 | fseek(fp, 0, SEEK_END); 29 | file_size = ftell(fp); 30 | fseek(fp, 0, SEEK_SET); 31 | switch (atoi(argv[3])) { 32 | case (32): 33 | file_size /= sizeof(uint32_t); 34 | data = (uint32_t *) malloc(file_size * sizeof(uint32_t)); 35 | fread((uint32_t *) data, sizeof(uint32_t), file_size, fp); 36 | printf("static uint32_t %s[%ld] __attribute__((unused)) = \n{", argv[2], 37 | file_size); 38 | for (i = 0; i < file_size - 1; i++) { 39 | printf("0x%08x,", ((uint32_t *)data)[i]); 40 | if ((i + 1) %4 == 0) 41 | printf("\n "); 42 | } 43 | printf("0x%08x};\n", ((uint32_t *)data)[i]); 44 | break; 45 | case (64): 46 | file_size /= sizeof(uint64_t); 47 | data = (uint64_t *) malloc(file_size * sizeof(uint64_t)); 48 | fread((uint64_t *) data, sizeof(uint64_t), file_size, fp); 49 | printf("static uint64_t %s[%ld] __attribute__((unused)) = \n{", argv[2], 50 | file_size); 51 | for (i = 0; i < file_size - 1; i++) { 52 | printf("0x%016lx,", ((uint64_t *)data)[i]); 53 | if ((i + 1) %2 == 0) 54 | printf("\n "); 55 | } 56 | printf("0x%016lx};\n", ((uint64_t *)data)[i]); 57 | break; 58 | case (128): 59 | // [TODO] Add support for this at some point 60 | default: 61 | printf("Only XLens of 32 or 64 are supported\n"); 62 | exit_code = -1; // report the unsupported width to the caller instead of exiting 0 63 | } 64 | 65 | bail: 66 | if (fp != NULL) 67 | fclose(fp); 68 | if (data != NULL) 69 | free(data); 70 | 71 | return exit_code; 72 | } 73 | -------------------------------------------------------------------------------- /tools/src/copyright.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for
license details. 2 | 3 | #include 4 | 5 | #ifndef __TOOLS_SRC_COPYRIGHT_H_ 6 | #define __TOOLS_SRC_COPYRIGHT_H_ 7 | 8 | #define COPYRIGHT_FANN "Portions Copyright (C) 2003-2016 Steffen Nissen\n" 9 | 10 | #define PRINT_NOTICES(text) fprintf(stderr, text "\n") 11 | 12 | #endif // __TOOLS_SRC_COPYRIGHT_H_ 13 | -------------------------------------------------------------------------------- /tools/src/encoding.h: -------------------------------------------------------------------------------- 1 | // See LICENSE.IBM for license details. 2 | 3 | #ifndef __TOOLS_SRC_ENCODING_H_ 4 | #define __TOOLS_SRC_ENCODING_H_ 5 | 6 | #define W_NEURON_FIRST_NEURON_POINTER 12 7 | #define W_NEURON_NEURONS_IN_LAYER 10 8 | #define W_NEURON_NEURONS_IN_PREVIOUS_LAYER W_NEURON_NEURONS_IN_LAYER 9 | 10 | typedef uint32_t dana_ptr_t; // internal configuration pointer 11 | typedef uint32_t dana_data_t; // DANA arithmetic unit 12 | 13 | struct global_info_t { 14 | uint16_t decimal_point : 3; 15 | uint16_t error_function : 1; 16 | uint16_t binary_format : 3; 17 | uint16_t _unused_0 : 9; 18 | uint16_t total_weight_blocks; // ??? 
19 | uint16_t total_neurons; 20 | uint16_t total_layers; 21 | dana_ptr_t ptr_first_layer; 22 | dana_ptr_t ptr_weights; 23 | }; 24 | 25 | struct layer_info_t { 26 | dana_ptr_t ptr_neuron; 27 | uint32_t num_neurons : 16; 28 | uint32_t num_neurons_previous : 16; 29 | }; 30 | 31 | struct neuron_info_t { 32 | dana_ptr_t ptr_weight_offset; 33 | uint16_t num_weights; 34 | uint8_t activation_function : 5; 35 | uint8_t steepness : 3; 36 | uint8_t _unused_0; 37 | uint32_t _unused_1; 38 | dana_data_t bias; 39 | }; 40 | 41 | enum encoding_error_t { 42 | NO_ERROR = 0, 43 | FAILED_TO_READ_ANN_FROM_FILE, 44 | FAILED_TO_OPEN_BIN_OUT, 45 | BAD_ARGUMENTS, 46 | UNSUPPORTED_BLOCK_WIDTH, 47 | VERIFY_GLOBAL_FAILED, 48 | VERIFY_NEURON_FAILED, 49 | STRUCT_LARGER_THAN_16B 50 | }; 51 | 52 | 53 | #endif // __TOOLS_SRC_ENCODING_H_ 54 | -------------------------------------------------------------------------------- /tools/src/fann-eval.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | // See LICENSE.IBM for license details. 
3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "tools/src/copyright.h" 9 | #ifndef FIXEDFANN 10 | #include "fann/src/include/fann.h" 11 | #else 12 | #include "fann/src/include/fixedfann.h" 13 | #undef FANNPRINTF 14 | #define FANNPRINTF "%08x" 15 | #endif 16 | 17 | static char * usage_message = 18 | "Usage: fann-eval -n[CONFIG] -t[TRAIN_FILE]\n" 19 | "Fun feedforward inference for a given FANN configuration (CONFIG) and testing\n" 20 | "file (TEST FILE).\n" 21 | "\n" 22 | "Options:\n" 23 | " -n, --nn-config [CONFIG] read FANN floating point network from FILE\n" 24 | " -t, --test-file [TRAIN FILE]\n" 25 | " read FANN testing file FILE\n" 26 | " --verbose print information while running\n" 27 | "\n"; 28 | 29 | void usage () { 30 | printf("Usage: %s", usage_message); 31 | } 32 | 33 | int main (int argc, char * argv[]) { 34 | PRINT_NOTICES(COPYRIGHT_FANN); 35 | int exit_code = 0; 36 | 37 | struct fann * ann = NULL; 38 | struct fann_train_data * data = NULL; 39 | 40 | int c; 41 | static int opt_verbose = 0; 42 | while (1) { 43 | static struct option long_options[] = { 44 | {"nn-config", required_argument, 0, 'n'}, 45 | {"train-file", required_argument, 0, 't'}, 46 | {"verbose", no_argument, &opt_verbose, 1}, 47 | {0, 0, 0, 0} 48 | }; 49 | int option_index = 0; 50 | c = getopt_long (argc, argv, "n:t:", 51 | long_options, &option_index); 52 | if (c == -1) 53 | break; 54 | switch (c) { 55 | case 'n': ann = fann_create_from_file(optarg); break; 56 | case 't': data = fann_read_train_from_file(optarg); break; 57 | } 58 | } 59 | 60 | if (ann == NULL || data == NULL) { 61 | fprintf(stderr, "[ERROR] Missing required input argument\n\n"); 62 | usage(); 63 | exit_code = -1; 64 | goto bail; 65 | } 66 | 67 | // double mse; 68 | fann_type * calc_out; 69 | for (int i = 0; i < fann_length_train_data(data); i++) { 70 | calc_out = fann_test(ann, data->input[i], data->output[i]); 71 | if (opt_verbose) { 72 | for (int k = 0; k < data->num_output; k++) { 73 | printf("[info] %d 
-> " FANNPRINTF " \n", k, calc_out[k]); 74 | } 75 | } 76 | } 77 | 78 | bail: 79 | if (ann != NULL) 80 | fann_destroy(ann); 81 | if (data != NULL) 82 | fann_destroy_train(data); 83 | 84 | return exit_code; 85 | } 86 | -------------------------------------------------------------------------------- /tools/src/fann-float-to-fixed.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include 4 | 5 | #include "fann/src/include/fann.h" 6 | #include "tools/src/copyright.h" 7 | 8 | void usage() { 9 | printf("usage: fann-float-to-fixed [OPTIONS] \n" 10 | "\n" 11 | "Options:\n" 12 | " -h, --help print this help and exit\n" 13 | " -v, --verbose print exhaustive debug info\n" 14 | ); 15 | } 16 | 17 | int main(int argc, char *argv[]) 18 | { 19 | PRINT_NOTICES(COPYRIGHT_FANN); 20 | struct fann * ann = NULL; 21 | char * fixed_file_name = NULL; 22 | unsigned int decimal_point = -1; 23 | 24 | int c; 25 | int flag_verbose = 0, exit_code = 0; 26 | while ((c = getopt (argc, argv, "hv")) != -1) 27 | switch (c) { 28 | case 'h': 29 | usage(); 30 | goto bail; 31 | break; 32 | case 'v': 33 | flag_verbose = 1; 34 | break; 35 | default: 36 | abort (); 37 | } 38 | 39 | int index; 40 | for (index = 1; index < argc - optind + 1; index++) { 41 | int index_optind = optind + index - 1; 42 | switch (index) { 43 | case 1: 44 | if ((ann = fann_create_from_file(argv[index_optind])) == 0) { 45 | fprintf(stderr, "[ERROR] Failed to read ANN %s\n", argv[index_optind]); 46 | usage(); 47 | exit_code = 2; 48 | goto bail; 49 | } 50 | if (flag_verbose) 51 | printf("[INFO] Reading floating point net: %s\n", argv[index_optind]); 52 | break; 53 | case 2: 54 | fixed_file_name = argv[index_optind]; 55 | if (flag_verbose) 56 | printf("[INFO] Will write to fixed point net: %s\n", argv[index_optind]); 57 | break; 58 | default: 59 | fprintf(stderr, "[ERROR] Too many arguments\n\n"); 60 | usage(); 61 | exit_code = 1; 62 | goto bail; 63 | } 
64 | } 65 | 66 | if (ann == NULL || fixed_file_name == NULL) { 67 | fprintf(stderr, "[ERROR] Missing input arguments\n\n"); 68 | usage(); 69 | exit_code = 1; 70 | goto bail; 71 | } 72 | 73 | decimal_point = fann_save_to_fixed(ann, fixed_file_name); 74 | if (flag_verbose) 75 | printf("[INFO] Decimal point is: %d\n", decimal_point); 76 | 77 | bail: 78 | if (ann != NULL) 79 | fann_destroy(ann); 80 | return exit_code; 81 | } 82 | -------------------------------------------------------------------------------- /tools/src/fann-train-to-c-header-fixed.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "fann/src/include/fixedfann.h" 9 | #include "tools/src/copyright.h" 10 | 11 | int main (int argc, char * argv[]) { 12 | PRINT_NOTICES(COPYRIGHT_FANN); 13 | FILE * fp; 14 | struct fann * ann; 15 | int i, j, decimal_point; 16 | unsigned int num_data, num_input, num_output; 17 | fann_type ** inputs, ** outputs_expected, ** outputs_fann; 18 | 19 | // Check that we have two input arguments 20 | if (argc != 4) { 21 | printf("Usage: %s \n", argv[0]); 22 | return -1; 23 | } 24 | 25 | // Open the training file and create the network 26 | fp = fopen(argv[2], "r"); 27 | if (fp == NULL) { 28 | fprintf(stderr, "Failed to open file %s\n", argv[2]); 29 | return -1; 30 | } 31 | ann = fann_create_from_file(argv[1]); 32 | if (ann == NULL) { 33 | fprintf(stderr, "Failed to open FANN config %s\n", argv[1]); 34 | return -2; 35 | } 36 | 37 | decimal_point = ann->decimal_point; 38 | 39 | // Read the header 40 | fscanf(fp, "%d %d %d", &num_data, &num_input, &num_output); 41 | printf("// Automatically generated using:\n// %s %s %s %s\n", 42 | argv[0], argv[1], argv[2], argv[3]); 43 | printf("static int %s_decimal_point __attribute__((unused)) = %d;\n", argv[3], 44 | decimal_point); 45 | printf("static int %s_num_data __attribute__((unused)) = %d;\n", 
argv[3], 46 | num_data); 47 | printf("static int %s_num_input __attribute__((unused)) = %d;\n", argv[3], 48 | num_input); 49 | printf("static int %s_num_output __attribute__((unused)) = %d;\n", argv[3], 50 | num_output); 51 | inputs = (fann_type **) malloc(num_data * sizeof(fann_type *)); 52 | outputs_expected = (fann_type **) malloc(num_data * sizeof(fann_type *)); 53 | outputs_fann = (fann_type **) malloc(num_data * sizeof(fann_type *)); 54 | for (i = 0; i < num_data; i++) { 55 | inputs[i] = (fann_type *) malloc(num_input * sizeof(fann_type)); 56 | outputs_expected[i] = (fann_type *) malloc(num_output * sizeof(fann_type)); 57 | outputs_fann[i] = (fann_type *) malloc(num_output * sizeof(fann_type)); // output rows hold num_output values (filled below), not num_input 58 | } 59 | 60 | // Read all the input--output pairs 61 | for (i = 0; i < num_data; i++) { 62 | for (j = 0; j < num_input; j++) 63 | fscanf(fp, "%d", &inputs[i][j]); 64 | for (j = 0; j < num_output; j++) 65 | fscanf(fp, "%d", &outputs_expected[i][j]); 66 | memcpy(outputs_fann[i], fann_run(ann, inputs[i]), 67 | num_output * sizeof(fann_type)); 68 | } 69 | 70 | // Print out the inputs, expected, and actual outputs (what FANN produced) 71 | printf("static int %s_inputs[%d][%d] __attribute__((unused)) = {\n", argv[3], 72 | num_data, num_input); 73 | for (i = 0; i < num_data; i++) { 74 | printf(" {"); 75 | for (j = 0; j < num_input - 1; j++) 76 | printf("0x%08x,", (int) (inputs[i][j])); 77 | printf("0x%08x},\n", (int) (inputs[i][j])); 78 | } 79 | printf("};\n"); 80 | 81 | printf("static int %s_outputs_expected[%d][%d] __attribute__((unused)) = {\n", 82 | argv[3], num_data, 83 | num_output); 84 | for (i = 0; i < num_data; i++) { 85 | printf(" {"); 86 | for (j = 0; j < num_output - 1; j++) 87 | printf("0x%08x,", (int) (outputs_expected[i][j])); 88 | printf("0x%08x},\n", (int) (outputs_expected[i][j])); 89 | } 90 | printf("};\n"); 91 | 92 | printf("static int %s_outputs_fann[%d][%d] __attribute__((unused)) = {\n", 93 | argv[3], num_data, 94 | num_output); 95 | for (i = 0; i < 
num_data; i++) { 96 | printf(" {"); 97 | for (j = 0; j < num_output - 1; j++) 98 | printf("0x%08x,", (int) (outputs_fann[i][j])); 99 | printf("0x%08x},\n", (int) (outputs_fann[i][j])); 100 | } 101 | printf("};\n"); 102 | 103 | // Cleanup 104 | for (i = 0; i < num_data; i++) { 105 | free(inputs[i]); 106 | free(outputs_expected[i]); 107 | free(outputs_fann[i]); 108 | } 109 | free(inputs); 110 | free(outputs_expected); 111 | free(outputs_fann); 112 | fann_destroy(ann); 113 | fclose(fp); 114 | return 0; 115 | } 116 | -------------------------------------------------------------------------------- /tools/src/fann-train-to-c-header.c: -------------------------------------------------------------------------------- 1 | // See LICENSE.BU for license details. 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "fann/src/include/fann.h" 9 | #include "tools/src/copyright.h" 10 | 11 | int main (int argc, char * argv[]) { 12 | PRINT_NOTICES(COPYRIGHT_FANN); 13 | FILE * fp; 14 | struct fann * ann; 15 | int i, j, decimal_point, multiplier; 16 | unsigned int num_data, num_input, num_output; 17 | fann_type ** inputs, ** outputs_expected, ** outputs_fann; 18 | 19 | // Check that we have two input arguments 20 | if (argc != 4) { 21 | printf("Usage: %s \n", argv[0]); 22 | return -1; 23 | } 24 | 25 | // Open the training file and create the network 26 | fp = fopen(argv[2], "r"); 27 | if (fp == NULL) { 28 | fprintf(stderr, "Failed to open file %s\n", argv[2]); 29 | return -1; 30 | } 31 | ann = fann_create_from_file(argv[1]); 32 | if (ann == NULL) { 33 | fprintf(stderr, "Failed to open FANN config %s\n", argv[1]); 34 | return -2; 35 | } 36 | 37 | // Figure out what the decimal point should be 38 | decimal_point = fann_save_to_fixed(ann, "/dev/null"); 39 | multiplier = pow(2, decimal_point); 40 | 41 | // Read the header 42 | fscanf(fp, "%d %d %d", &num_data, &num_input, &num_output); 43 | printf("// Automatically generated using:\n// %s %s %s %s\n", 44 | argv[0], 
argv[1], argv[2], argv[3]); 45 | printf("static int %s_decimal_point __attribute__((unused)) = %d;\n", argv[3], 46 | decimal_point); 47 | printf("static int %s_num_data __attribute__((unused)) = %d;\n", argv[3], 48 | num_data); 49 | printf("static int %s_num_input __attribute__((unused)) = %d;\n", argv[3], 50 | num_input); 51 | printf("static int %s_num_output __attribute__((unused)) = %d;\n", argv[3], 52 | num_output); 53 | inputs = (fann_type **) malloc(num_data * sizeof(fann_type *)); 54 | outputs_expected = (fann_type **) malloc(num_data * sizeof(fann_type *)); 55 | outputs_fann = (fann_type **) malloc(num_data * sizeof(fann_type *)); 56 | for (i = 0; i < num_data; i++) { 57 | inputs[i] = (fann_type *) malloc(num_input * sizeof(fann_type)); 58 | outputs_expected[i] = (fann_type *) malloc(num_output * sizeof(fann_type)); 59 | outputs_fann[i] = (fann_type *) malloc(num_output * sizeof(fann_type)); // output rows hold num_output values (filled below), not num_input 60 | } 61 | 62 | // Read all the input--output pairs 63 | for (i = 0; i < num_data; i++) { 64 | for (j = 0; j < num_input; j++) 65 | fscanf(fp, "%f", &inputs[i][j]); 66 | for (j = 0; j < num_output; j++) 67 | fscanf(fp, "%f", &outputs_expected[i][j]); 68 | memcpy(outputs_fann[i], fann_run(ann, inputs[i]), 69 | num_output * sizeof(fann_type)); 70 | } 71 | 72 | // Print out the inputs, expected, and actual outputs (what FANN produced) 73 | printf("static int %s_inputs[%d][%d] __attribute__((unused)) = {\n", argv[3], 74 | num_data, num_input); 75 | for (i = 0; i < num_data; i++) { 76 | printf(" {"); 77 | for (j = 0; j < num_input - 1; j++) 78 | printf("%d,", (int) (inputs[i][j] * multiplier)); 79 | printf("%d},\n", (int) (inputs[i][j] * multiplier)); 80 | } 81 | printf("};\n"); 82 | 83 | printf("static int %s_outputs_expected[%d][%d] __attribute__((unused)) = {\n", 84 | argv[3], num_data, 85 | num_output); 86 | for (i = 0; i < num_data; i++) { 87 | printf(" {"); 88 | for (j = 0; j < num_output - 1; j++) 89 | printf("%d,", (int) (outputs_expected[i][j] * multiplier)); 90 | 
printf("%d},\n", (int) (outputs_expected[i][j] * multiplier)); 91 | } 92 | printf("};\n"); 93 | 94 | printf("static int %s_outputs_fann[%d][%d] __attribute__((unused)) = {\n", 95 | argv[3], num_data, 96 | num_output); 97 | for (i = 0; i < num_data; i++) { 98 | printf(" {"); 99 | for (j = 0; j < num_output - 1; j++) 100 | printf("%d,", (int) (outputs_fann[i][j] * multiplier)); 101 | printf("%d},\n", (int) (outputs_fann[i][j] * multiplier)); 102 | } 103 | printf("};\n"); 104 | 105 | // Cleanup 106 | for (i = 0; i < num_data; i++) { 107 | free(inputs[i]); 108 | free(outputs_expected[i]); 109 | free(outputs_fann[i]); 110 | } 111 | free(inputs); 112 | free(outputs_expected); 113 | free(outputs_fann); 114 | fann_destroy(ann); 115 | fclose(fp); 116 | return 0; 117 | } 118 | -------------------------------------------------------------------------------- /util/.vimrc: -------------------------------------------------------------------------------- 1 | set tabstop=2 2 | set shiftwidth=2 3 | set softtabstop=2 4 | set expandtab 5 | set nosmarttab 6 | 7 | map gf :CtrlPClearAllCaches :CtrlP features_wip 8 | --------------------------------------------------------------------------------