├── .gitignore ├── README.md ├── buildtools ├── Makefile.base ├── verilogcopy.sh ├── vivado-impl-kc705.tcl └── vivado-impl-vc707.tcl ├── core ├── kc705 │ └── core_gen_pcie.tcl └── vc707 │ └── core_gen_pcie.tcl ├── cpp ├── DRAMHostDMA.cpp ├── DRAMHostDMA.h ├── PcieBdpi.cpp ├── ShmFifo.cpp ├── ShmFifo.h ├── bdbmpcie.cpp ├── bdbmpcie.h ├── dmacircularqueue.cpp ├── dmacircularqueue.h ├── dmasplitter.cpp └── dmasplitter.h ├── distribute ├── bsrescan │ ├── Makefile │ └── main.cpp ├── driver │ ├── 99-bscpcie.rules │ ├── Makefile │ ├── bdbmpcie.c │ └── test │ │ ├── Makefile │ │ └── test.cpp ├── manager_deprecated │ ├── Makefile │ └── main.cpp ├── program.sh └── program.tcl ├── dram ├── kc705 │ ├── Makefile │ ├── ddr3_gen_tcl.txt │ ├── ddr3_wrapper.v │ ├── dram.xdc │ ├── mig_a.prj │ ├── syncfifo.xdc │ └── synth-ip.tcl ├── src │ ├── DDR3Common.bsv │ ├── DDR3Controller.bsv │ ├── DDR3Sim.bsv │ ├── DRAMBurstController.bsv │ ├── DRAMController.bsv │ ├── DRAMControllerTypes.bsv │ └── DRAMHostDMA.bsv └── vc707 │ ├── Makefile │ ├── chipscope_wrapper.v │ ├── ddr3_v2_0.xdc │ ├── ddr3_wrapper.v │ ├── dram.xdc │ ├── mig_a.prj │ ├── syncfifo.xdc │ └── synth-ip.tcl ├── examples ├── dmatest │ ├── HwMain.bsv │ ├── Makefile │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── dmesg-first.txt │ ├── run.sh │ ├── sw │ └── user-ip.tcl ├── dramtest │ ├── HwMain.bsv │ ├── Makefile │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── run.sh │ ├── sw │ └── user-ip.tcl ├── float │ ├── HwMain.bsv │ ├── Makefile │ ├── README.md │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── run.sh │ ├── sw │ └── user-ip.tcl ├── queuealu │ ├── HwMain.bsv │ ├── Makefile │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── run.sh │ ├── sw │ └── user-ip.tcl ├── simple │ ├── HwMain.bsv │ ├── Makefile │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── run.sh │ ├── sw │ └── user-ip.tcl ├── sortreduce │ ├── DMAReadOrdered.bsv │ ├── DramStripeLoader.bsv │ ├── 
HwMain.bsv │ ├── Makefile │ ├── SortReduceSingle.bsv │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── run.sh │ ├── sw │ └── user-ip.tcl ├── streaming │ ├── HwMain.bsv │ ├── Makefile │ ├── StreamKernel.bsv │ ├── Top.bsv │ ├── cpp │ │ ├── Makefile │ │ └── main.cpp │ ├── run.sh │ ├── sw │ └── user-ip.tcl └── test │ ├── hw │ ├── DmaSplitter.bsv │ ├── HwMain.bsv │ ├── Makefile │ ├── Platform.bsv │ ├── Top.bsv │ ├── bsimres.txt │ ├── res.txt │ ├── run.sh │ ├── verilogcopy.sh │ └── vivado-impl.tcl │ └── sw │ ├── Makefile │ ├── flashmanager.cpp │ ├── flashmanager.h │ └── main.cpp └── src ├── ClockImport.bsv ├── CompletionFIFO.bsv ├── DMACircularQueue.bsv ├── DMAReadHelper.bsv ├── DMAWideCtrl.bsv ├── DMAWriteHelper.bsv ├── MergeN.bsv ├── PcieCtrl.bsv ├── PcieCtrl_bsim.bsv ├── PcieImport.bsv ├── ScatterN.bsv ├── Shifter.bsv ├── pcie_7x_0_pipe_clock.v ├── pcie_7x_0_support.v ├── xilinx_pcie_2_1_ep_7x.v ├── xilinx_pcie_7x_ep_x8g2_KC705.xdc └── xilinx_pcie_7x_ep_x8g2_VC707.xdc /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | build_bsim 3 | bsim 4 | 5 | .*.sw? 6 | .bit 7 | *.o 8 | obj 9 | 10 | *.jou 11 | *.log 12 | res.txt 13 | 14 | fsm_encoding.os 15 | webtalk.jou 16 | webtalk.log 17 | 18 | .sched 19 | core 20 | vc707 21 | kc705 22 | 23 | pcie_7x_0 24 | *.Xil 25 | *.bak 26 | *.prj 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bluespec PCIe library 2 | 3 | BluespecPCIe is a PCIe library for the Bluespec language. 4 | It includes a Bluespec wrapper for the Xilinx PCIe core, device driver for Linux, as well as a userspace library for easily communicating with the FPGA device. 5 | It supports DMA as well as memory-mapped I/O over PCIe. 6 | It also supports reprogramming the FPGA and using the PCIe without rebooting in between. 
7 | 8 | The biggest strength of BluespecPCIe over other PCIe implementations is its simplicity. 9 | A DMA memcpy demo builds in 5 minutes using Vivado, and everything is designed to be plugged into a Bluespec design as a module. BluespecPCIe does not need a special build tool, script, or meta-language. 10 | 11 | BluespecPCIe currently supports the KC705 and VC707 boards. 12 | 13 | BluespecPCIe is still under active development. If you discover bugs or have feature requests, please let me know! 14 | 15 | ## Getting Started 16 | 17 | ### Clone bluelib 18 | BluespecPCIe depends on the bluelib library, which can be found here [https://github.com/sangwoojun/bluelib](https://github.com/sangwoojun/bluelib). 19 | 20 | By default, bluelib must be cloned at the same level as BluespecPCIe (e.g., ~/bluespecpcie and ~/bluelib). 21 | However, you can always change the individual Makefiles to point to different locations. 22 | 23 | ### Installing the software 24 | - Driver: In **distribute/driver**, run **make**, and **sudo make install**. 25 | - Rescan tool: **bsrescan** lets the BIOS recognize the PCIe device without a system reboot between re-programming the FPGA. In **distribute/bsrescan**, run **make**, and **sudo make install**. This installs **bsrescan** to **/opt/bluespecpcie_manager/**. You may want to add **/opt/bluespecpcie_manager/** to your **PATH**. 26 | 27 | ### Building and running a demo 28 | - Example designs are in **examples/**. For the basic demo, go to **examples/simple**. 29 | - Generate the Xilinx core by running **make core BOARD=vc707** or **make core BOARD=kc705**, depending on the target board. This only needs to be done once. 30 | - Build the demo by running **make BOARD=vc707** or **make BOARD=kc705**. 31 | - Program the FPGA by running **vivado -mode batch -source ../../distribute/program.tcl** 32 | - **_If_** this is the first time programming this FPGA device after board power-on, the system must be rebooted. 
33 | - **_If_** this device has been programmed and rebooted before, run **/opt/bluespecpcie_manager/bsrescan**. This will re-discover the device and reload the driver. 34 | - The device is programmed and ready to communicate with. 35 | - Go to **./cpp** and run **make**. 36 | - Run **./obj/main** to run the software demo. 37 | 38 | ## Working examples 39 | 40 | - **examples/simple**: Memory-mapped I/O example 41 | - **examples/dmatest**: DMA example 42 | - **examples/dramtest**: Uses the 1 GB on-board DRAM on both VC707 and KC705 43 | - **examples/float**: Floating point example 44 | 45 | 46 | ## Developing custom designs 47 | 48 | When creating a new project, it's simple to start by creating a copy of an example project. 49 | If creating a project outside the examples directory, some variables need to be modified for the project to build correctly. 50 | 51 | - in hardware Makefile, change **LIBPATH** 52 | - in software Makefile, change **LIBPATH** 53 | - in vivado-impl.tcl, change **pciedir** 54 | 55 | **Top.bsv** contains the top level module. The interface **interface PcieImportPins pcie_pins** and the top level input clocks and resets including **pcie_clk_p** need to be maintained. 56 | 57 | Software related files are located in the **cpp** directory. 58 | 59 | ### Using more cores 60 | 61 | You are free to copy and modify Makefile.base in the buildtools directory, as well as the vivado-impl\*.tcl files. 62 | 63 | However, it may be simpler to add cores and other functionality using the **user-ip.tcl** file, which is included by the implementation tcl script before synthesis starts. 64 | For an example of how to use this, please look at the examples **dramtest** and **float**. 65 | 66 | 67 | ## Simulation using Bluesim. 68 | 69 | BluespecPCIe emulates the PCIe using a shared memory FIFO. 70 | 71 | When building the hardware, run **make bsim**. 72 | When building the software, also run **make bsim**. 
73 | 74 | A symlink to the bsim software binary (or the actual binary) should be created at the top level (where the Makefile is), with the name **./sw**. 75 | 76 | To execute the hardware bsim simulation and software, run **./run.sh**. 77 | 78 | **Note**: The shared memory files may not be correctly deleted after a run. You may have to delete them using **rm /dev/shm/bdbm\*** 79 | 80 | ## Environment 81 | 82 | - Development was done on Vivado 2018.2 83 | 84 | -------------------------------------------------------------------------------- /buildtools/Makefile.base: -------------------------------------------------------------------------------- 1 | #BSVPATH =$(LIBPATH)/src/:$(BLUESPEC_HOME)/BSVSource/Xilinx/ 2 | BSVPATH =$(LIBPATH)/src/ 3 | COREPATH =$(LIBPATH)/core/$(BOARD)/ 4 | #DRAMPATH =$(LIBPATH)/dram/src/ 5 | 6 | BSCFLAGS = -show-schedule -aggressive-conditions 7 | 8 | BSCFLAGS_SYNTH = -bdir ./$(BOARD)/obj -vdir ./$(BOARD)/verilog/top -simdir ./$(BOARD)/obj -info-dir ./$(BOARD) -fdir ./$(BOARD) -D $(BOARD) 9 | BSCFLAGS_BSIM = -bdir ./bsim/obj -vdir ./bsim/verilog/top -simdir ./bsim/obj -info-dir ./bsim -fdir ./bsim 10 | 11 | BSIM_CPPFILES =$(LIBPATH)/cpp/PcieBdpi.cpp \ 12 | $(LIBPATH)/cpp/ShmFifo.cpp \ 13 | $(CUSTOMCPP_BSIM) 14 | 15 | DEBUGFLAGS = -D BSIM 16 | 17 | all: 18 | mkdir -p $(BOARD) 19 | mkdir -p $(BOARD)/obj 20 | mkdir -p $(BOARD)/verilog 21 | mkdir -p $(BOARD)/verilog/top 22 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH) $(CUSTOMBSV) -verilog -u -g mkProjectTop Top.bsv 23 | cp $(BUILDTOOLS)/vivado-impl-$(BOARD).tcl ./$(BOARD)/impl.tcl 24 | cp user-ip.tcl $(BOARD)/ || : 25 | cd $(BOARD); cd verilog/top; ../../../$(BUILDTOOLS)/verilogcopy.sh; cd ../../; vivado -mode batch -source impl.tcl -tclargs $(TCLARGS) 26 | tar czf $(BOARD).tgz $(BOARD)/ 27 | mv $(BOARD).tgz $(BOARD)/ 28 | 29 | bsim: HwMain.bsv Top.bsv 30 | mkdir -p bsim 31 | mkdir -p bsim/obj 32 | mkdir -p $(BOARD)/verilog 33 | mkdir -p bsim/verilog/top 34 | bsc 
$(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH) $(CUSTOMBSV) -sim -u -g mkProjectTop_bsim Top.bsv 35 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkProjectTop_bsim -o bsim/obj/bsim bsim/obj/*.ba $(BSIM_CPPFILES) 36 | 37 | clean: 38 | rm -rf $(BOARD) 39 | rm -rf bsim 40 | 41 | .PHONY: all 42 | 43 | core: 44 | cd $(COREPATH) ; vivado -mode batch -source core_gen_pcie.tcl -nolog -nojournal 45 | 46 | -------------------------------------------------------------------------------- /buildtools/verilogcopy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VFILES=" 4 | SyncResetA.v 5 | SyncRegister.v 6 | SyncHandshake.v 7 | MakeReset0.v 8 | MakeResetA.v 9 | SizedFIFO.v 10 | Counter.v 11 | TriState.v 12 | FIFOL1.v 13 | FIFOL2.v 14 | FIFO1.v 15 | FIFO2.v 16 | ResetInverter.v 17 | SyncFIFO.v 18 | ClockDiv.v 19 | ResetEither.v 20 | MakeReset.v 21 | SyncReset0.v 22 | BRAM2.v 23 | SyncWire.v 24 | " 25 | 26 | CURDIR=`pwd` 27 | cd $BLUESPECDIR/Verilog; 28 | #cp *.v $CURDIR/ 29 | 30 | for VFILE in $VFILES ; 31 | do 32 | echo $VFILE 33 | cp $VFILE $CURDIR/ 34 | done 35 | -------------------------------------------------------------------------------- /buildtools/vivado-impl-kc705.tcl: -------------------------------------------------------------------------------- 1 | set_param general.maxThreads 8 2 | 3 | set boardname kc705 4 | 5 | set pciedir ../../../ 6 | 7 | if { $::argc > 0 } { 8 | set pciedir [lindex $argv 0] 9 | puts $pciedir 10 | } else { 11 | puts "using default pcie core path" 12 | 13 | } 14 | 15 | set outputDir ./hw 16 | file mkdir $outputDir 17 | 18 | set partname {xc7k325tffg900-2} 19 | 20 | read_verilog [ glob {verilog/top/*.v} ] 21 | 22 | set_property part $partname [current_project] 23 | 24 | ############# Pcie Stuff 25 | read_ip $pciedir/core/kc705/pcie_7x_0/pcie_7x_0.xci 26 | read_verilog [ glob $pciedir/src/*.v ] 27 | read_xdc $pciedir/src/xilinx_pcie_7x_ep_x8g2_KC705.xdc 28 | 
############## end Pcie Stuff 29 | 30 | if { [file exists "user-ip.tcl"] == 1} { 31 | source user-ip.tcl 32 | } 33 | 34 | 35 | synth_design -name mkProjectTop -top mkProjectTop -part $partname -flatten rebuilt 36 | 37 | write_checkpoint -force $outputDir/mkprojecttop_post_synth 38 | report_timing_summary -verbose -file $outputDir/mkprojecttop_post_synth_timing_summary.rpt 39 | report_timing -sort_by group -max_paths 100 -path_type summary -file $outputDir/mkprojecttop_post_synth_timing.rpt 40 | report_utilization -verbose -file $outputDir/mkprojecttop_post_synth_utilization.txt 41 | report_utilization -hierarchical -file $outputDir/mkprojecttop_post_synth_util_hier.rpt 42 | report_datasheet -file $outputDir/mkprojecttop_post_synth_datasheet.txt 43 | write_verilog -force $outputDir/mkprojecttop_netlist.v 44 | write_debug_probes -force probes.ltx 45 | #report_power -file $outputDir/mkprojecttop_post_synth_power.rpt 46 | 47 | opt_design 48 | # power_opt_design 49 | place_design 50 | phys_opt_design 51 | write_checkpoint -force $outputDir/mkprojecttop_post_place 52 | report_timing_summary -file $outputDir/mkprojecttop_post_place_timing_summary.rpt 53 | route_design 54 | write_checkpoint -force $outputDir/mkprojecttop_post_route 55 | report_timing_summary -file $outputDir/mkprojecttop_post_route_timing_summary.rpt 56 | report_timing -sort_by group -max_paths 100 -path_type summary -file $outputDir/mkprojecttop_post_route_timing.rpt 57 | report_clock_utilization -file $outputDir/mkprojecttop_clock_util.rpt 58 | report_utilization -file $outputDir/mkprojecttop_post_route_util.rpt 59 | report_datasheet -file $outputDir/mkprojecttop_post_route_datasheet.rpt 60 | #report_power -file $outputDir/mkprojecttop_post_route_power.rpt 61 | #report_drc -file $outputDir/mkprojecttop_post_imp_drc.rpt 62 | #write_verilog -force $outputDir/mkprojecttop_impl_netlist.v 63 | write_xdc -no_fixed_only -force $outputDir/mkprojecttop_impl.xdc 64 | write_bitstream -force -bin_file 
$outputDir/mkProjectTop.bit 65 | -------------------------------------------------------------------------------- /buildtools/vivado-impl-vc707.tcl: -------------------------------------------------------------------------------- 1 | set_param general.maxThreads 8 2 | 3 | set boardname vc707 4 | 5 | set pciedir ../../../ 6 | 7 | if { $::argc > 0 } { 8 | set pciedir [lindex $argv 0] 9 | puts $pciedir 10 | } else { 11 | puts "using default pcie core path" 12 | 13 | } 14 | 15 | set outputDir ./hw 16 | file mkdir $outputDir 17 | 18 | set partname {xc7vx485tffg1761-2} 19 | 20 | read_verilog [ glob {verilog/top/*.v} ] 21 | 22 | set_property part $partname [current_project] 23 | 24 | ############# Pcie Stuff 25 | read_ip $pciedir/core/vc707/pcie_7x_0/pcie_7x_0.xci 26 | read_verilog [ glob $pciedir/src/*.v ] 27 | read_xdc $pciedir/src/xilinx_pcie_7x_ep_x8g2_VC707.xdc 28 | ############## end Pcie Stuff 29 | 30 | if { [file exists "user-ip.tcl"] == 1} { 31 | source user-ip.tcl 32 | } 33 | 34 | 35 | synth_design -name mkProjectTop -top mkProjectTop -part $partname -flatten rebuilt 36 | 37 | write_checkpoint -force $outputDir/mkprojecttop_post_synth 38 | report_timing_summary -verbose -file $outputDir/mkprojecttop_post_synth_timing_summary.rpt 39 | report_timing -sort_by group -max_paths 100 -path_type summary -file $outputDir/mkprojecttop_post_synth_timing.rpt 40 | report_utilization -verbose -file $outputDir/mkprojecttop_post_synth_utilization.txt 41 | report_utilization -hierarchical -file $outputDir/mkprojecttop_post_synth_util_hier.rpt 42 | report_datasheet -file $outputDir/mkprojecttop_post_synth_datasheet.txt 43 | write_verilog -force $outputDir/mkprojecttop_netlist.v 44 | write_debug_probes -force probes.ltx 45 | #report_power -file $outputDir/mkprojecttop_post_synth_power.rpt 46 | 47 | 48 | opt_design 49 | # power_opt_design 50 | place_design 51 | phys_opt_design 52 | write_checkpoint -force $outputDir/mkprojecttop_post_place 53 | report_timing_summary -file 
$outputDir/mkprojecttop_post_place_timing_summary.rpt 54 | route_design 55 | write_checkpoint -force $outputDir/mkprojecttop_post_route 56 | report_timing_summary -file $outputDir/mkprojecttop_post_route_timing_summary.rpt 57 | report_timing -sort_by group -max_paths 100 -path_type summary -file $outputDir/mkprojecttop_post_route_timing.rpt 58 | report_clock_utilization -file $outputDir/mkprojecttop_clock_util.rpt 59 | report_utilization -file $outputDir/mkprojecttop_post_route_util.rpt 60 | report_utilization -hierarchical -file $outputDir/mkprojecttop_post_route_util_hier.rpt 61 | report_datasheet -file $outputDir/mkprojecttop_post_route_datasheet.rpt 62 | #report_power -file $outputDir/mkprojecttop_post_route_power.rpt 63 | #report_drc -file $outputDir/mkprojecttop_post_imp_drc.rpt 64 | #write_verilog -force $outputDir/mkprojecttop_impl_netlist.v 65 | write_xdc -no_fixed_only -force $outputDir/mkprojecttop_impl.xdc 66 | write_bitstream -force -bin_file $outputDir/mkProjectTop.bit 67 | -------------------------------------------------------------------------------- /core/kc705/core_gen_pcie.tcl: -------------------------------------------------------------------------------- 1 | #proc core_gen_pcie {} { 2 | set coredir "./" 3 | set corename "pcie_7x_0" 4 | 5 | file mkdir $coredir 6 | if [file exists ./$coredir/$corename] { 7 | file delete -force ./$coredir/$corename 8 | } 9 | 10 | create_project -name local_synthesized_ip -in_memory -part xc7k325tffg900-2 11 | set_property board_part xilinx.com:kc705:part0:1.5 [current_project] 12 | create_ip -name pcie_7x -vendor xilinx.com -library ip -version 3.* -module_name $corename -dir ./$coredir 13 | #set_property -dict [list CONFIG.Maximum_Link_Width {X8} CONFIG.Interface_Width {128_bit} CONFIG.Bar0_Scale {Megabytes} CONFIG.Bar0_Size {1} CONFIG.Link_Speed {2.5_GT/s} CONFIG.User_Clk_Freq {125} CONFIG.Device_ID {7028} CONFIG.Max_Payload_Size {512_bytes} CONFIG.Trgt_Link_Speed {4'h1} CONFIG.PCIe_Blk_Locn {X0Y0} 
CONFIG.Trans_Buf_Pipeline {None} CONFIG.Ref_Clk_Freq {100_MHz}] [get_ips $corename] 14 | set_property -dict [list CONFIG.Maximum_Link_Width {X8} CONFIG.Link_Speed {5.0_GT/s} CONFIG.Bar0_Scale {Megabytes} CONFIG.Bar0_Size {1} CONFIG.IntX_Generation {false} CONFIG.MSI_Enabled {false} CONFIG.Interface_Width {128_bit} CONFIG.User_Clk_Freq {250} CONFIG.Device_ID {7028} CONFIG.Max_Payload_Size {256_bytes} CONFIG.Trgt_Link_Speed {4'h2} CONFIG.Legacy_Interrupt {NONE} CONFIG.PCIe_Blk_Locn {X0Y0} CONFIG.Trans_Buf_Pipeline {None} CONFIG.Ref_Clk_Freq {100_MHz}] [get_ips $corename] 15 | 16 | generate_target {instantiation_template} [get_files ./$coredir/$corename/$corename.xci] 17 | generate_target all [get_files ./$coredir/$corename/$corename.xci] 18 | create_ip_run [get_files -of_objects [get_fileset sources_1] ./$coredir/$corename/$corename.xci] 19 | generate_target {Synthesis} [get_files ./$coredir/$corename/$corename.xci] 20 | read_ip ./$coredir/$corename/$corename.xci 21 | synth_ip [get_ips $corename] 22 | #} 23 | #core_gen_pcie 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /core/vc707/core_gen_pcie.tcl: -------------------------------------------------------------------------------- 1 | #proc core_gen_pcie {} { 2 | set coredir "./" 3 | set corename "pcie_7x_0" 4 | 5 | file mkdir $coredir 6 | if [file exists ./$coredir/$corename] { 7 | file delete -force ./$coredir/$corename 8 | } 9 | 10 | create_project -name local_synthesized_ip -in_memory -part xc7vx485tffg1761-2 11 | set_property board_part xilinx.com:vc707:part0:1.0 [current_project] 12 | create_ip -name pcie_7x -version 3.* -vendor xilinx.com -library ip -module_name $corename -dir ./$coredir 13 | set_property -dict [list CONFIG.Maximum_Link_Width {X8} CONFIG.Link_Speed {5.0_GT/s} CONFIG.Bar0_Scale {Megabytes} CONFIG.Bar0_Size {1} CONFIG.Base_Class_Menu {Memory_controller} CONFIG.Use_Class_Code_Lookup_Assistant {true} 
CONFIG.Xlnx_Ref_Board {VC707} CONFIG.Ref_Clk_Freq {100_MHz} CONFIG.Max_Payload_Size {256_bytes}] [get_ips $corename] 14 | 15 | generate_target {instantiation_template} [get_files ./$coredir/$corename/$corename.xci] 16 | generate_target all [get_files ./$coredir/$corename/$corename.xci] 17 | create_ip_run [get_files -of_objects [get_fileset sources_1] ./$coredir/$corename/$corename.xci] 18 | generate_target {Synthesis} [get_files ./$coredir/$corename/$corename.xci] 19 | read_ip ./$coredir/$corename/$corename.xci 20 | synth_ip [get_ips $corename] 21 | #} 22 | #core_gen_pcie 23 | -------------------------------------------------------------------------------- /cpp/DRAMHostDMA.cpp: -------------------------------------------------------------------------------- 1 | /**** 2 | For use with dram/src/DRAMHostDMA.bsv 3 | ****/ 4 | 5 | 6 | 7 | #include "DRAMHostDMA.h" 8 | 9 | DRAMHostDMA* 10 | DRAMHostDMA::m_pInstance = NULL; 11 | 12 | DRAMHostDMA* 13 | DRAMHostDMA::GetInstance() { 14 | if ( m_pInstance == NULL ) { 15 | m_pInstance = new DRAMHostDMA(); 16 | } 17 | return m_pInstance; 18 | } 19 | 20 | DRAMHostDMA::DRAMHostDMA() { 21 | BdbmPcie* pcie = BdbmPcie::getInstance(); 22 | 23 | m_read_done_cnt = 0; 24 | m_write_done_cnt = 0; 25 | m_write_done_total = pcie->userReadWord(m_fpga_write_stat_off); 26 | m_read_done_total = pcie->userReadWord(m_fpga_read_stat_off); 27 | } 28 | 29 | // offset: in FPGA mem (in bytes) 30 | bool 31 | DRAMHostDMA::CopyToFPGA(size_t offset, void* buffer, size_t bytes) { 32 | m_mutex.lock(); 33 | BdbmPcie* pcie = BdbmPcie::getInstance(); 34 | 35 | size_t offset_frag = offset % m_fpga_alignment; 36 | if ( offset_frag != 0 ) { 37 | offset = (offset/m_fpga_alignment)*m_fpga_alignment; 38 | //io is simply shifted to align 39 | //bytes += offset_frag; 40 | } 41 | size_t src_bytes = bytes; 42 | size_t bytes_frag = bytes % m_fpga_alignment; 43 | if ( bytes_frag != 0 ) { 44 | bytes = (bytes/m_fpga_alignment+1)*m_fpga_alignment; 45 | } 46 | size_t 
writes_cnt = ( bytes/(m_max_dma_bytes/2) ); 47 | if ( bytes%(m_max_dma_bytes/2) != 0 ) writes_cnt ++; 48 | 49 | size_t host_offset = 0; 50 | uint8_t* dmabuf8 = (uint8_t*)pcie->dmaBuffer(); 51 | 52 | //printf( "Starting write with %ld chunks\n", writes_cnt ); fflush(stdout); 53 | for ( size_t i = 0; i < writes_cnt; i++ ) { 54 | size_t curbyte = m_max_dma_bytes/2; 55 | if ( m_max_dma_bytes/2 > bytes ) curbyte = bytes; 56 | 57 | size_t bufoff = 0; 58 | if ( i%2 != 0 ) bufoff = m_max_dma_bytes/2; 59 | if ( src_bytes < curbyte ) { 60 | memcpy(dmabuf8+bufoff, ((uint8_t*)buffer)+host_offset, src_bytes); 61 | memset(dmabuf8+bufoff+src_bytes, 0xFF, (curbyte-src_bytes)); 62 | } else { 63 | memcpy(dmabuf8+bufoff, ((uint8_t*)buffer)+host_offset, curbyte); 64 | } 65 | 66 | size_t hostpageoff = bufoff/m_fpga_alignment; 67 | size_t pages = curbyte/m_fpga_alignment; 68 | size_t pageoff = offset/m_fpga_alignment; 69 | 70 | //printf( "Writing %lx pages from %lx to %lx\n", pages, hostpageoff, pageoff ); 71 | 72 | pcie->userWriteWord(m_host_mem_arg, hostpageoff); // host mem page 73 | pcie->userWriteWord(m_fpga_mem_arg, pageoff);// fpga mem page 74 | pcie->userWriteWord(m_to_fpga_cmd, pages); 75 | 76 | 77 | uint32_t writecnt = pcie->userReadWord(m_fpga_write_stat_off); 78 | //printf( "Waiting for %d to reach %ld\n", writecnt, m_write_done_total + i ); 79 | while ( writecnt < m_write_done_total + i ) { 80 | writecnt = pcie->userReadWord(m_fpga_write_stat_off); 81 | } 82 | //printf( "Write done!\n" ); 83 | 84 | host_offset += curbyte; 85 | offset += curbyte; 86 | bytes -= curbyte; 87 | src_bytes -= curbyte; 88 | } 89 | 90 | if ( bytes != 0 ) { 91 | fprintf( stderr, "DRAMHostDMA CopyToFPGA bytes remaining after write! 
%ld %s:%d\n", bytes, __FILE__, __LINE__ ); 92 | } 93 | 94 | uint32_t writecnt = pcie->userReadWord(m_fpga_write_stat_off); 95 | //printf( "Waiting for %d to reach %ld\n", writecnt, m_write_done_total +writes_cnt ); 96 | while ( writecnt < m_write_done_total + writes_cnt ) { 97 | writecnt = pcie->userReadWord(m_fpga_write_stat_off); 98 | } 99 | m_write_done_total = writecnt; 100 | //printf( "Write done!\n" ); 101 | 102 | 103 | m_mutex.unlock(); 104 | return true; 105 | } 106 | 107 | bool 108 | DRAMHostDMA::CopyFromFPGA(size_t offset, void* buffer, size_t bytes) { 109 | m_mutex.lock(); 110 | BdbmPcie* pcie = BdbmPcie::getInstance(); 111 | //m_read_done_total = pcie->userReadWord(m_fpga_read_stat_off); 112 | 113 | size_t dst_bytes = bytes; 114 | size_t offset_frag = offset % m_fpga_alignment; 115 | if ( offset_frag != 0 ) { 116 | offset = (offset/m_fpga_alignment)*m_fpga_alignment; 117 | //io is simply shifted to align 118 | //bytes += offset_frag; 119 | } 120 | size_t bytes_frag = bytes % m_fpga_alignment; 121 | if ( bytes_frag != 0 ) { 122 | bytes = (bytes/m_fpga_alignment+1)*m_fpga_alignment; 123 | } 124 | size_t reads_cnt = ( bytes/(m_max_dma_bytes/2) ); //DOUBLE BUFFERING! 
125 | if ( bytes%(m_max_dma_bytes/2) != 0 ) reads_cnt ++; 126 | 127 | size_t host_offset = 0; 128 | uint8_t* dmabuf8 = (uint8_t*)pcie->dmaBuffer(); 129 | size_t curbyte = 0; 130 | size_t lastbyte = 0; 131 | //printf( "Starting write with %ld chunks\n", reads_cnt ); fflush(stdout); 132 | for ( size_t i = 0; i < reads_cnt; i++ ) { 133 | curbyte = m_max_dma_bytes/2; 134 | if ( m_max_dma_bytes/2 > bytes ) curbyte = bytes; 135 | 136 | 137 | size_t hostbufoff = 0; 138 | if ( i%2 != 0 ) hostbufoff = m_max_dma_bytes/2; 139 | size_t hostpageoff = hostbufoff/m_fpga_alignment; 140 | size_t pages = curbyte/m_fpga_alignment; 141 | size_t pageoff = offset/m_fpga_alignment; 142 | 143 | //printf( "%lx,%lx,%lx\n", hostpageoff, pageoff, pages ); 144 | pcie->userWriteWord(m_host_mem_arg, hostpageoff); 145 | pcie->userWriteWord(m_fpga_mem_arg, pageoff); 146 | pcie->userWriteWord(m_to_host_cmd, pages); 147 | 148 | uint32_t readcnt = pcie->userReadWord(m_fpga_read_stat_off); 149 | //printf( "Waiting for %d to be %ld\n", readcnt, m_read_done_total + i ); 150 | while ( readcnt < m_read_done_total + i ) { 151 | readcnt = pcie->userReadWord(m_fpga_read_stat_off); 152 | } 153 | 154 | if ( i > 0 ) { 155 | size_t bufoff = 0; 156 | if ( i%2 == 0 ) bufoff = m_max_dma_bytes/2; 157 | memcpy(((uint8_t*)buffer)+host_offset, dmabuf8 + bufoff, lastbyte); 158 | host_offset += lastbyte; 159 | dst_bytes -= lastbyte; 160 | } 161 | lastbyte = curbyte; 162 | 163 | offset += curbyte; 164 | bytes -= curbyte; 165 | } 166 | if ( bytes != 0 ) { 167 | fprintf( stderr, "DRAMHostDMA CopyToFPGA bytes remaining after read! 
%ld %s:%d\n", bytes, __FILE__, __LINE__ ); 168 | } 169 | 170 | uint32_t readcnt = pcie->userReadWord(m_fpga_read_stat_off); 171 | while ( readcnt < m_read_done_total + reads_cnt ) { 172 | readcnt = pcie->userReadWord(m_fpga_read_stat_off); 173 | } 174 | m_read_done_total = readcnt; 175 | 176 | size_t bufoff = 0; 177 | if ( reads_cnt%2 == 0 ) bufoff = m_max_dma_bytes/2; 178 | if ( dst_bytes >= m_max_dma_bytes/2 ) { 179 | memcpy(((uint8_t*)buffer)+host_offset, dmabuf8+bufoff, m_max_dma_bytes/2); 180 | } else { 181 | memcpy(((uint8_t*)buffer)+host_offset, dmabuf8+bufoff, dst_bytes); 182 | memset(((uint8_t*)buffer)+host_offset+dst_bytes, 0xff, m_max_dma_bytes/2-dst_bytes); 183 | } 184 | 185 | m_mutex.unlock(); 186 | return true; 187 | } 188 | 189 | -------------------------------------------------------------------------------- /cpp/DRAMHostDMA.h: -------------------------------------------------------------------------------- 1 | #ifndef __DRAMHOSTDMA_H__ 2 | #define __DRAMHOSTDMA_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "bdbmpcie.h" 11 | 12 | #include 13 | #include 14 | 15 | 16 | class DRAMHostDMA { 17 | public: 18 | static DRAMHostDMA* GetInstance(); 19 | 20 | // offset and bytes will be force-aligned to whatever alignment the FPGA hardware requires 21 | // So it's best to always use it in an aligned fashion 22 | // (Right now it's 4 KB for everything) 23 | bool CopyToFPGA(size_t offset, void* buffer, size_t bytes); 24 | bool CopyFromFPGA(size_t offset, void* buffer, size_t bytes); 25 | 26 | private: 27 | DRAMHostDMA(); 28 | static DRAMHostDMA* m_pInstance; 29 | std::mutex m_mutex; 30 | 31 | 32 | void ProcDoneCnt(); 33 | uint32_t m_read_done_cnt; 34 | uint32_t m_write_done_cnt; 35 | uint32_t m_write_done_total; 36 | uint32_t m_read_done_total; 37 | 38 | 39 | private: // constants 40 | static const uint32_t m_host_mem_arg = 256*4; 41 | static const uint32_t m_fpga_mem_arg = 257*4; 42 | static const uint32_t 
m_fpga_write_stat_off = 256*4; 43 | static const uint32_t m_fpga_read_stat_off = 257*4; 44 | static const uint32_t m_to_fpga_cmd = 258*4; 45 | static const uint32_t m_to_host_cmd = 259*4; 46 | 47 | // m_max_dma_bytes MUST be multiples of m_fpga_alignment 48 | static const uint32_t m_fpga_alignment = (4*1024); 49 | static const uint32_t m_max_dma_bytes = (1024*1024); 50 | }; 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /cpp/PcieBdpi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "ShmFifo.h" 14 | 15 | #define DMA_BUFFER_SIZE (1024*1024*4) 16 | #define SHM_SIZE (1024*8*3 + DMA_BUFFER_SIZE) 17 | 18 | ShmFifo* infifo = NULL; 19 | ShmFifo* outfifo = NULL; 20 | ShmFifo* interruptfifo = NULL; 21 | void* shm_ptr = NULL; 22 | bool shm_ready = false; 23 | bool shmReady() { 24 | if ( shm_ready == true ) { 25 | return true; 26 | } 27 | 28 | pid_t mypid = getpid(); 29 | char shmname[64]; 30 | sprintf(shmname, "/bdbm%d", mypid); 31 | 32 | int shm_fd = shm_open(shmname, O_CREAT | O_RDWR, 0666); 33 | printf( "hardware shm_open %s returned %d with errno %d\n", shmname, shm_fd, errno); 34 | int ret = ftruncate(shm_fd, SHM_SIZE); 35 | shm_ptr = mmap(0,SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); 36 | if ( shm_ptr == MAP_FAILED || shm_ptr == NULL ) 37 | { 38 | shm_ptr = NULL; 39 | shm_ready = false; 40 | return false; 41 | } 42 | shm_ready = true; 43 | //printf( "shmReady Called %x\n", (unsigned int)shm_ptr ); fflush(stdout); 44 | 45 | uint64_t* shm_uptr = (uint64_t*)shm_ptr; 46 | outfifo = new ShmFifo(shm_uptr+(DMA_BUFFER_SIZE/sizeof(uint64_t)), 1024); 47 | infifo = new ShmFifo(shm_uptr+(DMA_BUFFER_SIZE/sizeof(uint64_t))+1024, 1024); 48 | interruptfifo = new ShmFifo(shm_uptr+(DMA_BUFFER_SIZE/sizeof(uint64_t))+(1024*2), 1024); 49 | 50 
| return true; 51 | } 52 | 53 | extern "C" bool bdpiDmaWriteData(unsigned int addr, uint64_t data1, uint64_t data2) { 54 | //TODO..... 55 | if ( !shmReady() ) return false; 56 | 57 | uint64_t *llptr = (uint64_t*)shm_ptr; 58 | 59 | llptr[addr>>3] = data1; 60 | llptr[(addr>>3)+1] = data2; 61 | 62 | return true; 63 | } 64 | 65 | unsigned int dmaReadStartAddr; 66 | unsigned int dmaReadWordsRemain; 67 | unsigned int dmaReadWordsOffset; 68 | extern "C" bool bdpiDmaReadReq(unsigned int addr, int words) { 69 | if ( !shmReady() ) return false; 70 | 71 | dmaReadStartAddr = addr; 72 | dmaReadWordsRemain = words*4; // 128 bit words, instead of 32 73 | dmaReadWordsOffset = 0; 74 | return true; 75 | } 76 | 77 | extern "C" bool bdpiDmaReadReady() { 78 | if ( !shmReady() ) return false; 79 | 80 | if ( dmaReadWordsRemain > 0 ) return true; 81 | 82 | return false; 83 | } 84 | 85 | extern "C" uint32_t bdpiDmaReadData() { 86 | uint32_t *lptr = (uint32_t *)shm_ptr; 87 | //printf( "%d %d %x\n", dmaReadStartAddr>>2, dmaReadWordsOffset, ); 88 | uint32_t r = lptr[(dmaReadStartAddr>>2)+dmaReadWordsOffset]; 89 | dmaReadWordsRemain --; 90 | dmaReadWordsOffset++; 91 | return r; 92 | } 93 | 94 | extern "C" bool bdpiIOReady() { 95 | if ( !shmReady() ) return false; 96 | if ( infifo->empty() ) return false; 97 | 98 | printf( "bdpiIOReady returning true!\n" ); 99 | return true; 100 | } 101 | 102 | extern "C" uint64_t bdpiIOData() { 103 | if ( !shmReady() ) { 104 | uint64_t r = 1; 105 | r <<= (32+31); 106 | return r; 107 | } 108 | if ( infifo->empty() ) { 109 | //fprintf(stderr, "bdpiIOData called while infifo is empty!\n" ); 110 | uint64_t r = 1; 111 | r <<= (32+31); 112 | return r; 113 | } 114 | 115 | uint64_t d = infifo->tail(); 116 | infifo->pop(); 117 | 118 | //printf("returning data %lx\n", d ); 119 | return d; 120 | } 121 | 122 | extern "C" bool bdpiIOReadRespReady() { 123 | if ( !shmReady() ) return false; 124 | if ( outfifo->full() ) return false; 125 | 126 | return true; 127 | } 128 | 129 
| extern "C" bool bdpiIOReadResp(uint64_t dat) { 130 | if ( !shmReady() ) return false; 131 | if ( outfifo->full() ) return false; //THIS SHOULD NOT HAPPEN 132 | 133 | outfifo->push(dat); 134 | 135 | return true; 136 | } 137 | 138 | extern "C" bool bdpiInterruptReady() { 139 | if ( !shmReady() ) return false; 140 | if ( interruptfifo->full() ) return false; 141 | 142 | return true; 143 | } 144 | extern "C" void bdpiAssertInterrupt() { 145 | if ( !shmReady() ) return;//THIS SHOULD NOT HAPPEN 146 | if ( interruptfifo->full() ) return; //THIS SHOULD NOT HAPPEN 147 | 148 | interruptfifo->push(0); 149 | } 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /cpp/ShmFifo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "ShmFifo.h" 3 | 4 | /* 5 | thread safety is not enforced, because it's going to be used for shmem 6 | MAKE SURE THERE IS ONLY ONE SOURCE AND ONE DRAIN! 7 | */ 8 | 9 | ShmFifo::ShmFifo(uint64_t* mem_, int size) { 10 | this->mem = mem_+3; 11 | this->size = (uint64_t)size-3; 12 | 13 | headidx = &mem_[0]; 14 | tailidx = &mem_[1]; 15 | 16 | // Check magic number so that only one host inits 17 | if ( mem_[2] != 0xc001d00d ) { 18 | *headidx = 0; 19 | *tailidx = 0; 20 | printf( "Initializing shared memory fifo structures\n" ); 21 | mem_[2] = 0xc001d00d; 22 | } 23 | } 24 | 25 | void 26 | ShmFifo::pop() { 27 | if ( *tailidx == *headidx ) return; 28 | 29 | //printf( "Popped data from %ld\n", *tailidx ); 30 | 31 | (*tailidx)++; 32 | if (*tailidx >= this->size ) *tailidx = 0; 33 | 34 | 35 | return; 36 | } 37 | 38 | void 39 | ShmFifo::push(uint64_t v) { 40 | uint64_t nexthead = *headidx + 1; 41 | if ( nexthead >= this->size ) nexthead = 0; 42 | if ( nexthead == *tailidx ) return; 43 | 44 | mem[*headidx] = v; 45 | (*headidx) = nexthead; 46 | 47 | //printf( "Pushed %lx to idx %ld\n", v, *headidx-1 ); 48 | //fflush(stdout); 49 | 50 | return; 51 | } 52 | 
53 | uint64_t 54 | ShmFifo::tail() { 55 | return mem[*tailidx]; 56 | } 57 | 58 | bool 59 | ShmFifo::empty() { 60 | if ( *tailidx == *headidx ) return true; 61 | return false; 62 | } 63 | 64 | bool 65 | ShmFifo::full() { 66 | uint64_t nexthead = *headidx + 1; 67 | if ( nexthead >= this->size ) nexthead = 0; 68 | if ( nexthead == *tailidx ) return true; 69 | 70 | return false; 71 | } 72 | 73 | 74 | /* 75 | void shmfifo_init(unsigned int* mem, int size); 76 | bool shmfifo_push(int v); 77 | unsigned int shmfifo_tail(); 78 | bool shmfifo_pop(); 79 | bool shmfifo_empty(); 80 | bool shmfifo_full(); 81 | 82 | unsigned int* shmfifo_mem = NULL; 83 | int shmfifo_size = 0; 84 | int shmfifo_headidx = 0; 85 | int shmfifo_tailidx = 0; 86 | 87 | void shmfifo_init(unsigned int* mem, int size) { 88 | shmfifo_mem = mem; 89 | if ( size > 0 ) shmfifo_size = size; 90 | } 91 | 92 | bool shmfifo_push(int v) { 93 | int nexthead = shmfifo_headidx + 1; 94 | if ( nexthead >= shmfifo_size ) nexthead = 0; 95 | if ( nexthead == shmfifo_tailidx ) return false; 96 | 97 | shmfifo_mem[shmfifo_headidx] = v; 98 | shmfifo_headidx++; 99 | 100 | return true; 101 | } 102 | 103 | unsigned int shmfifo_tail() { 104 | return shmfifo_mem[shmfifo_tailidx]; 105 | } 106 | 107 | bool shmfifo_pop() { 108 | if ( shmfifo_tailidx == shmfifo_headidx ) return false; 109 | 110 | shmfifo_tailidx++; 111 | if (shmfifo_tailidx >= shmfifo_size ) shmfifo_tailidx = 0; 112 | 113 | return true; 114 | } 115 | 116 | bool shmfifo_empty() { 117 | if ( shmfifo_tailidx == shmfifo_headidx ) return true; 118 | return false; 119 | } 120 | 121 | bool shmfifo_full() { 122 | int nexthead = shmfifo_headidx + 1; 123 | if ( nexthead >= shmfifo_size ) nexthead = 0; 124 | if ( nexthead == shmfifo_tailidx ) return true; 125 | 126 | return false; 127 | } 128 | */ 129 | -------------------------------------------------------------------------------- /cpp/ShmFifo.h: -------------------------------------------------------------------------------- 1 | 
#ifndef __SHM_FIFO__H__
#define __SHM_FIFO__H__

#include <stdint.h>

/**
 * FIFO of uint64_t values kept in a caller-provided uint64_t array.
 *
 * The object itself only stores pointers into that array: the data area,
 * and the locations of the head and tail indices.
 */
class ShmFifo{
public:
	/// mem: backing array; size: its total length in uint64_t elements.
	ShmFifo(uint64_t* mem, int size);

	/// Remove the entry at the tail.
	void pop();
	/// Append v at the head.
	void push(uint64_t v);

	/// Value at the tail, without removing it.
	uint64_t tail();
	bool empty();
	bool full();


private:
	uint64_t* mem;      // first data slot
	uint64_t size;      // number of data slots

	uint64_t* headidx;  // location of the head index
	uint64_t* tailidx;  // location of the tail index
};


#endif
/**
 * Singleton handle for the DMA circular queue.
 *
 * Obtain the instance with getInstance(); the constructor is private and
 * copying is disabled.
 */
class DMACircularQueue {
public:
	static DMACircularQueue* getInstance();

	/// Host-side DMA buffer, as returned by BdbmPcie::dmaBuffer().
	void* dmaBuffer();

	/// Report that `bytes` more bytes have been consumed from the queue.
	void deq(uint32_t bytes);

private:
	// Running total of bytes reported through deq().
	uint32_t readBytes;

	static DMACircularQueue* m_pInstance;

	DMACircularQueue();

	// Non-copyable. These used to be private members with empty bodies; the
	// assignment operator had no return statement despite returning a
	// reference. Deleting them matches what BdbmPcie does.
	DMACircularQueue(DMACircularQueue const&) = delete;
	DMACircularQueue& operator=(DMACircularQueue const&) = delete;
};
DMASplitter::sendWord(uint32_t header, uint32_t d1, uint32_t d2) { 66 | BdbmPcie* pcie = BdbmPcie::getInstance(); 67 | 68 | pcie->writeWord((IO_USER_OFFSET+4)*4, header); 69 | pcie->writeWord((IO_USER_OFFSET+1)*4, d2); 70 | pcie->writeWord((IO_USER_OFFSET+0)*4, d1); 71 | } 72 | 73 | 74 | int 75 | DMASplitter::scanReceive() { 76 | BdbmPcie* pcie = BdbmPcie::getInstance(); 77 | void* dmabuf = pcie->dmaBuffer(); 78 | uint32_t* ubuf = (uint32_t*)dmabuf; 79 | 80 | int recvd = 0; 81 | bool found = false; 82 | for ( int i = 0; i < (1024*4/32); i++ ) { 83 | uint32_t u32off = ((i+nextrecvoff)%(1024*4/32))*4*2; 84 | 85 | 86 | uint32_t nidx = ubuf[u32off+5]; 87 | if ( nidx == nextrecvidx ) { 88 | PCIeWord w; 89 | w.d[0] = ubuf[u32off]; 90 | w.d[1] = ubuf[u32off+1]; 91 | w.d[2] = ubuf[u32off+2]; 92 | w.d[3] = ubuf[u32off+3]; 93 | w.header = ubuf[u32off+4]; 94 | 95 | pthread_mutex_lock(&recv_lock); 96 | recvList.push_front(w); 97 | pthread_cond_broadcast(&recv_cond); 98 | pthread_mutex_unlock(&recv_lock); 99 | 100 | nextrecvidx++; 101 | recvd++; 102 | found = true; 103 | } else if ( found ) { 104 | break; 105 | } 106 | } 107 | 108 | nextrecvoff = nextrecvoff+recvd; 109 | //enqReceiveIdx 110 | if ( recvd > 0 ) { 111 | pcie->writeWord((IO_USER_OFFSET+16)*4, nextrecvidx); 112 | } 113 | return recvd; 114 | } 115 | 116 | PCIeWord 117 | DMASplitter::recvWord() { 118 | BdbmPcie* pcie = BdbmPcie::getInstance(); 119 | DMASplitter* dma = DMASplitter::getInstance(); 120 | 121 | while ( recvList.empty() ) { 122 | pcie->waitInterrupt(0); 123 | dma->scanReceive(); 124 | } 125 | /* 126 | pthread_mutex_lock(&recv_lock); 127 | while ( recvList.empty() ) { 128 | pthread_cond_wait(&recv_cond, &recv_lock); 129 | } 130 | */ 131 | PCIeWord w = recvList.back(); 132 | recvList.pop_back(); 133 | //pthread_mutex_unlock(&recv_lock); 134 | 135 | return w; 136 | } 137 | 138 | void* 139 | DMASplitter::dmaBuffer() { 140 | BdbmPcie* pcie = BdbmPcie::getInstance(); 141 | void* dmabuf = pcie->dmaBuffer(); 142 | 
void* dmaSplitterThread(void* arg);

/** One transfer unit: four 32-bit payload words plus a 32-bit header. */
typedef struct PCIeWord {
	uint32_t d[4];
	uint32_t header;
} PCIeWord;

/**
 * Singleton for sending and receiving PCIeWord values.
 *
 * Obtain the instance with getInstance(); the constructor is private and
 * copying is disabled.
 */
class DMASplitter {
public:
	static DMASplitter* getInstance();

	//sends 16 bytes (128 bits)
	void sendWord(uint32_t header, uint32_t d1, uint32_t d2, uint32_t d3, uint32_t d4);
	void sendWord(uint32_t header, uint32_t d1, uint32_t d2);
	void sendWord(PCIeWord word);

	/// Returns the oldest received word.
	PCIeWord recvWord();

	/// Moves newly arrived words onto the receive list; returns how many.
	int scanReceive();

	void* dmaBuffer();

private:
	static DMASplitter* m_pInstance;
	DMASplitter();

	// Non-copyable. The assignment operator previously had an empty body
	// with no return statement despite returning a reference.
	DMASplitter(DMASplitter const&) = delete;
	DMASplitter& operator=(DMASplitter const&) = delete;

	//int nextrecvoff;
	int nextrecvidx;        // sequence index expected from the next received word
	uint32_t nextrecvoff;   // running count of words received so far
	std::list<PCIeWord> recvList;
	pthread_mutex_t recv_lock;
	pthread_cond_t recv_cond;

	pthread_t pollThread;
};
#define XILINX_DEVICE 0x7028
#define XILINX_VENDOR 0x10ee
#define XILINX_SUBSYSTEM 0x7

/* Remember the effective and real UIDs. */
static uid_t euid, ruid;


/* Switch the effective UID to the saved `euid`. Exits on failure. */
void
do_setuid (void)
{
	int status;

#ifdef _POSIX_SAVED_IDS
	status = seteuid (euid);
#else
	status = setreuid (ruid, euid);
#endif
	if (status < 0) {
		fprintf (stderr, "Couldn't set uid.\n");
		exit (status);
	}
}

/* Switch the effective UID back to the real UID `ruid`. Exits on failure. */
void
undo_setuid (void)
{
	int status;

#ifdef _POSIX_SAVED_IDS
	status = seteuid (ruid);
#else
	status = setreuid (euid, ruid);
#endif
	if (status < 0) {
		fprintf (stderr, "Couldn't set uid.\n");
		exit (status);
	}
}

/*
 * Read /sys/bus/pci/devices/<dname>/<fname> and parse its first line as a
 * hexadecimal number. Returns 0 when the file cannot be opened or read.
 */
int read_pci_file_hex(char* dname, char* fname) {
	char path[128];
	char buf[128];
	// Bounded formatting: directory entry names can be longer than the buffer.
	int n = snprintf( path, sizeof(path), "/sys/bus/pci/devices/%s/%s", dname, fname );
	if ( n < 0 || (size_t)n >= sizeof(path) ) return 0;

	FILE* fdev = fopen(path, "r");
	if ( !fdev ) return 0;

	int c = 0;
	if ( fgets(buf, sizeof(buf), fdev) ) {
		c = strtol(buf, NULL, 16);
	}
	fclose(fdev);
	return c;
}

/* Ask the kernel to unload the "bdbmpcie" module and report the outcome. */
void
unload_driver() {
	int ret = syscall(__NR_delete_module, "bdbmpcie", 0);
	if ( ret != 0 ) {
		printf( "delete_module returned %d\n", ret );
	} else {
		printf( "unloaded driver\n" );
	}
}

/*
 * Remove the given PCI device, trigger a bus rescan, then write 7 to the
 * 16-bit word at offset 4 of the device's config file.
 * Returns true only when every step succeeded.
 */
bool
rescan_pcie_device(char* dname) {
	/*
	echo 1 > /sys/bus/pci/devices/<device>/remove
	echo 1 > /sys/bus/pci/rescan

	// it sets Command register (offset 4) to 7 (memory/IO/bus master enable bit)
	// https://forums.xilinx.com/t5/PCI-Express/Is-it-possible-to-do-enumeration-without-restart-the-PC/td-p/740999
	setpci -s <device> 04.w=7
	*/

	char dpath[128];
	snprintf( dpath, sizeof(dpath), "/sys/bus/pci/devices/%s/remove", dname );
	FILE* fdev = fopen(dpath, "w");
	if ( !fdev ) {
		fprintf(stderr, "error: failed to open sys file to remove\n" );
		return false;
	}
	fprintf(fdev, "1\n");
	fclose(fdev);

	FILE* fscan = fopen("/sys/bus/pci/rescan", "w");
	if ( !fscan ) {
		fprintf(stderr, "error: failed to open sys file to rescan\n" );
		return false;
	}
	// Write to the rescan file itself. The previous code wrote to and closed
	// the already-closed `fdev` here, so the rescan was never requested.
	fprintf(fscan, "1\n");
	fclose(fscan);

	char cpath[128];
	snprintf( cpath, sizeof(cpath), "/sys/bus/pci/devices/%s/config", dname );
	FILE* fconf = fopen(cpath, "wb");
	if ( !fconf ) {
		fprintf(stderr, "error: failed to open sys file to config\n" );
		return false;
	}
	fseek(fconf, 4, SEEK_SET);
	uint16_t cmd = 7;
	size_t ret = fwrite(&cmd, sizeof(uint16_t), 1, fconf);

	// fwrite only fills the stdio buffer; closing flushes it to the file.
	int closed = fclose(fconf);

	if ( ret != 1 ) {
		fprintf( stderr, "error: fwrite to config file returned %ld\n", (long)ret );
		return false;
	}
	if ( closed != 0 ) {
		fprintf( stderr, "error: failed to flush config file\n" );
		return false;
	}

	return true;
}

/*
 * True when the device directory `dname` reports the expected device,
 * vendor and subsystem_device IDs. Prints the IDs on a match.
 */
bool is_pcie_device(char* dname) {
	int device = read_pci_file_hex(dname, (char*)"device");
	int vendor = read_pci_file_hex(dname, (char*)"vendor");
	int subsystem_device = read_pci_file_hex(dname, (char*)"subsystem_device");

	if ( device == XILINX_DEVICE && vendor == XILINX_VENDOR && subsystem_device == XILINX_SUBSYSTEM ) {
		printf ( "%x %x %x\n", vendor, device, subsystem_device );
		return true;
	}
	return false;
}

/*
 * Look through /sys/bus/pci/devices for the first matching device.
 * On success stores a malloc'd, NUL-terminated copy of its directory name
 * in *id (the caller frees it) and returns true.
 */
bool find_pcie_device(char** id) {
	DIR *dp = opendir("/sys/bus/pci/devices");
	if ( !dp ) return false;

	bool found = false;
	struct dirent *dirp;
	while ( (dirp = readdir(dp)) ) {
		if ( !is_pcie_device(dirp->d_name) ) continue;

		size_t len = strlen(dirp->d_name);
		char* copy = (char*)malloc(len+1);
		if ( copy ) {
			// len+1 copies the terminating NUL as well.
			memcpy(copy, dirp->d_name, len+1);
			*id = copy;
			found = true;
		}
		break;
	}
	closedir(dp);
	return found;
}


int
main (int argc, char** argv)
{
	ruid = getuid ();
	euid = geteuid ();
	// Run with the real UID except around the steps that need privileges.
	undo_setuid ();

	printf( "BluespecPCIe rescan tool\n" ); fflush(stdout);

	char* loc = NULL;
	bool found = find_pcie_device(&loc);
	if ( !found ) {
		printf( "ERROR: BluespecPCIe device not found!\n" );
		exit(1);
	}

	printf( "BluespecPCIe device found!\n" );

	do_setuid();
	unload_driver();
	sleep(1);
	bool ret = rescan_pcie_device(loc);
	undo_setuid();
	if ( ret ) {
		printf( "Rescan successful!\n" );
	} else {
		printf( "Rescan failed...\n" );
	}

	free(loc);
}
#define MAGIC 0xc001d001

/* Elapsed time from `start` to `end`, in seconds. */
double timespec_diff_sec( timespec start, timespec end ) {
	double t = end.tv_sec - start.tv_sec;
	t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L);
	return t;
}

int fd;

/* Thread body: block in poll() on `fd` forever, logging each wake-up. */
void* pollthread(void *arg) {
	struct pollfd pfd;
	pfd.fd = fd;
	pfd.events = POLLIN;
	while (1) {
		printf( "Waiting for poll\n" );
		poll(&pfd, 1, -1);
		printf( "Poll returned!\n" );
	}
}

/*
 * Manual test: maps two 1 MB windows of /dev/bdbm_regs0, dumps the first
 * 32 words of the first window, writes a few words at index 1024 and up,
 * then dumps 128 words of the second window.
 */
int main() {
	fd = open("/dev/bdbm_regs0", O_RDWR, 0);
	if ( fd < 0 ) {
		perror("open /dev/bdbm_regs0");
		return 1;
	}

	void* mmd = mmap(NULL, 1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if ( mmd == MAP_FAILED ) {
		perror("mmap (offset 0)");
		close(fd);
		return 1;
	}
	void* mmdbuf = mmap(NULL, 1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 1024*1024);
	if ( mmdbuf == MAP_FAILED ) {
		perror("mmap (offset 1MB)");
		munmap(mmd, 1024*1024);
		close(fd);
		return 1;
	}

	unsigned int* ummd = (unsigned int*)mmd;
	unsigned int* ummdb = (unsigned int*)mmdbuf;
	timespec start,end;

	unsigned int magic = ummd[0];
	printf( "Magic number: %x ?= %x\n", magic, MAGIC );

	// Word 1 is still read as before; its value is only needed by the
	// disabled ioctl call further down.
	unsigned int ioctl_alloc_dma = ummd[1];
	(void)ioctl_alloc_dma;

	pthread_t pollthreadval;
	if ( pthread_create(&pollthreadval, NULL, pollthread, NULL) != 0 ) {
		fprintf(stderr, "error: failed to create poll thread\n");
	}

	clock_gettime(CLOCK_REALTIME, &start);

	for ( int i = 0; i < 32; i++ ) {
		printf( "%d: %x\n", i, ummd[i] );
	}

	//test dma write?
	ummd[1024] = 8;

	sleep(2);
	ummd[1024+1] = 1;

	sleep(2);
	ummd[1024+2] = 8; // test dma read

	// NOT ACTUALLY ALLOCING NOW
	//ioctl(fd, ioctl_alloc_dma, 32);
	//printf( "IOCTL command no: %x\n", ioctl_alloc_dma );

	for ( int i = 0; i < 128; i++ ) {
		printf( "%x ", ummdb[i] );
	}
	printf( "\n" );

	sleep(2);

	clock_gettime(CLOCK_REALTIME, &end);
	float totallat = timespec_diff_sec(start, end);
	printf( "%f\n", totallat );
	munmap(mmd, 1024*1024);
	munmap(mmdbuf, 1024*1024);
	close(fd);
}
# Run the Vivado programming script, then rescan the PCIe bus.
# Up to two arguments are forwarded to program.tcl through -tclargs.
# Arguments are quoted so that values containing spaces are passed intact.
if [ $# -eq 0 ]
then
	vivado -mode batch -source /opt/shared/program.tcl -nolog -nojournal
elif [ $# -eq 1 ]
then
	vivado -mode batch -source /opt/shared/program.tcl -nolog -nojournal -tclargs "$1"
else
	vivado -mode batch -source /opt/shared/program.tcl -nolog -nojournal -tclargs "$1" "$2"
fi
sleep 2
bsrescan
/home/swjun/temp/project_1/project_1.xpr 4 | Scanning sources... 5 | Finished scanning sources 6 | INFO: [IP_Flow 19-234] Refreshing IP repositories 7 | INFO: [IP_Flow 19-1704] No user IP repositories specified 8 | INFO: [IP_Flow 19-2313] Loaded Vivado IP repository '/opt/Xilinx/Vivado/2018.2/data/ip'. 9 | update_compile_order -fileset sources_1 10 | create_ip -name mig_7series -vendor xilinx.com -library ip -version 4.1 -module_name ddr3_0 11 | set_property -dict [list CONFIG.XML_INPUT_FILE {mig_a.prj} CONFIG.RESET_BOARD_INTERFACE {Custom} CONFIG.MIG_DONT_TOUCH_PARAM {Custom} CONFIG.BOARD_MIG_PARAM {Custom}] [get_ips ddr3_0] 12 | generate_target {instantiation_template} [get_files /home/swjun/temp/project_1/project_1.srcs/sources_1/ip/ddr3_0/ddr3_0.xci] 13 | INFO: [IP_Flow 19-1686] Generating 'Instantiation Template' target for IP 'ddr3_0'... 14 | generate_target all [get_files /home/swjun/temp/project_1/project_1.srcs/sources_1/ip/ddr3_0/ddr3_0.xci] 15 | INFO: [IP_Flow 19-1686] Generating 'Synthesis' target for IP 'ddr3_0'... 16 | INFO: [IP_Flow 19-1686] Generating 'Simulation' target for IP 'ddr3_0'... 17 | INFO: [IP_Flow 19-1686] Generating 'Implementation' target for IP 'ddr3_0'... 18 | INFO: [IP_Flow 19-1686] Generating 'Change Log' target for IP 'ddr3_0'... 19 | catch { config_ip_cache -export [get_ips -all ddr3_0] } 20 | export_ip_user_files -of_objects [get_files /home/swjun/temp/project_1/project_1.srcs/sources_1/ip/ddr3_0/ddr3_0.xci] -no_script -sync -force -quiet 21 | create_ip_run [get_files -of_objects [get_fileset sources_1] /home/swjun/temp/project_1/project_1.srcs/sources_1/ip/ddr3_0/ddr3_0.xci] 22 | launch_runs -jobs 6 ddr3_0_synth_1 23 | [Mon Sep 24 15:57:22 2018] Launched ddr3_0_synth_1... 
24 | Run output will be captured here: /home/swjun/temp/project_1/project_1.runs/ddr3_0_synth_1/runme.log 25 | export_simulation -of_objects [get_files /home/swjun/temp/project_1/project_1.srcs/sources_1/ip/ddr3_0/ddr3_0.xci] -directory /home/swjun/temp/project_1/project_1.ip_user_files/sim_scripts -ip_user_files_dir /home/swjun/temp/project_1/project_1.ip_user_files -ipstatic_source_dir /home/swjun/temp/project_1/project_1.ip_user_files/ipstatic -lib_map_path [list {modelsim=/home/swjun/temp/project_1/project_1.cache/compile_simlib/modelsim} {questa=/home/swjun/temp/project_1/project_1.cache/compile_simlib/questa} {ies=/home/swjun/temp/project_1/project_1.cache/compile_simlib/ies} {xcelium=/home/swjun/temp/project_1/project_1.cache/compile_simlib/xcelium} {vcs=/home/swjun/temp/project_1/project_1.cache/compile_simlib/vcs} {riviera=/home/swjun/temp/project_1/project_1.cache/compile_simlib/riviera}] -use_ip_compiled_libs -force -quiet 26 | 27 | -------------------------------------------------------------------------------- /dram/kc705/dram.xdc: -------------------------------------------------------------------------------- 1 | #create_generated_clock -name ddr3_usrclk -source ddr3_refclk -multiply_by 5 -divide_by 5 [get_pins -hier -regexp { .*ddr3_ctrl/u_ddr3_0/ui_clk}] 2 | create_generated_clock -name ddr3_usrclk -source [get_pins sys_clk_200mhz_buf/O] -multiply_by 5 -divide_by 5 [get_pins -hier -regexp { .*ddr3_ctrl/u_ddr3_0/ui_clk}] 3 | 4 | set_clock_groups -asynchronous -group {pcie_clk_125mhz} -group {ddr3_usrclk} 5 | set_clock_groups -asynchronous -group {pcie_clk_250mhz} -group {ddr3_usrclk} 6 | #set_clock_groups -asynchronous -group {pcie_clk_125mhz} -group {clk_pll_i} 7 | #set_clock_groups -asynchronous -group {pcie_clk_250mhz} -group {clk_pll_i} 8 | set_clock_groups -asynchronous -group {userclk2} -group {clk_pll_i} 9 | set_clock_groups -asynchronous -group {userclk2} -group {ddr3_usrclk} 10 | set_clock_groups -asynchronous -group {userclk2} -group 
{sys_clk_200} 11 | set_clock_groups -asynchronous -group {clk_pll_i} -group {sys_clk_200} 12 | 13 | #set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3_ctrl_user_reset_n/*}] -hier -filter {NAME=~ *C}] 14 | #set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3_ctrl_user_reset_n/*}] -hier -filter {NAME=~ *C}] 15 | #set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3ref_rst_n/*}] -hier -filter {NAME=~ *CLR}] 16 | #set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr_cli_200Mhz_reqs/*}] -hier -filter {NAME=~ *CLR}] 17 | 18 | ## ADDED FOR KC705 19 | set_property DCI_CASCADE {32 34} [get_iobanks 33] 20 | -------------------------------------------------------------------------------- /dram/kc705/syncfifo.xdc: -------------------------------------------------------------------------------- 1 | ##create_clock -name ddr3_refclk -period 5 [get_pins host_sys_clk_200mhz_buf/O] 2 | #create_generated_clock -name ddr3_usrclk -source clk_gen_pll_CLKOUT2 -multiply_by 5 -divide_by 5 [get_pins *ddr3_ctrl/CLK] 3 | #create_generated_clock -name app_clk -source [get_pins */clkgen_pll/CLKIN1] -divide_by 2 [get_pins */clkgen_pll/CLKOUT0] 4 | 5 | #set_max_delay -from [get_clocks app_clk] -to [get_clocks ddr3_usrclk] 5.000 -datapath_only 6 | #set_max_delay -from [get_clocks ddr3_usrclk] -to [get_clocks app_clk] 5.000 -datapath_only 7 | -------------------------------------------------------------------------------- /dram/kc705/synth-ip.tcl: -------------------------------------------------------------------------------- 1 | set coredir "./core/" 2 | set corename "ddr3_0" 3 | 4 | file mkdir $coredir 5 | if [file exists ./$coredir/$corename] { 6 | file delete -force ./$coredir/$corename 7 | } 8 | 9 | create_project -name local_synthesized_ip -in_memory -part xc7k325tffg900-2 10 | set_property board_part xilinx.com:kc705:part0:1.5 [current_project] 11 | create_ip -name 
mig_7series -version 4.* -vendor xilinx.com -library ip -module_name $corename -dir ./$coredir 12 | 13 | set_property -dict [list CONFIG.XML_INPUT_FILE "../../mig_a.prj" CONFIG.RESET_BOARD_INTERFACE {Custom} CONFIG.MIG_DONT_TOUCH_PARAM {Custom} CONFIG.BOARD_MIG_PARAM {Custom}] [get_ips $corename] 14 | 15 | generate_target {instantiation_template} [get_files ./$coredir/$corename/$corename.xci] 16 | generate_target all [get_files ./$coredir/$corename/$corename.xci] 17 | create_ip_run [get_files -of_objects [get_fileset sources_1] ./$coredir/$corename/$corename.xci] 18 | generate_target {Synthesis} [get_files ./$coredir/$corename/$corename.xci] 19 | read_ip ./$coredir/$corename/$corename.xci 20 | synth_ip [get_ips $corename] 21 | -------------------------------------------------------------------------------- /dram/src/DDR3Controller.bsv: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // Copyright (c) 2014 Bluespec, Inc. ALL RIGHTS RESERVED. 
3 | //////////////////////////////////////////////////////////////////////////////// 4 | // Filename : XilinxVC707DDR3.bsv 5 | // Description : 6 | //////////////////////////////////////////////////////////////////////////////// 7 | package DDR3Controller; 8 | 9 | // Notes : 10 | 11 | //////////////////////////////////////////////////////////////////////////////// 12 | /// Imports 13 | //////////////////////////////////////////////////////////////////////////////// 14 | import Connectable ::*; 15 | import Clocks ::*; 16 | import FIFO ::*; 17 | import FIFOF ::*; 18 | import SpecialFIFOs ::*; 19 | import TriState ::*; 20 | import Vector ::*; 21 | import DefaultValue ::*; 22 | import Counter ::*; 23 | import CommitIfc ::*; 24 | import Memory ::*; 25 | import ClientServer ::*; 26 | import GetPut ::*; 27 | import BUtils ::*; 28 | //import I2C ::*; 29 | import StmtFSM ::*; 30 | import DDR3Common ::*; 31 | 32 | //import XilinxCells ::*; 33 | 34 | //////////////////////////////////////////////////////////////////////////////// 35 | /// Exports 36 | //////////////////////////////////////////////////////////////////////////////// 37 | 38 | //////////////////////////////////////////////////////////////////////////////// 39 | /// Types 40 | //////////////////////////////////////////////////////////////////////////////// 41 | //`define DDR3_VC707 29, 256, 32, 64, 8, 15, 10, 3, 1, 1, 1, 1, 1, 4 42 | `define DDR3_1GB 28, 512, 64, 64, 8, 14, 10, 3, 1, 1, 1, 1, 1, 2 43 | 44 | typedef DDR3_Pins#(`DDR3_1GB) DDR3_Pins_1GB; 45 | typedef DDR3_User#(`DDR3_1GB) DDR3_User_1GB; 46 | typedef DDR3_Controller#(`DDR3_1GB) DDR3_Controller_1GB; 47 | typedef VDDR3_User_Xilinx#(`DDR3_1GB) VDDR3_User_Xilinx_1GB; 48 | typedef VDDR3_Controller_Xilinx#(`DDR3_1GB) VDDR3_Controller_Xilinx_1GB; 49 | 50 | //////////////////////////////////////////////////////////////////////////////// 51 | /// Interfaces 52 | //////////////////////////////////////////////////////////////////////////////// 53 | 54 | 
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
///
/// Implementation
///
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

// BVI import of the Verilog module `ddr3_wrapper`, presented to Bluespec as a
// VDDR3_Controller_Xilinx_1GB.
//   - the default clock / reset drive ports sys_clk_i / sys_rst
//   - `refclk` drives port clk_ref_i
//   - cfg.simulation selects the SIM_BYPASS_INIT_CAL and SIMULATION parameters
import "BVI" ddr3_wrapper =
module vMkDDR3_1GB_Controller#(DDR3_Configure cfg, Clock refclk)(VDDR3_Controller_Xilinx_1GB);
	default_clock clk(sys_clk_i);
	default_reset rst(sys_rst);

	input_clock refclk(clk_ref_i) = refclk;

	parameter SIM_BYPASS_INIT_CAL = (cfg.simulation) ? "FAST" : "OFF";
	parameter SIMULATION = (cfg.simulation) ? "TRUE" : "FALSE";

	// DDR3 device pins. None of them is associated with a Bluespec clock or
	// reset (no_clock / no_reset).
	interface DDR3_Pins ddr3;
		ifc_inout dq(ddr3_dq) clocked_by(no_clock) reset_by(no_reset);
		ifc_inout dqs_p(ddr3_dqs_p) clocked_by(no_clock) reset_by(no_reset);
		ifc_inout dqs_n(ddr3_dqs_n) clocked_by(no_clock) reset_by(no_reset);
		method ddr3_ck_p clk_p clocked_by(no_clock) reset_by(no_reset);
		method ddr3_ck_n clk_n clocked_by(no_clock) reset_by(no_reset);
		method ddr3_cke cke clocked_by(no_clock) reset_by(no_reset);
		method ddr3_cs_n cs_n clocked_by(no_clock) reset_by(no_reset);
		method ddr3_ras_n ras_n clocked_by(no_clock) reset_by(no_reset);
		method ddr3_cas_n cas_n clocked_by(no_clock) reset_by(no_reset);
		method ddr3_we_n we_n clocked_by(no_clock) reset_by(no_reset);
		method ddr3_reset_n reset_n clocked_by(no_clock) reset_by(no_reset);
		method ddr3_dm dm clocked_by(no_clock) reset_by(no_reset);
		method ddr3_ba ba clocked_by(no_clock) reset_by(no_reset);
		method ddr3_addr a clocked_by(no_clock) reset_by(no_reset);
		method ddr3_odt odt clocked_by(no_clock) reset_by(no_reset);
	endinterface

	// Application-side interface. ui_clk / ui_clk_sync_rst are exported as the
	// user clock and reset; every app_* method is clocked by that exported
	// clock (user_clock). Input methods carry an `inhigh` enable, i.e. they are
	// treated as always enabled.
	interface VDDR3_User_Xilinx user;
		output_clock clock(ui_clk);
		output_reset reset(ui_clk_sync_rst);
		method init_calib_complete init_done clocked_by(no_clock) reset_by(no_reset);
		method app_addr(app_addr) enable((*inhigh*)en0) clocked_by(user_clock) reset_by(no_reset);
		method app_cmd(app_cmd) enable((*inhigh*)en00) clocked_by(user_clock) reset_by(no_reset);
		method app_en(app_en) enable((*inhigh*)en1) clocked_by(user_clock) reset_by(no_reset);
		method app_wdf_data(app_wdf_data) enable((*inhigh*)en2) clocked_by(user_clock) reset_by(no_reset);
		method app_wdf_end(app_wdf_end) enable((*inhigh*)en3) clocked_by(user_clock) reset_by(no_reset);
		method app_wdf_mask(app_wdf_mask) enable((*inhigh*)en4) clocked_by(user_clock) reset_by(no_reset);
		method app_wdf_wren(app_wdf_wren) enable((*inhigh*)en5) clocked_by(user_clock) reset_by(no_reset);
		method app_rd_data app_rd_data clocked_by(user_clock) reset_by(no_reset);
		method app_rd_data_end app_rd_data_end clocked_by(user_clock) reset_by(no_reset);
		method app_rd_data_valid app_rd_data_valid clocked_by(user_clock) reset_by(no_reset);
		method app_rdy app_rdy clocked_by(user_clock) reset_by(no_reset);
		method app_wdf_rdy app_wdf_rdy clocked_by(user_clock) reset_by(no_reset);
	endinterface

	// The pin methods and init_done are declared conflict-free (CF) with one
	// another.
	schedule
	(
		ddr3_clk_p, ddr3_clk_n, ddr3_cke, ddr3_cs_n, ddr3_ras_n, ddr3_cas_n, ddr3_we_n,
		ddr3_reset_n, ddr3_dm, ddr3_ba, ddr3_a, ddr3_odt, user_init_done
	)
	CF
	(
		ddr3_clk_p, ddr3_clk_n, ddr3_cke, ddr3_cs_n, ddr3_ras_n, ddr3_cas_n, ddr3_we_n,
		ddr3_reset_n, ddr3_dm, ddr3_ba, ddr3_a, ddr3_odt, user_init_done
	);

	// The user app_* methods are declared conflict-free (CF) with one another.
	schedule
	(
		user_app_addr, user_app_en, user_app_wdf_data, user_app_wdf_end, user_app_wdf_mask, user_app_wdf_wren, user_app_rd_data,
		user_app_rd_data_end, user_app_rd_data_valid, user_app_rdy, user_app_wdf_rdy, user_app_cmd
	)
	CF
	(
		user_app_addr, user_app_en, user_app_wdf_data, user_app_wdf_end, user_app_wdf_mask, user_app_wdf_wren, user_app_rd_data,
		user_app_rd_data_end, user_app_rd_data_valid, user_app_rdy, user_app_wdf_rdy, user_app_cmd
	);

endmodule

// Builds a DDR3_Controller_1GB: instantiates the BVI wrapper above and passes
// it, together with cfg, to mkXilinxDDR3Controller_2_1_ (not defined in this
// file; presumably provided by DDR3Common -- TODO confirm).
module mkDDR3Controller_1GB#(DDR3_Configure cfg, Clock refclk)(DDR3_Controller_1GB);
	(* hide_all *)
	VDDR3_Controller_Xilinx_1GB _v <- vMkDDR3_1GB_Controller(cfg, refclk);
	let _m <- mkXilinxDDR3Controller_2_1_(_v, cfg);
	return _m;
endmodule


endpackage: DDR3Controller

--------------------------------------------------------------------------------
/dram/src/DDR3Sim.bsv:
--------------------------------------------------------------------------------
import Clocks::*;
import FIFO::*;
import Vector::*;
import RegFile::*;
import Connectable::*;
import GetPut::*;
/*
import XilinxVC707DDR3::*;
import DDR3::*;
*/
import DDR3Controller::*;
import DDR3Common::*;

typedef Bit#(28) DDR3Address;
typedef Bit#(64) ByteEn;
typedef Bit#(512) DDR3Data;

// Simulation stand-in for the DDR3 user interface (DDR3_User_1GB).
// Storage is a RegFile indexed by a 26-bit burst address holding 512-bit
// entries. Read responses travel through a chain of 32 FIFOs (delayQs) before
// read_data can return them. The module's own clock and reset are exported as
// the user clock / reset, and init_done is always True.
module mkDDR3Simulator(DDR3_User_1GB);
	RegFile#(Bit#(26), DDR3Data) data <- mkRegFileFull();
	//Vector#(TExp#(26), Reg#(DDR3Data)) data <- replicateM(mkReg(0));
	// NOTE(review): `responses` is only referenced from commented-out code
	// below; delayQs is used instead.
	FIFO#(DDR3Data) responses <- mkFIFO();

	Clock user_clock <- exposeCurrentClock;
	Reset user_reset_n <- exposeCurrentReset;

	// Rotate 512 bit word by offset 64 bit words.
	// The rotate amount handed to rotateBy is (~offset)+1, i.e. the
	// two's-complement negation of offset in 3 bits.
	function Bit#(512) rotate(Bit#(3) offset, Bit#(512) x);
		Vector#(8, Bit#(64)) words = unpack(x);
		Vector#(8, Bit#(64)) rotated = rotateBy(words, unpack((~offset) + 1));
		return pack(rotated);
	endfunction

	// Unrotate 512 bit word by offset 64 bit words.
	// Uses rotateBy with +offset, the opposite amount of rotate() above.
	function Bit#(512) unrotate(Bit#(3) offset, Bit#(512) x);
		Vector#(8, Bit#(64)) words = unpack(x);
		Vector#(8, Bit#(64)) unrotated = rotateBy(words, unpack(offset));
		return pack(unrotated);
	endfunction

	// Response delay line: delayQs[i] feeds delayQs[i+1] for i = 0..30.
	Vector#(32, FIFO#(DDR3Data)) delayQs <- replicateM(mkFIFO());

	for (Integer i = 0; i < 31; i = i + 1) begin
		mkConnection(toGet(delayQs[i]), toPut(delayQs[i+1]));
		/* rule doDelay;
			let v <- toGet(delayQs[i]).get();
			$display("%t %d %h", $time, i , v);
			delayQs[i+1].enq(v);
		endrule*/
	end

	interface clock = user_clock;
	interface reset_n = user_reset_n;
	method Bool init_done() = True;

	// Combined read/write request.
	//   addr    : upper bits (addr>>3) select the 512-bit entry, addr[2:0] is
	//             the 64-bit-word rotation offset within it
	//   writeen : per-byte write enable; 0 means "read"
	//   datain  : write data (only bytes enabled in writeen are stored)
	method Action request(DDR3Address addr, ByteEn writeen, DDR3Data datain);
		Bit#(26) burstaddr = truncate(addr>>3);
		Bit#(3) offset = addr[2:0];

		// Expand each byte-enable bit into an 8-bit mask field.
		Bit#(512) mask = 0;
		for (Integer i = 0; i < 64; i = i+1) begin
			if (writeen[i] == 'b1) begin
				mask[(i*8+7):i*8] = 8'hFF;
			end
		end

		// Read-modify-write in the rotated view of the stored entry.
		Bit#(512) old_rotated = rotate(offset, data.sub(burstaddr));
		//Bit#(512) old_rotated = rotate(offset, data[burstaddr]);
		Bit#(512) new_masked = mask & datain;
		Bit#(512) old_masked = (~mask) & old_rotated;
		Bit#(512) new_rotated = new_masked | old_masked;
		Bit#(512) new_unrotated = unrotate(offset, new_rotated);
		// The update is issued for every request; with writeen == 0 the mask
		// is 0, so new_rotated equals old_rotated.
		data.upd(burstaddr, new_unrotated);
		//data[burstaddr] <= new_unrotated;

		// Only reads (writeen == 0) produce a response.
		if (writeen == 0) begin
			//responses.enq(new_rotated);
			delayQs[0].enq(new_rotated);
		end
	endmethod

	// Returns the next read response from the end of the delay line.
	method ActionValue#(DDR3Data) read_data;
		//let v <- toGet(responses).get();
		let v <- toGet(delayQs[31]).get();
		//$display("last, %d, %h", $time, v);
		return v;
	endmethod

endmodule

--------------------------------------------------------------------------------
/dram/src/DRAMBurstController.bsv:
--------------------------------------------------------------------------------
import
Clocks::*;
import FIFO::*;
import BRAMFIFO::*;
import FIFOF::*;
import Vector::*;

import MergeN::*;

import DRAMController::*;
import DRAMControllerTypes::*;

// Burst-oriented front end for a DRAMUserIfc.
// writeReq / readReq start a burst of `words` 512-bit words at `addr`;
// write / read then move one 512-bit word each.
interface DRAMBurstControllerIfc;
	method Action writeReq(Bit#(64) addr, Bit#(32) words);
	method Action readReq(Bit#(64) addr, Bit#(32) words);
	method Action write(Bit#(512) word);
	method ActionValue#(Bit#(512)) read;

	interface Clock user_clk;
	interface Reset user_rst;
endinterface

// All state lives in the DRAM user clock domain (dram.user_clk / user_rst),
// which is also re-exported as this module's user_clk / user_rst.
module mkDRAMBurstController#(DRAMUserIfc dram) (DRAMBurstControllerIfc);
	Clock dramclk = dram.user_clk;
	Reset dramrst = dram.user_rst;

	// Current address and remaining word count of the active write / read burst.
	Reg#(Bit#(64)) writeCurAddr <- mkReg(0, clocked_by dramclk, reset_by dramrst);
	Reg#(Bit#(32)) writeWordLeft <- mkReg(0, clocked_by dramclk, reset_by dramrst);
	Reg#(Bit#(64)) readCurAddr <- mkReg(0, clocked_by dramclk, reset_by dramrst);
	Reg#(Bit#(32)) readWordLeft <- mkReg(0, clocked_by dramclk, reset_by dramrst);

	// NOTE(review): curClk / curRst are not referenced elsewhere in this module.
	Clock curClk <- exposeCurrentClock;
	Reset curRst <- exposeCurrentReset;

	// Issue one read request per remaining word, advancing the address by 64
	// each time (the literal 64 passed to dram.readReq is presumably a byte
	// count -- TODO confirm against DRAMUserIfc).
	rule dramReadReq ( readWordLeft > 0 );
		dram.readReq(readCurAddr, 64);
		readWordLeft <= readWordLeft - 1;
		readCurAddr <= readCurAddr + 64;
	endrule

	interface Clock user_clk = dramclk;
	interface Reset user_rst = dramrst;
	// A new burst (read or write) is accepted only when neither a read nor a
	// write burst has words remaining.
	method Action writeReq(Bit#(64) addr, Bit#(32) words) if ( writeWordLeft == 0 && readWordLeft == 0 );
		writeCurAddr <= addr;
		writeWordLeft <= words;
	endmethod
	method Action readReq(Bit#(64) addr, Bit#(32) words) if ( readWordLeft == 0 && writeWordLeft == 0 );
		readCurAddr <= addr;
		readWordLeft <= words;
	endmethod
	// Write the next word of the active write burst and advance by 64.
	method Action write(Bit#(512) word) if ( writeWordLeft > 0 );
		dram.write(writeCurAddr, word, 64);
		writeCurAddr <= writeCurAddr + 64;
		writeWordLeft <= writeWordLeft - 1;
	endmethod
	// Pass through the next word returned by the DRAM.
	method ActionValue#(Bit#(512)) read;
		let v <- dram.read;
		return v;
	endmethod
endmodule

// Per-client read port of the burst splitter.
interface DRAMBurstReaderIfc;
	method Action readReq(Bit#(64) addr, Bit#(32) words);
	method ActionValue#(Bit#(512)) read;
endinterface

// Per-client write port of the burst splitter.
interface DRAMBurstWriterIfc;
	method Action writeReq(Bit#(64) addr, Bit#(32) words);
	method Action write(Bit#(512) word);
endinterface

interface DRAMBurstSplitterIfc#(numeric type rcnt, numeric type wcnt);
	interface Vector#(rcnt, DRAMBurstReaderIfc) readers;
	interface Vector#(wcnt, DRAMBurstWriterIfc) writers;
endinterface

// Shares one DRAMBurstControllerIfc among rcnt readers and wcnt writers.
// Client-side logic runs on the module's current clock; DRAM-side logic runs
// on dram.user_clk. Requests and data cross between the two through SyncFIFOs.
// Each request is tagged with the index of the issuing client (Bit#(8)).
module mkDRAMBurstSplitter#(DRAMBurstControllerIfc dram) (DRAMBurstSplitterIfc#(rcnt,wcnt))
	provisos(Add#(rcnt,a__,256), Add#(wcnt,b__,256));

	Clock dramclk = dram.user_clk;
	Reset dramrst = dram.user_rst;
	Clock curclk <- exposeCurrentClock;
	Reset currst <- exposeCurrentReset;

	// Request mergers: (addr, words, client index).
	MergeNIfc#(rcnt, Tuple3#(Bit#(64), Bit#(32), Bit#(8))) mReader <- mkMergeN;
	MergeNIfc#(wcnt, Tuple3#(Bit#(64), Bit#(32), Bit#(8))) mWriter <- mkMergeN;
	// NOTE(review): writerQs is not referenced elsewhere in this module.
	Vector#(wcnt, SyncFIFOIfc#(Bit#(512))) writerQs <- replicateM(mkSyncFIFO(8,curclk,currst,dramclk));
	Vector#(rcnt, FIFO#(Bit#(512))) readerQs <- replicateM(mkFIFO);

	// Write data: merged from all writers, then crossed into the DRAM domain.
	// Read data: crossed back from the DRAM domain, tagged with the client index.
	MergeNIfc#(wcnt, Bit#(512)) writerM <- mkMergeN;
	SyncFIFOIfc#(Bit#(512)) writerdQ <- mkSyncFIFO(8,curclk,currst,dramclk);
	SyncFIFOIfc#(Tuple2#(Bit#(8),Bit#(512))) readerdQ <- mkSyncFIFO(8,dramclk, dramrst, curclk);

	// Request crossings from the current clock domain into the DRAM domain.
	SyncFIFOIfc#(Tuple3#(Bit#(64), Bit#(32), Bit#(8))) readerQ <- mkSyncFIFO(2, curclk, currst, dramclk);
	SyncFIFOIfc#(Tuple3#(Bit#(64), Bit#(32), Bit#(8))) writerQ <- mkSyncFIFO(2, curclk, currst, dramclk);

	// DRAM-domain bookkeeping for the burst currently being serviced.
	Reg#(Bit#(32)) curReadCnt <- mkReg(0,clocked_by dramclk, reset_by dramrst);
	Reg#(Bit#(32)) curWriteCnt <- mkReg(0,clocked_by dramclk, reset_by dramrst);
	Reg#(Bit#(8)) curClientIdx <- mkReg(0,clocked_by dramclk, reset_by dramrst);

	// Client-domain count of words still outstanding for the accepted burst.
	// readReq / writeReq are accepted only while it is 0; it is decremented
	// once per word by relayWriteC and relayReadClient.
	Reg#(Bit#(32)) curCnt <- mkReg(0);

	// Forward merged requests into the DRAM clock domain.
	rule relayDramRead;
		let r = mReader.first;
		mReader.deq;
		readerQ.enq(r);
	endrule
	rule relayDramWrite;
		let r = mWriter.first;
		mWriter.deq;
		writerQ.enq(r);
	endrule
	// Start the next burst on the controller once the previous one has
	// finished (both DRAM-domain counters are 0).
	rule startDramWrite ( curReadCnt == 0 && curWriteCnt == 0 );
		writerQ.deq;
		let r = writerQ.first;
		dram.writeReq(tpl_1(r), tpl_2(r));
		curWriteCnt <= tpl_2(r);
		curClientIdx <= tpl_3(r);
	endrule
	rule startDramRead( curReadCnt == 0 && curWriteCnt == 0 );
		readerQ.deq;
		let r = readerQ.first;
		dram.readReq(tpl_1(r), tpl_2(r));
		curReadCnt <= tpl_2(r);
		curClientIdx <= tpl_3(r);
	endrule


	// Move merged write data toward the DRAM domain.
	// NOTE(review): write data carries no client tag; this appears to rely on
	// the curCnt guard admitting only one burst at a time -- confirm.
	rule relayWriteC;
		writerM.deq;
		writerdQ.enq(writerM.first);
		curCnt <= curCnt - 1;
	endrule
	rule relayWrite ( curReadCnt == 0 && curWriteCnt > 0 );
		curWriteCnt <= curWriteCnt - 1;
		writerdQ.deq;
		let d = writerdQ.first;
		dram.write(d);
	endrule

	// Collect read data in the DRAM domain, tag it with the requesting client,
	// then deliver it to that client's queue in the current clock domain.
	rule relayRead ( curReadCnt > 0 && curWriteCnt == 0 );
		curReadCnt <= curReadCnt - 1;
		let d <- dram.read;
		readerdQ.enq(tuple2(curClientIdx, d));
	endrule
	rule relayReadClient;
		readerdQ.deq;
		let r = readerdQ.first;
		curCnt <= curCnt - 1;

		readerQs[tpl_1(r)].enq(tpl_2(r));
	endrule

	Vector#(rcnt, DRAMBurstReaderIfc) readers_;
	Vector#(wcnt, DRAMBurstWriterIfc) writers_;

	for ( Integer i = 0; i < valueOf(rcnt); i=i+1) begin
		readers_[i] = interface DRAMBurstReaderIfc;
			method Action readReq(Bit#(64) addr, Bit#(32) words) if ( curCnt == 0 );
				mReader.enq[i].enq(tuple3(addr,words,fromInteger(i)));
				curCnt <= words;
			endmethod
			method ActionValue#(Bit#(512)) read;
				readerQs[i].deq;
				return readerQs[i].first;
			endmethod

		endinterface: DRAMBurstReaderIfc;
	end
	for ( Integer i = 0; i < valueOf(wcnt); i=i+1) begin
		writers_[i] = interface DRAMBurstWriterIfc;
			method Action writeReq(Bit#(64) addr, Bit#(32) words) if ( curCnt == 0 );
				mWriter.enq[i].enq(tuple3(addr,words,fromInteger(i)));
				curCnt <= words;
			endmethod
			method Action write(Bit#(512) word);
				writerM.enq[i].enq(word);
			endmethod
		endinterface: DRAMBurstWriterIfc;
	end
	interface readers = readers_;
	interface writers = writers_;
endmodule

--------------------------------------------------------------------------------
/dram/src/DRAMControllerTypes.bsv:
--------------------------------------------------------------------------------
import ClientServer::*;
import Connectable::*;

typedef Bit#(64) DDR3Address;
typedef Bit#(512) DDR3Data;

// DDR3 Request
// Used for both reads and writes.
//
// To perform a read:
//  writeen should be 0
//  address contains the address to read from
//  datain is ignored.

// To perform a write:
//  writeen should be 'hFFFFFFFFFFFFFFFF (all 64 bits set, to write all bytes),
//  or something else nonzero to only write some of the bytes.
//  address contains the address to write to
//  datain contains the data to be written.
typedef struct {
	// writeen: Enable writing.
	// Set the ith bit of writeen to 1 to write the ith byte of datain to the
	// ith byte of data at the given address.
	// If writeen is 0, this is a read request, and a response is returned.
	// If writeen is not 0, this is a write request, and no response is
	// returned.
	Bit#(64) writeen;

	// Address to read from or write to.
	// The DDR3 is 64 bit word addressed, but in bursts of 8 64 bit words.
	// The address should always be a multiple of 8 (bottom 3 bits 0),
	// otherwise strange things will happen.
	// For example: address 0 refers to the first 8 64 bit words in memory.
	//              address 8 refers to the second 8 64 bit words in memory.
	//DDR3Address address;
	Bit#(64) address;

	// Data to write.
	// For read requests this is ignored.
	// Only those bytes with corresponding bit set in writeen will be written.
	// DDR3Data datain;
	Bit#(512) datain;
} DDRRequest deriving(Bits, Eq);

// DDR3 Response.
// Data read from requested address.
// There will only be a response if writeen was 0 in the request.
typedef Bit#(512) DDRResponse;

typedef Client#(DDRRequest, DDRResponse) DDR3Client;

// Write request record: byte count, address, 512 bits of data and two 64-bit
// masks. NOTE(review): the meaning of mask0 vs. mask1 is not visible here.
typedef struct {
	//Bool rnw;
	Bit#(7) nBytes;
	//Bit#(7) firstNbytes;
	//Bool oneCmd;
	Bit#(64) addr;
	Bit#(512) data;
	Bit#(64) mask0;
	Bit#(64) mask1;
	//Bit#(64) mask1;
} DRAMWrRequest deriving (Bits, Eq);



// Read request record: byte count and address.
typedef struct {
	Bit#(7) nBytes;
	Bit#(64) addr;
} DRAMRdRequest deriving (Bits, Eq);

--------------------------------------------------------------------------------
/dram/vc707/Makefile:
--------------------------------------------------------------------------------
# Runs Vivado in batch mode on synth-ip.tcl, without log or journal files.
all:
	vivado -mode batch -source synth-ip.tcl -nolog -nojournal


--------------------------------------------------------------------------------
/dram/vc707/chipscope_wrapper.v:
--------------------------------------------------------------------------------



// Debug tap for the DDR3 user interface: every input except ui_clk is
// registered once on ui_clk into a mark_debug register, and those registers
// are wired to the probes of an ILA instance (ila_ddr). The module has no
// outputs.
module debug_ddr
(
	input ui_clk,
	input ui_clk_sync_rst,//
	input init_calib_complete,//
	input sys_rst,//

	input [27:0] app_addr, //
	input [2:0] app_cmd,//
	input app_en,//
	input [511:0] app_wdf_data,//
	input app_wdf_end,//
	input [63:0] app_wdf_mask,//
	input app_wdf_wren,//
	input [511:0] app_rd_data,//
	input app_rd_data_end,//
	input app_rd_data_valid,//
	input app_rdy,//
	input app_wdf_rdy//
);

(* mark_debug = "true" *) reg ui_clk_sync_rst_reg;//
(* mark_debug = "true" *) reg init_calib_complete_reg;//
(* mark_debug = "true" *) reg sys_rst_reg;

(* mark_debug = "true" *) reg [27:0] app_addr_reg; //
(* mark_debug = "true" *) reg [2:0] app_cmd_reg;//
(* mark_debug = "true" *) reg app_en_reg;//
(* mark_debug = "true" *) reg [511:0] app_wdf_data_reg;//
(* mark_debug = "true" *) reg app_wdf_end_reg;//
(* mark_debug = "true" *) reg [63:0] app_wdf_mask_reg;//
(* mark_debug = "true" *) reg app_wdf_wren_reg;//
(* mark_debug = "true" *) reg [511:0] app_rd_data_reg;//
(* mark_debug = "true" *) reg app_rd_data_end_reg;//
(* mark_debug = "true" *) reg app_rd_data_valid_reg;//
(* mark_debug = "true" *) reg app_rdy_reg;//
(* mark_debug = "true" *) reg app_wdf_rdy_reg;//


// One register stage on ui_clk for every probed signal.
always @ (posedge ui_clk) begin
	ui_clk_sync_rst_reg <= ui_clk_sync_rst;//
	init_calib_complete_reg <= init_calib_complete;//
	sys_rst_reg <= sys_rst;

	app_addr_reg <= app_addr; //
	app_cmd_reg <= app_cmd;//
	app_en_reg <= app_en;//
	app_wdf_data_reg <= app_wdf_data;//
	app_wdf_end_reg <= app_wdf_end;//
	app_wdf_mask_reg <= app_wdf_mask;//
	app_wdf_wren_reg <= app_wdf_wren;//
	app_rd_data_reg <= app_rd_data;//
	app_rd_data_end_reg <= app_rd_data_end;//
	app_rd_data_valid_reg <= app_rd_data_valid;//
	app_rdy_reg <= app_rdy;//
	app_wdf_rdy_reg <= app_wdf_rdy;//
end


// probe15 is tied to constant 0.
ila_ddr u_ila_ddr
(
	.clk (ui_clk),
	.probe0 (app_addr_reg),
	.probe1 (app_cmd_reg),
	.probe2 (app_en_reg),
	.probe3 (app_wdf_data_reg),
	.probe4 (app_wdf_end_reg),
	.probe5 (app_wdf_mask_reg),
	.probe6 (app_wdf_wren_reg),
	.probe7 (app_rd_data_reg),
	.probe8 (app_rd_data_end_reg),
	.probe9 (app_rd_data_valid_reg),
	.probe10 (app_rdy_reg),
	.probe11 (app_wdf_rdy_reg),
	.probe12 (ui_clk_sync_rst_reg),
	.probe13 (init_calib_complete_reg),
	.probe14 (sys_rst_reg),
	.probe15 (0)
);

endmodule

--------------------------------------------------------------------------------
/dram/vc707/ddr3_v2_0.xdc:
--------------------------------------------------------------------------------
#create_clock -name ddr3_refclk -period 5 [get_pins host_sys_clk_200mhz_buf/O]
#create_generated_clock -name ddr3_usrclk -source clk_gen_pll_CLKOUT2 -multiply_by 5 -divide_by 5 [get_pins ddr3_ctrl_ui_clk]
create_generated_clock -name ddr3_usrclk -source clk_200mhz -multiply_by 5 -divide_by 5 [get_pins ddr3_ctrl/u_ddr3_v2_0/ui_clk]

set_clock_groups -asynchronous -group {clk_125mhz} -group {ddr3_usrclk}
set_clock_groups -asynchronous -group {clk_250mhz} -group {ddr3_usrclk}
set_clock_groups -asynchronous -group {clk_125mhz} -group {clk_pll_i}
set_clock_groups -asynchronous -group {clk_250mhz} -group {clk_pll_i}

# NOTE(review): the first two set_false_path lines below are identical.
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3_ctrl_user_reset_n/*}] -hier -filter {NAME=~ *C}]
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3_ctrl_user_reset_n/*}] -hier -filter {NAME=~ *C}]
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3ref_rst_n/*}] -hier -filter {NAME=~ *CLR}]
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr_cli_200Mhz_reqs/*}] -hier -filter {NAME=~ *CLR}]

--------------------------------------------------------------------------------
/dram/vc707/dram.xdc:
--------------------------------------------------------------------------------
#FIXME old stuff from KC705
create_generated_clock -name ddr3_usrclk -source [get_pins sys_clk_200mhz_buf/O] -multiply_by 5 -divide_by 5 [get_pins -hier -regexp { .*ddr3_ctrl/u_ddr3_0/ui_clk}]

set_clock_groups -asynchronous -group {pcie_clk_125mhz} -group {ddr3_usrclk}
set_clock_groups -asynchronous -group {pcie_clk_250mhz} -group {ddr3_usrclk}
#set_clock_groups -asynchronous -group {pcie_clk_125mhz} -group {clk_pll_i}
#set_clock_groups -asynchronous -group {pcie_clk_250mhz} -group {clk_pll_i}
set_clock_groups -asynchronous -group {userclk2} -group {clk_pll_i}
set_clock_groups -asynchronous -group {userclk2} -group {ddr3_usrclk}
set_clock_groups -asynchronous -group {userclk2} -group {sys_clk_200}
set_clock_groups -asynchronous -group {clk_pll_i} -group {sys_clk_200}
set_clock_groups -asynchronous -group {sys_clk_200} -group {ddr3_usrclk}

# NOTE(review): the first two set_false_path lines below are identical.
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3_ctrl_user_reset_n/*}] -hier -filter {NAME=~ *C}]
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3_ctrl_user_reset_n/*}] -hier -filter {NAME=~ *C}]
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr3ref_rst_n/*}] -hier -filter {NAME=~ *CLR}]
set_false_path -from [get_pins -of_objects [get_cells -hier -filter {NAME =~ *ddr_cli_200Mhz_reqs/*}] -hier -filter {NAME=~ *CLR}]


--------------------------------------------------------------------------------
/dram/vc707/syncfifo.xdc:
--------------------------------------------------------------------------------
#create_clock -name ddr3_refclk -period 5 [get_pins host_sys_clk_200mhz_buf/O]
create_generated_clock -name ddr3_usrclk -source clk_gen_pll_CLKOUT2 -multiply_by 5 -divide_by 5 [get_pins *ddr3_ctrl/CLK]
create_generated_clock -name app_clk -source [get_pins */clkgen_pll/CLKIN1] -divide_by 2 [get_pins */clkgen_pll/CLKOUT0]

# Bound the datapath delay between app_clk and ddr3_usrclk in both directions.
set_max_delay -from [get_clocks app_clk] -to [get_clocks ddr3_usrclk] 5.000 -datapath_only
set_max_delay -from [get_clocks ddr3_usrclk] -to [get_clocks app_clk] 5.000 -datapath_only
--------------------------------------------------------------------------------
/dram/vc707/synth-ip.tcl:
--------------------------------------------------------------------------------
# Generates and synthesizes the MIG 7-series DDR3 IP core (module name
# ddr3_0) for the VC707 board, using an in-memory Vivado project.
# The core is configured from ../../mig_a.prj and written under ./core/.
set coredir "./core/"
set corename "ddr3_0"

# Start from a clean output directory for this core.
file mkdir $coredir
if [file exists ./$coredir/$corename] {
	file delete -force ./$coredir/$corename
}

create_project -name local_synthesized_ip -in_memory -part xc7vx485tffg1761-2
set_property board_part xilinx.com:vc707:part0:1.0 [current_project]
create_ip -name mig_7series -version 4.* -vendor xilinx.com -library ip -module_name $corename -dir ./$coredir

set_property -dict [list CONFIG.XML_INPUT_FILE "../../mig_a.prj" CONFIG.RESET_BOARD_INTERFACE {Custom} CONFIG.MIG_DONT_TOUCH_PARAM {Custom} CONFIG.BOARD_MIG_PARAM {Custom}] [get_ips $corename]

# Generate the output products, then synthesize the IP out of context.
generate_target {instantiation_template} [get_files ./$coredir/$corename/$corename.xci]
generate_target all [get_files ./$coredir/$corename/$corename.xci]
create_ip_run [get_files -of_objects [get_fileset sources_1] ./$coredir/$corename/$corename.xci]
generate_target {Synthesis} [get_files ./$coredir/$corename/$corename.xci]
read_ip ./$coredir/$corename/$corename.xci
synth_ip [get_ips $corename]

--------------------------------------------------------------------------------
/examples/dmatest/HwMain.bsv:
--------------------------------------------------------------------------------
import FIFO::*;
import FIFOF::*;
import Clocks::*;
import Vector::*;

import BRAM::*;
import BRAMFIFO::*;

import PcieCtrl::*;


interface HwMainIfc;
endinterface

// DMA test harness driven over PCIe memory-mapped I/O.
// Host writes (32-bit register index = addr>>2):
//   0 : request a DMA write of `data` words at offset 0; sendDMAData then
//       supplies that many words
//   1 : request a DMA read of `data` words at offset 0
// Host reads:
//   0 : wordWriteLeft   1 : wordWriteReq   2 : wordReadLeft
//   3+: 32-bit slice of the last DMA word received, shifted by (index-3)*32
// All registers are clocked by the PCIe user clock.
module mkHwMain#(PcieUserIfc pcie)
	(HwMainIfc);

	// NOTE(review): curClk / curRst are not referenced elsewhere in this module.
	Clock curClk <- exposeCurrentClock;
	Reset curRst <- exposeCurrentReset;

	Clock pcieclk = pcie.user_clk;
	Reset pcierst = pcie.user_rst;


	Reg#(Bit#(32)) wordReadLeft <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(32)) wordWriteLeft <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(32)) wordWriteReq <- mkReg(0, clocked_by pcieclk, reset_by pcierst);

	// Accept a host command only when no DMA write is still being fed.
	rule getCmd ( wordWriteLeft == 0 );
		let w <- pcie.dataReceive;
		let a = w.addr;
		let d = w.data;
		let off = (a>>2);
		if ( off == 0 ) begin
			wordWriteLeft <= d;
			wordWriteReq <= d;
			pcie.dmaWriteReq( 0, truncate(d) ); // offset, words
		end else if ( off == 1 ) begin
			pcie.dmaReadReq( 0, truncate(d)); // offset, words
			wordReadLeft <= wordReadLeft + d;
		end
	endrule

	Reg#(DMAWord) lastRecvWord <- mkReg(0, clocked_by pcieclk, reset_by pcierst);

	// Consume one DMA read word; only the most recent word is kept.
	// NOTE(review): both this rule and getCmd read and write wordReadLeft, so
	// the compiler will presumably not fire them in the same cycle -- confirm
	// in the schedule output.
	rule recvDMAData;
		let rd <- pcie.dmaReadWord;
		wordReadLeft <= wordReadLeft - 1;
		lastRecvWord <= rd;
	endrule

	// Generate DMA write data: each word is four consecutive 32-bit counter
	// values, with writeData in the least-significant position.
	Reg#(Bit#(32)) writeData <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	rule sendDMAData ( wordWriteLeft > 0 );
		pcie.dmaWriteData({writeData+3,writeData+2,writeData+1,writeData});
		writeData <= writeData + 4;
		wordWriteLeft <= wordWriteLeft - 1;
	endrule

	// Answer host register reads with the status counters / last received word.
	rule readStat;
		let r <- pcie.dataReq;
		let a = r.addr;

		// PCIe IO is done at 4 byte granularities
		// lower 2 bits are always zero
		let offset = (a>>2);
		if ( offset == 0 ) begin
			pcie.dataSend(r, wordWriteLeft);
		end else if ( offset == 1 ) begin
			pcie.dataSend(r, wordWriteReq);
		end else if ( offset == 2 ) begin
			pcie.dataSend(r, wordReadLeft);
		end else begin
			let noff = (offset-3)*32;
			//pcie.dataSend(r, pcie.debug_data);
			pcie.dataSend(r, truncate(lastRecvWord>>noff));
		end
	endrule

endmodule

--------------------------------------------------------------------------------
/examples/dmatest/Makefile:
--------------------------------------------------------------------------------
LIBPATH =../../
BOARD=kc705
BUILDTOOLS=$(LIBPATH)/buildtools/

BLIBPATH=$(LIBPATH)/../bluelib/src/

CUSTOMBSV= -p +:$(BLIBPATH)/
CUSTOMCPP_BSIM= $(BLIBPATH)/bdpi.cpp

# The build rules themselves come from the shared Makefile.base.
include $(BUILDTOOLS)/Makefile.base


--------------------------------------------------------------------------------
/examples/dmatest/Top.bsv:
--------------------------------------------------------------------------------
/*
*/

import Clocks :: *;
import ClockImport::*;
import DefaultValue :: *;

import PcieImport :: *;
import PcieCtrl :: *;
import PcieCtrl_bsim :: *;

// NOTE(review): Clocks is imported a second time here.
import Clocks :: *;
import FIFO::*;

import HwMain::*;

//import Platform :: *;

//import NullReset :: *;
//import IlaImport :: *;

// Top-level pins: the PCIe pin bundle and a 4-bit LED output.
interface TopIfc;
	(* always_ready *)
	interface PcieImportPins pcie_pins;
	(* always_ready *)
	method Bit#(4) led;
endinterface

// Hardware top level.
//   - imports the PCIe core (mkPcieImport) from the PCIe clock pair, reset and emcclk
//   - builds a buffered clock from sys_clk_p/n (IBUFDS import followed by a
//     BUFG import; named *_200mhz in this file) and a reset for it derived
//     from pcie_rst_n
//   - instantiates mkPcieCtrl in the PCIe user clock domain
//   - instantiates the user design mkHwMain on the buffered system clock
// The led method always returns 0.
(* no_default_clock, no_default_reset *)
module mkProjectTop #(
	Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk,
	Clock sys_clk_p, Clock sys_clk_n,
	Reset pcie_rst_n
	)
		(TopIfc);


	PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk);
	// NOTE(review): pcie_clk_buf / pcie_rst_n_buf are not referenced below.
	Clock pcie_clk_buf = pcie.sys_clk_o;
	Reset pcie_rst_n_buf = pcie.sys_rst_n_o;

	ClockGenIfc clk_200mhz_import <- mkClockIBUFDSImport(sys_clk_p, sys_clk_n);
	Clock sys_clk_200mhz = clk_200mhz_import.gen_clk;
	ClockGenIfc sys_clk_200mhz_buf_import <- mkClockBUFGImport(clocked_by sys_clk_200mhz);
	Clock sys_clk_200mhz_buf = sys_clk_200mhz_buf_import.gen_clk;
	Reset rst200 <- mkAsyncReset( 4, pcie_rst_n, sys_clk_200mhz_buf);

	PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset);
	/*
	ClockGenerator7Params clk_params = defaultValue();
	clk_params.clkin1_period = 10.000; // 100 MHz reference
	clk_params.clkin_buffer = False; // necessary buffer is instanced above
	clk_params.reset_stages = 0; // no sync on reset so input clock has pll as only load
	clk_params.clkfbout_mult_f = 10.000; // 1000 MHz VCO
	clk_params.clkout0_divide_f = 4; // 250MHz clock
	clk_params.clkout1_divide = 8; // 125MHz clock
	ClockGenerator7 clk_gen <- mkClockGenerator7(clk_params, clocked_by sys_clk_buf, reset_by sys_rst_n_buf);
	Clock clk250 = clk_gen.clkout0;
	Reset rst250 <- mkAsyncReset( 4, sys_rst_n_buf, clk250);

	Clock clk125 = clk_gen.clkout0;
	Reset rst125 <- mkAsyncReset( 4, sys_rst_n_buf, clk125);
	*/
	HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by sys_clk_200mhz_buf, reset_by rst200);

	//ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds);

	// Interfaces ////
	interface PcieImportPins pcie_pins = pcie.pins;

	method Bit#(4) led;
		//return leddata;
		return 0;
	endmethod
endmodule

// Bluesim top level: the simulated PCIe controller wired straight to mkHwMain.
module mkProjectTop_bsim (Empty);
	// NOTE(review): curclk is not referenced below.
	Clock curclk <- exposeCurrentClock;

	PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim;

	HwMainIfc hwmain <- mkHwMain(pcieCtrl.user);
endmodule

--------------------------------------------------------------------------------
/examples/dmatest/cpp/Makefile:
--------------------------------------------------------------------------------
LIBPATH=../../../
#LIBOBJ=$(LIBPATH)/cpp/obj/

BDBMPCIEINCLUDE= -I$(LIBPATH)/cpp/
BDBMPCIECPP= $(LIBPATH)/cpp/bdbmpcie.cpp $(LIBPATH)/cpp/ShmFifo.cpp
LIB= -lrt -lpthread


# `all` builds obj/main for real hardware; `bsim` builds obj/bsim with
# -DBLUESIM for use with the Bluesim simulation.
all:
	echo "building for pcie"
	mkdir -p obj
	g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/main $(LIB) -pedantic -g -O2
bsim:
	echo "building for bluesim"
	mkdir -p obj
	g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g -pedantic

--------------------------------------------------------------------------------
/examples/dmatest/cpp/main.cpp:
--------------------------------------------------------------------------------
// NOTE(review): the header names of the two directives below are missing in
// this copy of the file (the <...> part appears to have been stripped).
// The code uses printf/fflush, sleep and clock_gettime, so they are
// presumably <stdio.h> and <unistd.h> -- confirm against the repository.
#include
#include

#include "bdbmpcie.h"
//#include "dmasplitter.h"

// Returns end - start in seconds as a double (seconds plus nanoseconds / 1e9).
double timespec_diff_sec( timespec start, timespec end ) {
	double t = end.tv_sec - start.tv_sec;
	t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L);
	return t;
}

// DMA test driver:
//   1. prints two words read with readWord (labelled "Magic" and "Dma Addr 0")
//   2. fills the first 8 KiB of the DMA buffer with a byte pattern
//   3. issues 32M writes of value 8 to user register 0 and times them
//      (reported as "DMA HW->SW")
//   4. prints status registers and the first 8 buffer words, overwriting them
//      with 0..7
//   5. issues 32M writes of value 8 to user register 4 and times them
//      (reported as "DMA SW->HW")
//   6. prints user registers 0..24
// argc / argv are only referenced from commented-out code.
int main(int argc, char** argv) {
	BdbmPcie* pcie = BdbmPcie::getInstance();
	//DMASplitter* dma = DMASplitter::getInstance();

	//uint32_t size = 128*128;

	/*
	if ( argc > 1 ) {
		size = atoi(argv[1]);
	}
	*/
	unsigned int d = pcie->readWord(0);
	printf( "Magic: %x\n", d );
	fflush(stdout);
	d = pcie->readWord(32);
	printf( "Dma Addr 0: %x\n", d );
	fflush(stdout);


	//uint8_t* dmabuf = (uint8_t*)dma->dmaBuffer();
	uint8_t* dmabuf = (uint8_t*)pcie->dmaBuffer();
	// Byte i gets the low 8 bits of i; the first 16 bytes are then set to 0xaa.
	for ( int i = 0; i < 8*1024; i++ ) {
		dmabuf[i] = (char)i;
	}
	for ( int i = 0; i < 16; i++ ) {
		dmabuf[i] = 0xaa;
	}

	timespec start;
	timespec now;
	clock_gettime(CLOCK_REALTIME, & start);
	for ( int i = 0; i < 1024*1024*32; i++ ) {
		pcie->userWriteWord(0, 8);
	}

	clock_gettime(CLOCK_REALTIME, & now);
	double diff = timespec_diff_sec(start, now);
	printf( "DMA HW->SW elapsed: %f\n", diff );
	sleep(1);

	printf( "read 0: %x\n", pcie->userReadWord(0) );
	printf( "read 4: %x\n", pcie->userReadWord(4) );
	printf( "read 8: %x\n", pcie->userReadWord(8) );


	// Dump the first 8 32-bit words of the DMA buffer, then replace them
	// with 0..7 before the second phase.
	uint32_t* buf32 = (uint32_t*)dmabuf;
	for ( int i = 0; i < 8; i++ ) {
		printf( "%d %x\n", i, buf32[i] );
		buf32[i] = i;
	}

	clock_gettime(CLOCK_REALTIME, & start);
	for ( int i = 0; i < 1024*1024*32; i++ ) {
		pcie->userWriteWord(4, 8);
	}

	clock_gettime(CLOCK_REALTIME, & now);
	diff = timespec_diff_sec(start, now);
	printf( "DMA SW->HW elapsed: %f\n", diff );

	sleep(1);

	printf( "read 0: %x\n", pcie->userReadWord(0) );
	printf( "read 4: %x\n", pcie->userReadWord(4) );
	printf( "read 8: %x\n", pcie->userReadWord(8) );
	printf( "read 12: %x\n", pcie->userReadWord(12) );
	printf( "read 16: %x\n", pcie->userReadWord(16) );
	printf( "read 20: %x\n", pcie->userReadWord(20) );
	printf( "read 24: %x\n", pcie->userReadWord(24) );

	sleep(1);

	//printf( "Requesting memread with size %d\n", size );
	//dma->sendWord(0, size,0,0,0);

	/*
	while (1) {
		PCIeWord w = dma->recvWord();
		printf( "%x %x %x %x %x\n", w.header, w.d[0], w.d[1], w.d[2], w.d[3] );
		if ( w.header == 0 ) break;
	}
	*/




	/*
	FlashManager* flash = FlashManager::getInstance();

	uint32_t* pageBufferW = (uint32_t*)malloc(8192+32);
	uint32_t* pageBufferR = (uint32_t*)malloc(8192+32);
	for ( int i = 0; i < 8192/4; i++ ) {
		pageBufferW[i] = i;
	}
	printf( "Sending erase message:\n" );
	flash->eraseBlock(1,1,1);
	//sleep(1);
	flash->writePage(1,1,1,0, pageBufferW);

	sleep(5);

	exit(1);


	printf( "\t\tSending read cmd\n" );


	flash->readPage(1,1,1,0, pageBufferR);


	printf( "\t\tStarting read\n" );
	sleep(1);

	for ( int i = 0; i < (8192+32)/4; i++ ) {
		int idx = i % 4;
		uint32_t r = pcie->readWord((1024+1+idx)*4);
		printf( "%d:%x ", i, r );
		if ( i % 8 == 0 ) printf( "\n" );
	}
	*/



	/*
	for ( int i = 0; i < 1024; i++ ) {
		pcie->writeWord((1024+3)*4, 0);
	}

	pcie->writeWord((1024+1)*4, 0);

	pcie->writeWord(1024*4, 4);
	for ( int i = 0; i < 32; i++ ) {
		pcie->writeWord(1024*4, 8);
	}
	sleep(1);
	pcie->writeWord((1024+1)*4, 0);


	for ( int i = 0; i < 4096; i++ ) {
		if ( ubuf[i] != 0 ) {
			printf( "%d: %x\n", i, ubuf[i] );
		}
	}
	*/
}
-------------------------------------------------------------------------------- /examples/dmatest/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$! 5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/dmatest/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/dmatest/user-ip.tcl: -------------------------------------------------------------------------------- 1 | ## None 2 | -------------------------------------------------------------------------------- /examples/dramtest/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | 6 | import BRAM::*; 7 | import BRAMFIFO::*; 8 | 9 | import PcieCtrl::*; 10 | import DRAMController::*; 11 | 12 | interface HwMainIfc; 13 | endinterface 14 | 15 | module mkHwMain#(PcieUserIfc pcie, DRAMUserIfc dram) 16 | (HwMainIfc); 17 | 18 | Clock curClk <- exposeCurrentClock; 19 | Reset curRst <- exposeCurrentReset; 20 | 21 | Clock pcieclk = pcie.user_clk; 22 | Reset pcierst = pcie.user_rst; 23 | 24 | //DMASplitterIfc#(4) dma <- mkDMASplitter(pcie); 25 | 26 | Reg#(Bit#(32)) cycles <- mkReg(0); 27 | rule incCycle; 28 | cycles <= cycles + 1; 29 | endrule 30 | 31 | FIFO#(Tuple2#(Bit#(16),Bit#(16))) dramReadReqQ <- mkSizedBRAMFIFO(1024); // offset, words 32 | Reg#(Bit#(16)) dramReadReqCnt <- mkReg(0); 33 | Reg#(Bit#(16)) dramReadReqDone <- mkReg(0); 34 | Reg#(Bit#(16)) dramReqWordLeft <- mkReg(0); 35 | Reg#(Bit#(16)) 
dramReqWordOff <- mkReg(0); 36 | Reg#(Bit#(32)) startCycle <- mkReg(0); 37 | Reg#(Bit#(32)) elapsedCycle <- mkReg(0); 38 | rule startDRAMRead(dramReadReqCnt >= 1024 && dramReqWordLeft == 0); 39 | let r = dramReadReqQ.first; 40 | dramReadReqQ.deq; 41 | dramReqWordLeft <= tpl_2(r); 42 | dramReqWordOff <= tpl_1(r); 43 | dramReadReqDone <= dramReadReqDone + 1; 44 | if ( dramReadReqDone == 0 ) startCycle <= cycles; 45 | endrule 46 | FIFO#(Bool) isLastQ <- mkSizedFIFO(64); 47 | rule issueDRAMRead (dramReqWordLeft > 0 ); 48 | dramReqWordLeft <= dramReqWordLeft -1; 49 | dramReqWordOff <= dramReqWordOff + 1; 50 | dram.readReq(zeroExtend(dramReqWordOff)*64, 64); 51 | if ( dramReqWordLeft == 1 && dramReadReqDone == dramReadReqCnt ) isLastQ.enq(True); 52 | else isLastQ.enq(False); 53 | endrule 54 | rule procDRAMRead; 55 | let d <- dram.read; 56 | isLastQ.deq; 57 | if ( isLastQ.first ) elapsedCycle <= cycles-startCycle; 58 | endrule 59 | 60 | 61 | Reg#(Bit#(32)) wordReadLeft <- mkReg(0); 62 | Reg#(Bit#(32)) wordWriteLeft <- mkReg(0); 63 | Reg#(Bit#(32)) wordWriteReq <- mkReg(0); 64 | Reg#(Bit#(32)) dramWriteLeft <- mkReg(0); 65 | Reg#(Bit#(32)) dramReadLeft <- mkReg(0); 66 | Reg#(Bit#(32)) dramWriteStartCycle <- mkReg(0); 67 | Reg#(Bit#(32)) dramWriteEndCycle <- mkReg(0); 68 | 69 | 70 | 71 | rule getCmd ( wordWriteLeft == 0 ); 72 | let w <- pcie.dataReceive; 73 | let a = w.addr; 74 | let d = w.data; 75 | let off = (a>>2); 76 | if ( off == 0 ) begin 77 | wordWriteLeft <= d; 78 | wordWriteReq <= d; 79 | pcie.dmaWriteReq( 0, truncate(d)); // offset, words 80 | end else if ( off == 1 ) begin 81 | pcie.dmaReadReq( 0, truncate(d)); // offset, words 82 | wordReadLeft <= wordReadLeft + d; 83 | end else if ( off == 2 ) begin 84 | dramWriteLeft <= d; 85 | dramWriteStartCycle <= cycles; 86 | end else if ( off == 3 ) begin 87 | dramReadReqQ.enq(tuple2(truncate(d>>16), truncate(d))); 88 | dramReadReqCnt <= dramReadReqCnt + 1; 89 | end 90 | endrule 91 | 92 | rule dramWrite( dramWriteLeft > 0 
); 93 | dramWriteLeft <= dramWriteLeft - 1; 94 | Bit#(128) v0 = 128'h11112222333344445555666600000000 | zeroExtend(dramWriteLeft); 95 | Bit#(128) v1 = 128'hcccccccccccccccccccccccc00000000 | zeroExtend(dramWriteLeft); 96 | Bit#(128) v2 = 128'hdeadbeefdeadbeeddeadbeef00000000 | zeroExtend(dramWriteLeft); 97 | Bit#(128) v3 = 128'h88887777666655554444333300000000 | zeroExtend(dramWriteLeft); 98 | 99 | dram.write(zeroExtend(dramWriteLeft)*64, {v0,v1,v2,v3},64); 100 | if ( dramWriteLeft == 1 ) begin 101 | dramWriteEndCycle <= cycles; 102 | end 103 | endrule 104 | 105 | rule dramReadReq ( dramReadLeft > 0 ); 106 | dramReadLeft <= dramReadLeft - 1; 107 | 108 | dram.readReq(zeroExtend(dramReadLeft)*64, 64); 109 | endrule 110 | Reg#(Bit#(512)) dramReadVal <- mkReg(0); 111 | rule dramReadResp; 112 | let d <- dram.read; 113 | dramReadVal <= d; 114 | endrule 115 | 116 | Reg#(DMAWord) lastRecvWord <- mkReg(0); 117 | 118 | rule recvDMAData; 119 | wordReadLeft <= wordReadLeft - 1; 120 | let d <- pcie.dmaReadWord; 121 | lastRecvWord <= d; 122 | endrule 123 | 124 | Reg#(Bit#(32)) writeData <- mkReg(0); 125 | rule sendDMAData ( wordWriteLeft > 0 ); 126 | pcie.dmaWriteData({writeData+3,writeData+2,writeData+1,writeData}); 127 | writeData <= writeData + 4; 128 | wordWriteLeft <= wordWriteLeft - 1; 129 | endrule 130 | 131 | rule readStat; 132 | let r <- pcie.dataReq; 133 | let a = r.addr; 134 | 135 | // PCIe IO is done at 4 byte granularities 136 | // lower 2 bits are always zero 137 | let offset = (a>>2); 138 | if ( offset == 0 ) begin 139 | //pcie.dataSend(r, wordWriteLeft); 140 | pcie.dataSend(r, dramWriteLeft); 141 | end else if ( offset == 1 ) begin 142 | //pcie.dataSend(r, wordWriteReq); 143 | pcie.dataSend(r, dramReadLeft); 144 | end else if ( offset == 2 ) begin 145 | //pcie.dataSend(r, wordReadLeft); 146 | pcie.dataSend(r, dramWriteEndCycle-dramWriteStartCycle); 147 | end else begin 148 | //let noff = (offset-3)*32; 149 | //pcie.dataSend(r, pcie.debug_data); 150 | 
//pcie.dataSend(r, truncate(dramReadVal>>noff)); 151 | pcie.dataSend(r, elapsedCycle); 152 | 153 | end 154 | endrule 155 | 156 | endmodule 157 | -------------------------------------------------------------------------------- /examples/dramtest/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH =../../ 2 | BOARD=kc705 3 | BUILDTOOLS=$(LIBPATH)/buildtools/ 4 | 5 | BLIBPATH=$(LIBPATH)/../bluelib/src/ 6 | 7 | CUSTOMBSV= -p +:$(LIBPATH)/dram/src/:$(BLIBPATH)/ 8 | CUSTOMCPP_BSIM=$(BLIBPATH)/bdpi.cpp 9 | 10 | include $(BUILDTOOLS)/Makefile.base 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/dramtest/Top.bsv: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | import Clocks::*; 5 | import ClockImport::*; 6 | import DefaultValue::*; 7 | import FIFO::*; 8 | import Connectable::*; 9 | 10 | 11 | // PCIe stuff 12 | import PcieImport :: *; 13 | import PcieCtrl :: *; 14 | import PcieCtrl_bsim :: *; 15 | 16 | // DRAM stuff 17 | import DDR3Sim::*; 18 | import DDR3Controller::*; 19 | import DDR3Common::*; 20 | import DRAMController::*; 21 | 22 | import HwMain::*; 23 | 24 | //import Platform :: *; 25 | 26 | //import NullReset :: *; 27 | //import IlaImport :: *; 28 | 29 | interface TopIfc; 30 | (* always_ready *) 31 | interface PcieImportPins pcie_pins; 32 | (* always_ready *) 33 | method Bit#(4) led; 34 | 35 | interface DDR3_Pins_1GB pins_ddr3; 36 | endinterface 37 | 38 | (* no_default_clock, no_default_reset *) 39 | module mkProjectTop #( 40 | Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk, 41 | Clock sys_clk_p, Clock sys_clk_n, 42 | Reset pcie_rst_n 43 | ) 44 | (TopIfc); 45 | 46 | 47 | PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk); 48 | Clock pcie_clk_buf = pcie.sys_clk_o; 49 | Reset pcie_rst_n_buf = pcie.sys_rst_n_o; 50 | 51 | 52 | ClockGenIfc clk_200mhz_import <- 
mkClockIBUFDSImport(sys_clk_p, sys_clk_n); 53 | Clock sys_clk_200mhz = clk_200mhz_import.gen_clk; 54 | ClockGenIfc sys_clk_200mhz_buf_import <- mkClockBUFGImport(clocked_by sys_clk_200mhz); 55 | Clock sys_clk_200mhz_buf = sys_clk_200mhz_buf_import.gen_clk; 56 | Reset rst200 <- mkAsyncReset( 4, pcie_rst_n_buf, sys_clk_200mhz_buf); 57 | 58 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset); 59 | 60 | 61 | Clock ddr_buf = sys_clk_200mhz_buf; 62 | Reset ddr3ref_rst_n <- mkAsyncResetFromCR(4, ddr_buf, reset_by pcieCtrl.user.user_rst); 63 | 64 | DDR3Common::DDR3_Configure ddr3_cfg = defaultValue; 65 | ddr3_cfg.reads_in_flight = 32; // adjust as needed 66 | DDR3_Controller_1GB ddr3_ctrl <- mkDDR3Controller_1GB(ddr3_cfg, ddr_buf, clocked_by ddr_buf, reset_by ddr3ref_rst_n); 67 | DRAMControllerIfc dramController <- mkDRAMController(ddr3_ctrl.user, clocked_by pcieCtrl.user.user_clk, reset_by pcieCtrl.user.user_rst); 68 | 69 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, dramController.user, clocked_by pcieCtrl.user.user_clk, reset_by pcieCtrl.user.user_rst); 70 | 71 | 72 | 73 | 74 | 75 | //ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds); 76 | 77 | // Interfaces //// 78 | interface PcieImportPins pcie_pins = pcie.pins; 79 | 80 | interface DDR3_Pins_1GB pins_ddr3 = ddr3_ctrl.ddr3; 81 | 82 | method Bit#(4) led; 83 | //return leddata; 84 | return 0; 85 | endmethod 86 | endmodule 87 | 88 | module mkProjectTop_bsim (Empty); 89 | Clock curclk <- exposeCurrentClock; 90 | 91 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim; 92 | 93 | let ddr3_ctrl_user <- mkDDR3Simulator; 94 | DRAMControllerIfc dramController <- mkDRAMController(ddr3_ctrl_user); 95 | //mkConnection(dramController.ddr3_cli, ddr3_ctrl_user); 96 | 97 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, dramController.user); 98 | endmodule 99 | -------------------------------------------------------------------------------- /examples/dramtest/cpp/Makefile: 
-------------------------------------------------------------------------------- 1 | LIBPATH=../../../ 2 | #LIBOBJ=$(LIBPATH)/cpp/obj/ 3 | 4 | BDBMPCIEINCLUDE= -I$(LIBPATH)/cpp/ 5 | BDBMPCIECPP= $(LIBPATH)/cpp/bdbmpcie.cpp $(LIBPATH)/cpp/ShmFifo.cpp $(LIBPATH)/cpp/DRAMHostDMA.cpp 6 | LIB= -lrt -lpthread 7 | 8 | 9 | all: 10 | echo "building for pcie" 11 | mkdir -p obj 12 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/main $(LIB) -pedantic -g -O2 13 | bsim: 14 | echo "building for bluesim" 15 | mkdir -p obj 16 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g -pedantic 17 | -------------------------------------------------------------------------------- /examples/dramtest/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "bdbmpcie.h" 7 | //#include "dmasplitter.h" 8 | 9 | double timespec_diff_sec( timespec start, timespec end ) { 10 | double t = end.tv_sec - start.tv_sec; 11 | t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L); 12 | return t; 13 | } 14 | 15 | int main(int argc, char** argv) { 16 | BdbmPcie* pcie = BdbmPcie::getInstance(); 17 | //DMASplitter* dma = DMASplitter::getInstance(); 18 | 19 | //uint32_t size = 128*128; 20 | 21 | /* 22 | if ( argc > 1 ) { 23 | size = atoi(argv[1]); 24 | } 25 | */ 26 | 27 | srand(time(NULL)); 28 | printf( "Sending block read req\n" ); 29 | for ( int i = 0; i < 1024; i++ ) { 30 | int roff = rand()&0xffff; 31 | uint32_t data = (roff<<16) | (2048/64); 32 | pcie->userWriteWord(12, data); 33 | printf( "%x data\n", data ); 34 | } 35 | sleep(2); 36 | uint32_t cycles = pcie->userReadWord(32); 37 | while ( cycles == 0 ) { 38 | cycles = pcie->userReadWord(32); 39 | printf( "cycles: %d\n", cycles ); 40 | sleep(1); 41 | } 42 | printf( "cycles: %d\n", cycles ); 43 | 44 | 45 | exit(0); 46 | 47 | 48 | unsigned int d = pcie->readWord(0); 49 | printf( "Magic: %x\n", d ); 50 | 
fflush(stdout); 51 | d = pcie->readWord(32); 52 | printf( "Dma Addr 0: %x\n", d ); 53 | fflush(stdout); 54 | 55 | printf( "read 0: %x\n", pcie->userReadWord(0) ); 56 | printf( "read 4: %x\n", pcie->userReadWord(4) ); 57 | printf( "read 8: %x\n", pcie->userReadWord(8) ); 58 | printf( "read 12: %x\n", pcie->userReadWord(12) ); 59 | printf( "read 16: %x\n", pcie->userReadWord(16) ); 60 | printf( "read 20: %x\n", pcie->userReadWord(20) ); 61 | printf( "read 24: %x\n", pcie->userReadWord(24) ); 62 | fflush(stdout); 63 | 64 | 65 | //uint8_t* dmabuf = (uint8_t*)dma->dmaBuffer(); 66 | uint8_t* dmabuf = (uint8_t*)pcie->dmaBuffer(); 67 | for ( int i = 0; i < 8*1024; i++ ) { 68 | dmabuf[i] = (char)i; 69 | } 70 | for ( int i = 0; i < 16; i++ ) { 71 | dmabuf[i] = 0xaa; 72 | } 73 | 74 | timespec start; 75 | timespec now; 76 | clock_gettime(CLOCK_REALTIME, & start); 77 | double diff = 0; 78 | 79 | pcie->userWriteWord(8, 1024); 80 | printf( "Sent write req\n" ); 81 | fflush(stdout); 82 | sleep(2); 83 | printf( "read 0: %x\n", pcie->userReadWord(0) ); 84 | printf( "read 4: %x\n", pcie->userReadWord(4) ); 85 | printf( "read 8: %x\n", pcie->userReadWord(8) ); 86 | printf( "read 12: %x\n", pcie->userReadWord(12) ); 87 | printf( "read 16: %x\n", pcie->userReadWord(16) ); 88 | printf( "read 20: %x\n", pcie->userReadWord(20) ); 89 | printf( "read 24: %x\n", pcie->userReadWord(24) ); 90 | fflush(stdout); 91 | 92 | printf( "Sending read req\n" ); 93 | fflush(stdout); 94 | pcie->userWriteWord(12, 2); 95 | printf( "Sent read req\n" ); 96 | fflush(stdout); 97 | sleep(1); 98 | printf( "read 0: %x\n", pcie->userReadWord(0) ); 99 | printf( "read 4: %x\n", pcie->userReadWord(4) ); 100 | printf( "read 8: %x\n", pcie->userReadWord(8) ); 101 | printf( "read 12: %x\n", pcie->userReadWord(12) ); 102 | printf( "read 16: %x\n", pcie->userReadWord(16) ); 103 | printf( "read 20: %x\n", pcie->userReadWord(20) ); 104 | printf( "read 24: %x\n", pcie->userReadWord(24) ); 105 | fflush(stdout); 106 | 107 | 
sleep(1); 108 | 109 | //printf( "Requesting memread with size %d\n", size ); 110 | //dma->sendWord(0, size,0,0,0); 111 | 112 | /* 113 | while (1) { 114 | PCIeWord w = dma->recvWord(); 115 | printf( "%x %x %x %x %x\n", w.header, w.d[0], w.d[1], w.d[2], w.d[3] ); 116 | if ( w.header == 0 ) break; 117 | } 118 | */ 119 | 120 | 121 | 122 | 123 | /* 124 | FlashManager* flash = FlashManager::getInstance(); 125 | 126 | uint32_t* pageBufferW = (uint32_t*)malloc(8192+32); 127 | uint32_t* pageBufferR = (uint32_t*)malloc(8192+32); 128 | for ( int i = 0; i < 8192/4; i++ ) { 129 | pageBufferW[i] = i; 130 | } 131 | printf( "Sending erase message:\n" ); 132 | flash->eraseBlock(1,1,1); 133 | //sleep(1); 134 | flash->writePage(1,1,1,0, pageBufferW); 135 | 136 | sleep(5); 137 | 138 | exit(1); 139 | 140 | 141 | printf( "\t\tSending read cmd\n" ); 142 | 143 | 144 | flash->readPage(1,1,1,0, pageBufferR); 145 | 146 | 147 | printf( "\t\tStarting read\n" ); 148 | sleep(1); 149 | 150 | for ( int i = 0; i < (8192+32)/4; i++ ) { 151 | int idx = i % 4; 152 | uint32_t r = pcie->readWord((1024+1+idx)*4); 153 | printf( "%d:%x ", i, r ); 154 | if ( i % 8 == 0 ) printf( "\n" ); 155 | } 156 | */ 157 | 158 | 159 | 160 | /* 161 | for ( int i = 0; i < 1024; i++ ) { 162 | pcie->writeWord((1024+3)*4, 0); 163 | } 164 | 165 | pcie->writeWord((1024+1)*4, 0); 166 | 167 | pcie->writeWord(1024*4, 4); 168 | for ( int i = 0; i < 32; i++ ) { 169 | pcie->writeWord(1024*4, 8); 170 | } 171 | sleep(1); 172 | pcie->writeWord((1024+1)*4, 0); 173 | 174 | 175 | for ( int i = 0; i < 4096; i++ ) { 176 | if ( ubuf[i] != 0 ) { 177 | printf( "%d: %x\n", i, ubuf[i] ); 178 | } 179 | } 180 | */ 181 | } 182 | -------------------------------------------------------------------------------- /examples/dramtest/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$! 
5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/dramtest/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/dramtest/user-ip.tcl: -------------------------------------------------------------------------------- 1 | set ddr3dir ../../../dram/$boardname/ 2 | 3 | ############# DDR3 Stuff 4 | read_ip $ddr3dir/core/ddr3_0/ddr3_0.xci 5 | read_verilog [ glob $ddr3dir/*.v ] 6 | read_xdc $ddr3dir/dram.xdc 7 | ############# end DDR3 Stuff 8 | 9 | -------------------------------------------------------------------------------- /examples/float/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | 6 | import BRAM::*; 7 | import BRAMFIFO::*; 8 | 9 | import PcieCtrl::*; 10 | 11 | import Float32::*; 12 | import Float64::*; 13 | import Cordic::*; 14 | import BLMacMSFP::*; 15 | 16 | interface HwMainIfc; 17 | endinterface 18 | 19 | 20 | (* synthesize *) 21 | module mkBLMacMSFP12_3_Fixed(BLMacMSFP12_3ChannelIfc); 22 | BLMacMSFP12_3ChannelIfc pe <- mkBLMacMSFP12_3(53'h01abcb223c54a7d, 53'h01eccb634c5ae7d, 53'h01dc674c46d8c7a); 23 | return pe; 24 | endmodule 25 | 26 | module mkHwMain#(PcieUserIfc pcie) 27 | (HwMainIfc); 28 | 29 | Clock curClk <- exposeCurrentClock; 30 | Reset curRst <- exposeCurrentReset; 31 | 32 | Clock pcieclk = pcie.user_clk; 33 | Reset pcierst = pcie.user_rst; 34 | 35 | Reg#(Bit#(32)) dataBuffer0 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 36 | Reg#(Bit#(32)) dataBuffer1 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 37 | 
Reg#(Bit#(32)) writeCounter <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 38 | 39 | BLMacMSFP12_3ChannelIfc msfpe <- mkBLMacMSFP12_3_Fixed(clocked_by pcieclk, reset_by pcierst); 40 | 41 | FpPairIfc#(64) mult <- mkFpMult64(clocked_by pcieclk, reset_by pcierst); 42 | FpFilterIfc#(64) sqrt <- mkFpSqrt64(clocked_by pcieclk, reset_by pcierst); 43 | CordicSinCosIfc sincos <- mkCordicSinCos(clocked_by pcieclk, reset_by pcierst); 44 | 45 | Reg#(Bit#(64)) doubleResultBuffer <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 46 | Reg#(Bit#(64)) doubleResultBuffer2 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 47 | Reg#(Bit#(32)) cordicResultBuffer <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 48 | 49 | 50 | Reg#(Bit#(64)) doubleBuffer1 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 51 | Reg#(Bit#(64)) doubleBuffer2 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 52 | 53 | rule echoRead; 54 | // read request handle must be returned with pcie.dataSend 55 | let r <- pcie.dataReq; 56 | let a = r.addr; 57 | 58 | // PCIe IO is done at 4 byte granularities 59 | // lower 2 bits are always zero 60 | let offset = (a>>2); 61 | if ( offset == 0 ) begin 62 | pcie.dataSend(r, truncate(doubleResultBuffer)); 63 | end else if ( offset == 1 ) begin 64 | pcie.dataSend(r, truncate(doubleResultBuffer>>32)); 65 | end else if ( offset == 2 ) begin 66 | pcie.dataSend(r, truncate(doubleResultBuffer2)); 67 | end else if ( offset == 3 ) begin 68 | pcie.dataSend(r, truncate(doubleResultBuffer2>>32)); 69 | end else if ( offset == 4 ) begin 70 | pcie.dataSend(r, cordicResultBuffer); 71 | end else if ( offset == 5 ) begin 72 | msfpe.deq; 73 | let rt = msfpe.first; 74 | Bit#(48) t = pack(rt); 75 | pcie.dataSend(r, truncate(t)^truncateLSB(t)); 76 | end else begin 77 | //pcie.dataSend(r, pcie.debug_data); 78 | pcie.dataSend(r, writeCounter); 79 | end 80 | $display( "Received read req at %x", r.addr ); 81 | endrule 82 | rule recvWrite; 83 | let w <- pcie.dataReceive; 84 | let a = w.addr; 
85 | let d = w.data; 86 | 87 | // PCIe IO is done at 4 byte granularities 88 | // lower 2 bits are always zero 89 | let off = (a>>2); 90 | if ( off == 0 ) begin 91 | doubleBuffer1 <= zeroExtend(d); 92 | end else if ( off == 1 ) begin 93 | doubleBuffer1 <= doubleBuffer1 | (zeroExtend(d)<<32); 94 | end else if ( off == 2 ) begin 95 | doubleBuffer2 <= zeroExtend(d); 96 | end else if ( off == 3 ) begin 97 | Bit#(64) b2 = doubleBuffer2 | (zeroExtend(d)<<32); 98 | mult.enq(doubleBuffer1, b2); 99 | sqrt.enq(b2); 100 | 101 | // FIXME 102 | msfpe.enq(unpack(truncate({doubleBuffer2,doubleBuffer1}))); 103 | end else if ( off == 4 ) begin 104 | sincos.enq(truncate(d)); 105 | end else begin 106 | //pcie.assertUptrain; 107 | writeCounter <= writeCounter + 1; 108 | end 109 | $display( "Received write req at %x : %x", a, d ); 110 | endrule 111 | rule rrrr; 112 | Bit#(64) d = mult.first; 113 | mult.deq; 114 | doubleResultBuffer <= d; 115 | $display( "mult %x ", d ); 116 | endrule 117 | rule rrrr2; 118 | Bit#(64) d = sqrt.first; 119 | sqrt.deq; 120 | doubleResultBuffer2 <= d; 121 | $display( "sqrt %x ", d ); 122 | endrule 123 | rule rrrr3; 124 | let d = sincos.first; 125 | sincos.deq; 126 | cordicResultBuffer <= {tpl_1(d),tpl_2(d)}; 127 | $display( "sincos %x ", d ); 128 | endrule 129 | 130 | endmodule 131 | -------------------------------------------------------------------------------- /examples/float/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH =../../ 2 | BOARD=kc705 3 | BUILDTOOLS=$(LIBPATH)/buildtools/ 4 | 5 | BLIBPATH=$(LIBPATH)/../bluelib/src/ 6 | 7 | CUSTOMBSV= -p +:$(BLIBPATH)/:$(BLIBPATH)/msfp/ 8 | CUSTOMCPP_BSIM= $(BLIBPATH)/bdpi.cpp 9 | 10 | include $(BUILDTOOLS)/Makefile.base 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/float/README.md: -------------------------------------------------------------------------------- 1 | # Bluelib floating point test 
project 2 | 3 | https://github.com/sangwoojun/bluelib needs to be cloned into the same parent directory as bluespecpcie, so that the two repositories are siblings 4 | 5 | -------------------------------------------------------------------------------- /examples/float/Top.bsv: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | import Clocks :: *; 5 | import ClockImport::*; 6 | import DefaultValue :: *; 7 | 8 | import PcieImport :: *; 9 | import PcieCtrl :: *; 10 | import PcieCtrl_bsim :: *; 11 | 12 | import Clocks :: *; 13 | import FIFO::*; 14 | 15 | import HwMain::*; 16 | 17 | //import Platform :: *; 18 | 19 | //import NullReset :: *; 20 | //import IlaImport :: *; 21 | 22 | interface TopIfc; 23 | (* always_ready *) 24 | interface PcieImportPins pcie_pins; 25 | (* always_ready *) 26 | method Bit#(4) led; 27 | endinterface 28 | 29 | (* no_default_clock, no_default_reset *) 30 | module mkProjectTop #( 31 | Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk, 32 | Clock sys_clk_p, Clock sys_clk_n, 33 | Reset pcie_rst_n 34 | ) 35 | (TopIfc); 36 | 37 | 38 | PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk); 39 | Clock pcie_clk_buf = pcie.sys_clk_o; 40 | Reset pcie_rst_n_buf = pcie.sys_rst_n_o; 41 | 42 | ClockGenIfc clk_200mhz_import <- mkClockIBUFDSImport(sys_clk_p, sys_clk_n); 43 | Clock sys_clk_200mhz = clk_200mhz_import.gen_clk; 44 | ClockGenIfc sys_clk_200mhz_buf_import <- mkClockBUFGImport(clocked_by sys_clk_200mhz); 45 | Clock sys_clk_200mhz_buf = sys_clk_200mhz_buf_import.gen_clk; 46 | Reset rst200 <- mkAsyncReset( 4, pcie_rst_n, sys_clk_200mhz_buf); 47 | 48 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset); 49 | /* 50 | ClockGenerator7Params clk_params = defaultValue(); 51 | clk_params.clkin1_period = 10.000; // 100 MHz reference 52 | clk_params.clkin_buffer = False; // necessary buffer is instanced above 53 | clk_params.reset_stages = 0; // no sync on reset so input clock has pll as only
load 54 | clk_params.clkfbout_mult_f = 10.000; // 1000 MHz VCO 55 | clk_params.clkout0_divide_f = 4; // 250MHz clock 56 | clk_params.clkout1_divide = 8; // 125MHz clock 57 | ClockGenerator7 clk_gen <- mkClockGenerator7(clk_params, clocked_by sys_clk_buf, reset_by sys_rst_n_buf); 58 | Clock clk250 = clk_gen.clkout0; 59 | Reset rst250 <- mkAsyncReset( 4, sys_rst_n_buf, clk250); 60 | 61 | Clock clk125 = clk_gen.clkout0; 62 | Reset rst125 <- mkAsyncReset( 4, sys_rst_n_buf, clk125); 63 | */ 64 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by sys_clk_200mhz_buf, reset_by rst200); 65 | 66 | //ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds); 67 | 68 | // Interfaces //// 69 | interface PcieImportPins pcie_pins = pcie.pins; 70 | 71 | method Bit#(4) led; 72 | //return leddata; 73 | return 0; 74 | endmethod 75 | endmodule 76 | 77 | module mkProjectTop_bsim (Empty); 78 | Clock curclk <- exposeCurrentClock; 79 | 80 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim; 81 | 82 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user); 83 | endmodule 84 | -------------------------------------------------------------------------------- /examples/float/cpp/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH=../../../ 2 | #LIBOBJ=$(LIBPATH)/cpp/obj/ 3 | 4 | BDBMPCIEINCLUDE= -I$(LIBPATH)/cpp/ 5 | BDBMPCIECPP= $(LIBPATH)/cpp/bdbmpcie.cpp $(LIBPATH)/cpp/ShmFifo.cpp $(LIBPATH)/cpp/DRAMHostDMA.cpp 6 | LIB= -lrt 7 | 8 | 9 | all: 10 | echo "building for pcie" 11 | mkdir -p obj 12 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/main $(LIB) -pedantic -g 13 | bsim: 14 | echo "building for bluesim" 15 | mkdir -p obj 16 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g -pedantic 17 | -------------------------------------------------------------------------------- /examples/float/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 
3 | #include 4 | 5 | #include "bdbmpcie.h" 6 | #include "dmasplitter.h" 7 | 8 | double timespec_diff_sec( timespec start, timespec end ) { 9 | double t = end.tv_sec - start.tv_sec; 10 | t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L); 11 | return t; 12 | } 13 | 14 | float fixed_to_float_2i(uint32_t fixed) { 15 | fixed = (fixed&((1<<16)-1)); 16 | // 16 bits, 2 bits integer 17 | bool sign = (fixed>>15); 18 | 19 | float ret = 0; 20 | if ( sign ) { 21 | uint32_t fixedneg = ((~fixed)+1)&((1<<16)-1); 22 | //uint32_t fixedneg = (~fixed)+1; 23 | ret = -((float)fixedneg)/(1<<14); 24 | } 25 | else { 26 | ret = ((float)fixed)/(1<<14); 27 | } 28 | 29 | return ret; 30 | } 31 | 32 | uint32_t float_to_fixed_3i(float radian) { 33 | // 16 bits, 3 bits integer 34 | uint32_t integer_portion = (uint32_t)radian; 35 | uint32_t frac_portion = (uint32_t)((radian - integer_portion) * (1<<13)); 36 | uint32_t fixed = (integer_portion<<13)|frac_portion; 37 | 38 | return fixed; 39 | } 40 | 41 | 42 | int main(int argc, char** argv) { 43 | //printf( "Software startec\n" ); fflush(stdout); 44 | BdbmPcie* pcie = BdbmPcie::getInstance(); 45 | 46 | unsigned int d = pcie->readWord(0); 47 | printf( "Magic: %x\n", d ); 48 | fflush(stdout); 49 | 50 | double a = 0.13; 51 | double b = 99.15; 52 | uint64_t av = *(uint64_t*)&a; 53 | uint64_t bv = *(uint64_t*)&b; 54 | 55 | pcie->userWriteWord(0, (uint32_t)(av)); 56 | pcie->userWriteWord(4, (uint32_t)(av>>32)); 57 | pcie->userWriteWord(8, (uint32_t)(bv)); 58 | pcie->userWriteWord(12, (uint32_t)(bv>>32)); 59 | 60 | float radian = 4.71238898; // 180 + 90 degrees 61 | pcie->userWriteWord(16, float_to_fixed_3i(radian)); 62 | 63 | 64 | 65 | sleep(1); 66 | 67 | uint64_t a1 = pcie->userReadWord(0); 68 | uint64_t a2 = pcie->userReadWord(4); 69 | uint64_t ar = (a2<<32)|a1; 70 | uint64_t b1 = pcie->userReadWord(8); 71 | uint64_t b2 = pcie->userReadWord(12); 72 | uint64_t br = (b2<<32)|b1; 73 | 74 | printf( "%f %f\n", *(double*)&ar, *(double*)&br ); 75 | 76 | 
uint32_t sincos = pcie->userReadWord(16); 77 | uint32_t cos = (sincos&0xffff); 78 | uint32_t sin = ((sincos>>16)&0xffff); 79 | printf( "sin: %f cos: %f\n", fixed_to_float_2i(sin), fixed_to_float_2i(cos) ); 80 | 81 | for ( int i = 0; i < 6; i++ ) { 82 | pcie->userWriteWord(16, float_to_fixed_3i((3.14159/180)*30*i)); 83 | 84 | usleep(10000); 85 | 86 | uint32_t sincos = pcie->userReadWord(16); 87 | uint32_t cos = (sincos&0xffff); 88 | uint32_t sin = ((sincos>>16)&0xffff); 89 | printf( "%d sin: %f cos: %f\n", i, fixed_to_float_2i(sin), fixed_to_float_2i(cos) ); 90 | } 91 | 92 | exit(0); 93 | } 94 | -------------------------------------------------------------------------------- /examples/float/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$! 5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/float/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/float/user-ip.tcl: -------------------------------------------------------------------------------- 1 | set libdir ../../../../bluelib/src/coregen/$boardname/ 2 | source $libdir/../fp_import.tcl 3 | -------------------------------------------------------------------------------- /examples/queuealu/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH =../../ 2 | BOARD=vc707 3 | BUILDTOOLS=$(LIBPATH)/buildtools/ 4 | 5 | BLIBPATH=$(LIBPATH)/../bluelib/src/ 6 | 7 | CUSTOMBSV= -p +:$(BLIBPATH)/ 8 | CUSTOMCPP_BSIM= $(BLIBPATH)/bdpi.cpp 9 | 10 | include $(BUILDTOOLS)/Makefile.base 11 
/*
*/

import Clocks :: *;
import ClockImport::*;
import DefaultValue :: *;

import PcieImport :: *;
import PcieCtrl :: *;
import PcieCtrl_bsim :: *;

import Clocks :: *;
import FIFO::*;

import HwMain::*;

//import Platform :: *;

//import NullReset :: *;
//import IlaImport :: *;

// Top-level pins of the FPGA design: the PCIe pins and a 4-bit LED output.
interface TopIfc;
	(* always_ready *)
	interface PcieImportPins pcie_pins;
	(* always_ready *)
	method Bit#(4) led;
endinterface

// Synthesis top level.  It has no default clock/reset; every submodule is
// given its clock and reset explicitly.
//   pcie_clk_p/n, emcclk, pcie_rst_n : passed straight to mkPcieImport
//   sys_clk_p/n                      : differential pair fed to the IBUFDS import
(* no_default_clock, no_default_reset *)
module mkProjectTop #(
	Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk,
	Clock sys_clk_p, Clock sys_clk_n,
	Reset pcie_rst_n
	) 
		(TopIfc);


	// Imported PCIe core; it also exports a buffered system clock and reset.
	PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk);
	Clock pcie_clk_buf = pcie.sys_clk_o;
	Reset pcie_rst_n_buf = pcie.sys_rst_n_o;

	// sys_clk_p/n -> IBUFDS -> BUFG.  The names suggest a 200 MHz clock; the
	// frequency itself is not established in this file.
	ClockGenIfc clk_200mhz_import <- mkClockIBUFDSImport(sys_clk_p, sys_clk_n);
	Clock sys_clk_200mhz = clk_200mhz_import.gen_clk;
	ClockGenIfc sys_clk_200mhz_buf_import <- mkClockBUFGImport(clocked_by sys_clk_200mhz);
	Clock sys_clk_200mhz_buf = sys_clk_200mhz_buf_import.gen_clk;
	// Reset for the buffered system clock domain, derived from the PCIe
	// core's buffered reset.  Only the commented-out hwmain variant below uses it.
	Reset rst200 <- mkAsyncReset( 4, pcie_rst_n_buf, sys_clk_200mhz_buf);


	// PCIe controller, running on the PCIe core's user clock/reset.
	PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset);
	/*
	// Disabled clock-generator experiment.
	// NOTE(review): clk125 below is taken from clkout0, the same output as
	// clk250; clkout1 looks like what was intended -- confirm before re-enabling.
	ClockGenerator7Params clk_params = defaultValue();
	clk_params.clkin1_period = 10.000; // 100 MHz reference
	clk_params.clkin_buffer = False; // necessary buffer is instanced above
	clk_params.reset_stages = 0; // no sync on reset so input clock has pll as only load
	clk_params.clkfbout_mult_f = 10.000; // 1000 MHz VCO
	clk_params.clkout0_divide_f = 4; // 250MHz clock
	clk_params.clkout1_divide = 8; // 125MHz clock
	ClockGenerator7 clk_gen <- mkClockGenerator7(clk_params, clocked_by sys_clk_buf, reset_by sys_rst_n_buf);
	Clock clk250 = clk_gen.clkout0;
	Reset rst250 <- mkAsyncReset( 4, sys_rst_n_buf, clk250);

	Clock clk125 = clk_gen.clkout0;
	Reset rst125 <- mkAsyncReset( 4, sys_rst_n_buf, clk125);
	*/
	// User logic runs on the clock/reset exported by the PCIe controller's
	// user interface.
	HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by pcieCtrl.user.user_clk, reset_by pcieCtrl.user.user_rst);
	//HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by sys_clk_200mhz_buf, reset_by rst200);

	//ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds);

	// Interfaces ////
	interface PcieImportPins pcie_pins = pcie.pins;

	// LEDs are currently tied to 0; the crossing wire above is disabled.
	method Bit#(4) led;
		//return leddata;
		return 0;
	endmethod
endmodule

// Bluesim top level: the simulation PCIe controller and the user logic both
// run on the simulator's default clock.
module mkProjectTop_bsim (Empty);
	Clock curclk <- exposeCurrentClock;

	PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim;

	HwMainIfc hwmain <- mkHwMain(pcieCtrl.user);
endmodule
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "bdbmpcie.h" 6 | #include "dmasplitter.h" 7 | 8 | double timespec_diff_sec( timespec start, timespec end ) { 9 | double t = end.tv_sec - start.tv_sec; 10 | t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L); 11 | return t; 12 | } 13 | 14 | float fixed_to_float_2i(uint32_t fixed) { 15 | fixed = (fixed&((1<<16)-1)); 16 | // 16 bits, 2 bits integer 17 | bool sign = (fixed>>15); 18 | 19 | float ret = 0; 20 | if ( sign ) { 21 | uint32_t fixedneg = ((~fixed)+1)&((1<<16)-1); 22 | //uint32_t fixedneg = (~fixed)+1; 23 | ret = -((float)fixedneg)/(1<<14); 24 | } 25 | else { 26 | ret = ((float)fixed)/(1<<14); 27 | } 28 | 29 | return ret; 30 | } 31 | 32 | uint32_t float_to_fixed_3i(float radian) { 33 | // 16 bits, 3 bits integer 34 | uint32_t integer_portion = (uint32_t)radian; 35 | uint32_t frac_portion = (uint32_t)((radian - integer_portion) * (1<<13)); 36 | uint32_t fixed = (integer_portion<<13)|frac_portion; 37 | 38 | return fixed; 39 | } 40 | 41 | 42 | int main(int argc, char** argv) { 43 | //printf( "Software startec\n" ); fflush(stdout); 44 | BdbmPcie* pcie = BdbmPcie::getInstance(); 45 | 46 | unsigned int d = pcie->readWord(0); 47 | printf( "Magic: %x\n", d ); 48 | fflush(stdout); 49 | 50 | exit(0); 51 | 52 | double a = 0.13; 53 | double b = 99.15; 54 | uint64_t av = *(uint64_t*)&a; 55 | uint64_t bv = *(uint64_t*)&b; 56 | 57 | pcie->userWriteWord(0, (uint32_t)(av)); 58 | pcie->userWriteWord(4, (uint32_t)(av>>32)); 59 | pcie->userWriteWord(8, (uint32_t)(bv)); 60 | pcie->userWriteWord(12, (uint32_t)(bv>>32)); 61 | 62 | float radian = 4.71238898; // 180 + 90 degrees 63 | pcie->userWriteWord(16, float_to_fixed_3i(radian)); 64 | 65 | 66 | 67 | sleep(1); 68 | 69 | uint64_t a1 = pcie->userReadWord(0); 70 | uint64_t a2 = pcie->userReadWord(4); 71 | uint64_t ar = (a2<<32)|a1; 72 | uint64_t b1 = pcie->userReadWord(8); 73 | uint64_t b2 = 
pcie->userReadWord(12); 74 | uint64_t br = (b2<<32)|b1; 75 | 76 | printf( "%f %f\n", *(double*)&ar, *(double*)&br ); 77 | 78 | uint32_t sincos = pcie->userReadWord(16); 79 | uint32_t cos = (sincos&0xffff); 80 | uint32_t sin = ((sincos>>16)&0xffff); 81 | printf( "sin: %f cos: %f\n", fixed_to_float_2i(sin), fixed_to_float_2i(cos) ); 82 | 83 | for ( int i = 0; i < 6; i++ ) { 84 | pcie->userWriteWord(16, float_to_fixed_3i((3.14159/180)*30*i)); 85 | 86 | usleep(10000); 87 | 88 | uint32_t sincos = pcie->userReadWord(16); 89 | uint32_t cos = (sincos&0xffff); 90 | uint32_t sin = ((sincos>>16)&0xffff); 91 | printf( "%d sin: %f cos: %f\n", i, fixed_to_float_2i(sin), fixed_to_float_2i(cos) ); 92 | } 93 | 94 | exit(0); 95 | } 96 | -------------------------------------------------------------------------------- /examples/queuealu/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$! 5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/queuealu/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/queuealu/user-ip.tcl: -------------------------------------------------------------------------------- 1 | set libdir ../../../../bluelib/src/coregen/$boardname/ 2 | source $libdir/../fp_import.tcl 3 | -------------------------------------------------------------------------------- /examples/simple/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | 6 | import BRAM::*; 
7 | import BRAMFIFO::*; 8 | 9 | import PcieCtrl::*; 10 | 11 | interface HwMainIfc; 12 | endinterface 13 | 14 | 15 | module mkHwMain#(PcieUserIfc pcie) 16 | (HwMainIfc); 17 | 18 | Clock curClk <- exposeCurrentClock; 19 | Reset curRst <- exposeCurrentReset; 20 | 21 | Clock pcieclk = pcie.user_clk; 22 | Reset pcierst = pcie.user_rst; 23 | 24 | Reg#(Bit#(32)) dataBuffer0 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 25 | Reg#(Bit#(32)) dataBuffer1 <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 26 | Reg#(Bit#(32)) writeCounter <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 27 | 28 | 29 | rule echoRead; 30 | // read request handle must be returned with pcie.dataSend 31 | let r <- pcie.dataReq; 32 | let a = r.addr; 33 | 34 | // PCIe IO is done at 4 byte granularities 35 | // lower 2 bits are always zero 36 | let offset = (a>>2); 37 | if ( offset == 0 ) begin 38 | pcie.dataSend(r, dataBuffer0); 39 | end else if ( offset == 1 ) begin 40 | pcie.dataSend(r, dataBuffer1); 41 | end else begin 42 | pcie.dataSend(r, pcie.debug_data); 43 | end 44 | $display( "Received read req at %x", r.addr ); 45 | endrule 46 | 47 | 48 | 49 | Vector#(16, Reg#(Bit#(32))) writeBuffer <- replicateM(mkReg(0)); 50 | Reg#(Bit#(4)) writeBufferCnt <- mkReg(0); 51 | 52 | 53 | 54 | rule recvWrite; 55 | let w <- pcie.dataReceive; 56 | let a = w.addr; 57 | let d = w.data; 58 | 59 | if ( a == 0 ) begin // command 60 | end else if ( a == 4 ) begin // data load 61 | for ( Integer i = 1; i < 15; i=i+1 ) begin 62 | writeBuffer[i+1] <= writeBuffer[i]; 63 | end 64 | writeBuffer[0] <= d; 65 | if ( writeBufferCnt == 15 ) begin 66 | writeBufferCnt <= 0; 67 | end else begin 68 | writeBufferCnt <= writeBufferCnt + 1; 69 | end 70 | end 71 | 72 | // PCIe IO is done at 4 byte granularities 73 | // lower 2 bits are always zero 74 | let off = (a>>2); 75 | if ( off == 0 ) begin 76 | dataBuffer0 <= d; 77 | end else if ( off == 1 ) begin 78 | dataBuffer1 <= d; 79 | end else begin 80 | //pcie.assertUptrain; 81 | 
writeCounter <= writeCounter + 1; 82 | end 83 | $display( "Received write req at %x : %x", a, d ); 84 | endrule 85 | 86 | endmodule 87 | -------------------------------------------------------------------------------- /examples/simple/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH =../../ 2 | BOARD=vc707 3 | BUILDTOOLS=$(LIBPATH)/buildtools/ 4 | 5 | BLIBPATH=$(LIBPATH)/../bluelib/src/ 6 | 7 | CUSTOMBSV= -p +:$(BLIBPATH)/ 8 | CUSTOMCPP_BSIM= $(BLIBPATH)/bdpi.cpp 9 | 10 | 11 | include $(BUILDTOOLS)/Makefile.base 12 | 13 | 14 | -------------------------------------------------------------------------------- /examples/simple/Top.bsv: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | import Clocks :: *; 5 | import ClockImport::*; 6 | import DefaultValue :: *; 7 | 8 | import PcieImport :: *; 9 | import PcieCtrl :: *; 10 | import PcieCtrl_bsim :: *; 11 | 12 | import Clocks :: *; 13 | import FIFO::*; 14 | 15 | import HwMain::*; 16 | 17 | //import Platform :: *; 18 | 19 | //import NullReset :: *; 20 | //import IlaImport :: *; 21 | 22 | interface TopIfc; 23 | (* always_ready *) 24 | interface PcieImportPins pcie_pins; 25 | (* always_ready *) 26 | method Bit#(4) led; 27 | endinterface 28 | 29 | (* no_default_clock, no_default_reset *) 30 | module mkProjectTop #( 31 | Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk, 32 | Clock sys_clk_p, Clock sys_clk_n, 33 | Reset pcie_rst_n 34 | ) 35 | (TopIfc); 36 | 37 | 38 | PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk); 39 | Clock pcie_clk_buf = pcie.sys_clk_o; 40 | Reset pcie_rst_n_buf = pcie.sys_rst_n_o; 41 | 42 | ClockGenIfc clk_200mhz_import <- mkClockIBUFDSImport(sys_clk_p, sys_clk_n); 43 | Clock sys_clk_200mhz = clk_200mhz_import.gen_clk; 44 | ClockGenIfc sys_clk_200mhz_buf_import <- mkClockBUFGImport(clocked_by sys_clk_200mhz); 45 | Clock sys_clk_200mhz_buf = 
sys_clk_200mhz_buf_import.gen_clk; 46 | Reset rst200 <- mkAsyncReset( 4, pcie_rst_n, sys_clk_200mhz_buf); 47 | 48 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset); 49 | /* 50 | ClockGenerator7Params clk_params = defaultValue(); 51 | clk_params.clkin1_period = 10.000; // 100 MHz reference 52 | clk_params.clkin_buffer = False; // necessary buffer is instanced above 53 | clk_params.reset_stages = 0; // no sync on reset so input clock has pll as only load 54 | clk_params.clkfbout_mult_f = 10.000; // 1000 MHz VCO 55 | clk_params.clkout0_divide_f = 4; // 250MHz clock 56 | clk_params.clkout1_divide = 8; // 125MHz clock 57 | ClockGenerator7 clk_gen <- mkClockGenerator7(clk_params, clocked_by sys_clk_buf, reset_by sys_rst_n_buf); 58 | Clock clk250 = clk_gen.clkout0; 59 | Reset rst250 <- mkAsyncReset( 4, sys_rst_n_buf, clk250); 60 | 61 | Clock clk125 = clk_gen.clkout0; 62 | Reset rst125 <- mkAsyncReset( 4, sys_rst_n_buf, clk125); 63 | */ 64 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by sys_clk_200mhz_buf, reset_by rst200); 65 | 66 | //ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds); 67 | 68 | // Interfaces //// 69 | interface PcieImportPins pcie_pins = pcie.pins; 70 | 71 | method Bit#(4) led; 72 | //return leddata; 73 | return 0; 74 | endmethod 75 | endmodule 76 | 77 | module mkProjectTop_bsim (Empty); 78 | Clock curclk <- exposeCurrentClock; 79 | 80 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim; 81 | 82 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user); 83 | endmodule 84 | -------------------------------------------------------------------------------- /examples/simple/cpp/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH=../../../ 2 | #LIBOBJ=$(LIBPATH)/cpp/obj/ 3 | 4 | BDBMPCIEINCLUDE= -I$(LIBPATH)/cpp/ 5 | BDBMPCIECPP= $(LIBPATH)/cpp/bdbmpcie.cpp $(LIBPATH)/cpp/ShmFifo.cpp $(LIBPATH)/cpp/DRAMHostDMA.cpp 6 | LIB= -lrt 7 | 8 | 9 | all: 10 | 
echo "building for pcie" 11 | mkdir -p obj 12 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/main $(LIB) -pedantic -g 13 | bsim: 14 | echo "building for bluesim" 15 | mkdir -p obj 16 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g -pedantic 17 | -------------------------------------------------------------------------------- /examples/simple/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "bdbmpcie.h" 6 | #include "dmasplitter.h" 7 | 8 | double timespec_diff_sec( timespec start, timespec end ) { 9 | double t = end.tv_sec - start.tv_sec; 10 | t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L); 11 | return t; 12 | } 13 | 14 | 15 | int main(int argc, char** argv) { 16 | //printf( "Software startec\n" ); fflush(stdout); 17 | BdbmPcie* pcie = BdbmPcie::getInstance(); 18 | 19 | unsigned int d = pcie->readWord(0); 20 | printf( "Magic: %x\n", d ); 21 | fflush(stdout); 22 | 23 | pcie->userWriteWord(4, 0xdeadbeef); 24 | pcie->userWriteWord(0, 0xcafef00d); 25 | 26 | pcie->userWriteWord(12, 0); 27 | pcie->Ioctl(1,0); // refresh link 28 | sleep(1); 29 | for ( int i = 0; i < 8; i++ ) { 30 | printf( "read: %x\n", pcie->userReadWord(i*4) ); 31 | //sleep(1); 32 | } 33 | 34 | printf( "Starting performance testing\n" ); 35 | timespec start; 36 | timespec now; 37 | 38 | clock_gettime(CLOCK_REALTIME, & start); 39 | //for ( int i = 0; i < 1024*1024*256/4; i++ ) { // 256MB 40 | for ( int i = 0; i < 1024; i++ ) { // 256MB 41 | pcie->userWriteWord(8, 0xcccccaaf); 42 | //usleep(1001); 43 | } 44 | clock_gettime(CLOCK_REALTIME, & now); 45 | double diff = timespec_diff_sec(start, now); 46 | 47 | printf( "read: %x\n", pcie->userReadWord(4) ); 48 | printf( "Write elapsed: %f\n", diff ); 49 | fflush(stdout); 50 | 51 | clock_gettime(CLOCK_REALTIME, & start); 52 | 53 | //for ( int i = 0; i < 1024*1024*256/4; i++ ) { // 256MB 54 | for ( int i = 0; i < 
1024; i++ ) { // 256MB 55 | pcie->userReadWord(4); 56 | } 57 | clock_gettime(CLOCK_REALTIME, & now); 58 | diff = timespec_diff_sec(start, now); 59 | printf( "Read elapsed: %f\n", diff ); 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /examples/simple/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$! 5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/simple/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/simple/user-ip.tcl: -------------------------------------------------------------------------------- 1 | ## None 2 | -------------------------------------------------------------------------------- /examples/sortreduce/DMAReadOrdered.bsv: -------------------------------------------------------------------------------- 1 | package DMAReadOrdered; 2 | 3 | import FIFO::*; 4 | import FIFOF::*; 5 | import Clocks::*; 6 | import Vector::*; 7 | import BRAM::*; 8 | 9 | import PcieCtrl::*; 10 | 11 | import MergeN::*; 12 | 13 | typedef 16 DmaTagCnt; 14 | typedef TLog#(DmaTagCnt) DmaTagSz; 15 | 16 | interface DMAReadOrderedIfc; 17 | method Action readReq(Bit#(32) data, Bit#(10) words); 18 | method ActionValue#(DMAWord) get; 19 | endinterface 20 | 21 | module mkDMAReadOrdered#(PcieUserIfc pcie) (DMAReadOrderedIfc); 22 | 23 | Integer dmaTagCnt = valueOf(DmaTagCnt); 24 | BRAM2Port#(Bit#(TAdd#(DmaTagSz,3)), Bit#(128)) dmaReorderBuffer <- mkBRAM2Server(defaultValue); //2K, 16 tags @ 128 bytes 25 | Vector#(DmaTagCnt, 
package DramStripeLoader;

import Vector::*;
import FIFO::*;
import BRAM::*;
import BRAMFIFO::*;

// Streams a DRAM range out in fixed-size stripes.
//   command         : program start offset, words per stripe and limit offset
//   getData         : next 512-bit word plus a flag marking the last word of a stripe
//   getBurstReadReq : burst read requests (offset, words) to forward to DRAM
//   putData         : DRAM read data coming back
interface DramStripeLoaderIfc;
	method ActionValue#(Tuple2#(Bit#(512),Bool)) getData;
	method Action command(Bit#(32) startoff, Bit#(32) stripewords, Bit#(32) limitoff);

	// Connection to DRAM
	method ActionValue#(Tuple2#(Bit#(32), Bit#(16))) getBurstReadReq; // Offset, Words
	method Action putData(Bit#(512) data);
endinterface

// buffersz : depth of the prefetch buffer, in 512-bit words
// fetchsz  : words requested per DRAM burst
module mkDramStripeLoader#(Integer buffersz, Integer fetchsz) (DramStripeLoaderIfc);

	// Queues between DRAM, the prefetch buffer and the consumer.
	FIFO#(Bit#(512)) bufferQ <- mkSizedBRAMFIFO(buffersz);
	FIFO#(Tuple2#(Bit#(32),Bit#(16))) dramReadReqQ <- mkFIFO;
	FIFO#(Bit#(512)) inDataQ <- mkFIFO;
	FIFO#(Tuple2#(Bit#(512),Bool)) outDataQ <- mkFIFO;

	// Words requested from DRAM so far / words already drained from bufferQ.
	// Their difference is the buffer space that is already spoken for.
	Reg#(Bit#(32)) bufferInCnt <- mkReg(0);
	Reg#(Bit#(32)) bufferOutCnt <- mkReg(0);

	// Current command: next offset to fetch, stripe length, end offset.
	Reg#(Bit#(32)) baseOff <- mkReg(0);
	Reg#(Bit#(32)) stripeWords <- mkReg(0);
	Reg#(Bit#(32)) limitOff <- mkReg(0);

	// Position of the next outgoing word inside the current stripe.
	Reg#(Bit#(32)) stripePos <- mkReg(0);

	// Issue another burst while the range is not exhausted and the buffer
	// still has room for a full burst.
	rule genReadReq ( bufferInCnt-bufferOutCnt < fromInteger(buffersz-fetchsz) && baseOff < limitOff );
		Bit#(32) burstWords = fromInteger(fetchsz);
		dramReadReqQ.enq(tuple2(baseOff, fromInteger(fetchsz)));
		bufferInCnt <= bufferInCnt + burstWords;
		baseOff <= baseOff + burstWords;
	endrule


	// Move incoming DRAM data into the prefetch buffer.
	rule relayBufferQ;
		inDataQ.deq;
		bufferQ.enq(inDataQ.first);
	endrule

	// Drain the prefetch buffer, tagging the final word of each stripe.
	rule relayOutQ;
		let word = bufferQ.first;
		bufferQ.deq;
		bufferOutCnt <= bufferOutCnt + 1;

		Bool stripeEnd = ( stripePos + 1 >= stripeWords );
		stripePos <= ( stripeEnd ? 0 : stripePos + 1 );
		outDataQ.enq(tuple2(word, stripeEnd));
	endrule

	method ActionValue#(Tuple2#(Bit#(512),Bool)) getData;
		let head = outDataQ.first;
		outDataQ.deq;
		return head;
	endmethod

	// A new command is accepted only once the previous range is fully requested.
	method Action command(Bit#(32) startoff, Bit#(32) stripewords, Bit#(32) limitoff) if (baseOff >= limitOff);
		limitOff <= limitoff;
		stripeWords <= stripewords;
		baseOff <= startoff;
	endmethod

	// Connection to DRAM
	method ActionValue#(Tuple2#(Bit#(32), Bit#(16))) getBurstReadReq; // Offset, Words
		let req = dramReadReqQ.first;
		dramReadReqQ.deq;
		return req;
	endmethod
	method Action putData(Bit#(512) data);
		inDataQ.enq(data);
	endmethod
endmodule

endpackage: DramStripeLoader
import SortReduceSingle::*;
import Serializer::*;

interface HwMainIfc;
endinterface


// Sort-reduce example top: connects one SortReduceSingle unit to DRAM and
// exposes control/status over PCIe user registers.
//
// Host writes (word offset = address >> 2):
//   0..2 : inputArgs[0..2]
//   3    : start sr1.command(inputArgs[0..2], data) and restart the cycle timer
//   4    : sr1.outCommand(inputArgs[0], inputArgs[1], data)
//   5    : 32-bit data word, packed 16 at a time into one 64-byte DRAM write
//   6    : set the DRAM byte offset used for those writes
// Host reads:
//   0 : cycles from the last start to the most recent completion event
//   1 : completion events since the last start
//   2 : overflow events
//   other : 0
module mkHwMain#(PcieUserIfc pcie, DRAMUserIfc dram) 
	(HwMainIfc);

	// Current clock/reset is pcieclk/rst (250MHz)
	Clock curClk <- exposeCurrentClock;
	Reset curRst <- exposeCurrentReset;
	Clock pcieclk = pcie.user_clk;
	Reset pcierst = pcie.user_rst;

	//DMASplitterIfc#(4) dma <- mkDMASplitter(pcie);

	SortReduceSingleIfc sr1 <- mkSortReduceSingle;

	// Free-running cycle counter used for timing.
	Reg#(Bit#(32)) cycles <- mkReg(0);
	rule incCycle;
		cycles <= cycles + 1;
	endrule

	Reg#(Bit#(32)) overflowCnt <- mkReg(0);
	rule getSr1overflow;
		let o <-sr1.getOverflow;
		overflowCnt <= overflowCnt + 1;
	endrule

	/////////////////////// dram connection
	// A burst request from sr1 (write?, word offset, word count) is expanded
	// into one 64-byte DRAM command per cycle until curDRAMBurstLeft reaches 0.
	Reg#(Bit#(32)) curDRAMBurstOffset <- mkReg(0);
	Reg#(Bit#(16)) curDRAMBurstLeft <- mkReg(0);
	Reg#(Bool) curDRAMBurstWrite <- mkReg(False);
	rule relayBurstReq ( curDRAMBurstLeft == 0 );
		let r <- sr1.getBurstReq;
		curDRAMBurstWrite <= tpl_1(r);
		curDRAMBurstOffset <= tpl_2(r);
		curDRAMBurstLeft <= tpl_3(r);
		//$display( "DRAM burst read started %d %d %d", tpl_1(r), tpl_2(r), tpl_3(r) );
	endrule
	rule sendDRAMCmd(curDRAMBurstLeft > 0);
		//$display( "DRAM read req %d", curDRAMBurstLeft );

		curDRAMBurstLeft <= curDRAMBurstLeft - 1;
		curDRAMBurstOffset <= curDRAMBurstOffset + 1;
		// Offsets count 64-byte words; the DRAM interface takes byte addresses.
		if ( curDRAMBurstWrite ) begin
			let d <- sr1.getData;
			dram.write(zeroExtend(curDRAMBurstOffset)*64, d, 64);
		end else begin
			dram.readReq(zeroExtend(curDRAMBurstOffset)*64, 64);
		end
	endrule
	rule relayDRAMRead;
		let d <- dram.read;
		sr1.putData(d);
		//$display( "DRAM burst read data" );
	endrule


	/////////////////
	Reg#(Bit#(32)) startCycle <- mkReg(0);
	Reg#(Bit#(32)) endCycle <- mkReg(0);
	Reg#(Bit#(32)) doneCnt <- mkReg(0);
	rule getDone;
		let r <- sr1.debug;
		endCycle <= cycles-startCycle;
		doneCnt <= doneCnt + 1;
	endrule

	rule readStat;
		let r <- pcie.dataReq;
		let a = r.addr;

		// PCIe IO is done at 4 byte granularities
		// lower 2 bits are always zero
		let offset = (a>>2);
		if ( offset == 0 ) pcie.dataSend(r, endCycle);
		else if ( offset == 1 ) pcie.dataSend(r, doneCnt);
		else if ( offset == 2 ) pcie.dataSend(r, overflowCnt);
		// The request has already been consumed, so it must always be
		// answered; without this branch a read of any other offset would
		// never receive a reply.
		else pcie.dataSend(r, 0);
	endrule


	Reg#(Bit#(32)) dramWriteOff <- mkReg(0);
	DeSerializerIfc#(32,16) dramWDes <- mkDeSerializer;
	Vector#(3,Reg#(Bit#(32))) inputArgs <- replicateM(mkReg(0));
	// Host-driven DRAM fill; only runs while no sr1 burst is in progress so
	// that it does not compete with sendDRAMCmd for the DRAM port.
	rule fillDRAM( curDRAMBurstLeft == 0 );
		let d <- dramWDes.get;
		dram.write(zeroExtend(dramWriteOff), d, 64);
		dramWriteOff <= dramWriteOff + 64;
	endrule
	FIFO#(Tuple2#(Bit#(32),Bit#(32))) dramWriteCmdQ <- mkFIFO;
	rule dramWriteCmd;
		let w = dramWriteCmdQ.first;
		dramWriteCmdQ.deq;
		let d = tpl_2(w);
		let off = tpl_1(w);
		if ( off == 5 ) begin
			dramWDes.put(d);
		end
		else if ( off == 6 ) begin
			dramWriteOff <= d;
		end
	endrule
	rule getCmd;
		// dma load from host
		// dma write to host
		// start sortreduce sweep
		let w <- pcie.dataReceive;
		let a = w.addr;
		let d = w.data;
		let off = (a>>2);
		if ( off < 3 ) inputArgs[off] <= d;
		else if ( off == 3 ) begin
			sr1.command(truncate(inputArgs[0]), inputArgs[1], inputArgs[2], d);
			startCycle <= cycles;
			endCycle <= 0;
			doneCnt <= 0;
		end
		else if ( off == 4 ) begin
			sr1.outCommand(inputArgs[0], inputArgs[1], d);
		end
		else begin
			// Offsets 5 and up go through a queue to dramWriteCmd.
			dramWriteCmdQ.enq(tuple2(zeroExtend(off),d));
		end
	endrule


endmodule
/*
*/

import Clocks::*;
import DefaultValue::*;
import FIFO::*;
import Connectable::*;

import Xilinx::*;
import XilinxCells::*;

// PCIe stuff
import PcieImport :: *;
import PcieCtrl :: *;
import PcieCtrl_bsim :: *;

// DRAM stuff
import DDR3Sim::*;
import DDR3Controller::*;
import DDR3Common::*;
import DRAMController::*;

import HwMain::*;

//import Platform :: *;

//import NullReset :: *;
//import IlaImport :: *;

// Top-level pins of the FPGA design: PCIe, a 4-bit LED output and the DDR3 pins.
interface TopIfc;
	(* always_ready *)
	interface PcieImportPins pcie_pins;
	(* always_ready *)
	method Bit#(4) led;

	interface DDR3_Pins_1GB pins_ddr3;
endinterface

// Synthesis top level.  It has no default clock/reset; every submodule is
// given its clock and reset explicitly.
//   pcie_clk_p/n, emcclk, pcie_rst_n : passed straight to mkPcieImport
//   sys_clk_p/n                      : differential pair for the system/DDR3 reference clock
(* no_default_clock, no_default_reset *)
module mkProjectTop #(
	Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk,
	Clock sys_clk_p, Clock sys_clk_n,
	Reset pcie_rst_n
	) 
		(TopIfc);


	// Imported PCIe core; it also exports a buffered system clock and reset.
	PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk);
	Clock pcie_clk_buf = pcie.sys_clk_o;
	Reset pcie_rst_n_buf = pcie.sys_rst_n_o;

	// sys_clk_p/n -> IBUFDS -> BUFG, plus a reset for that clock domain
	// derived from the PCIe core's buffered reset.
	Clock sys_clk_200mhz <- mkClockIBUFDS(defaultValue, sys_clk_p, sys_clk_n);
	Clock sys_clk_200mhz_buf <- mkClockBUFG(clocked_by sys_clk_200mhz);
	Reset rst200 <- mkAsyncReset( 4, pcie_rst_n_buf, sys_clk_200mhz_buf);

	// PCIe controller, running on the PCIe core's user clock/reset.
	PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset);

	
	// The DDR3 controller is clocked by the buffered system clock; its reset
	// is the PCIe user reset re-synchronised into that clock domain.
	Clock ddr_buf = sys_clk_200mhz_buf;
	Reset ddr3ref_rst_n <- mkAsyncResetFromCR(4, ddr_buf, reset_by pcieCtrl.user.user_rst);

	DDR3Common::DDR3_Configure ddr3_cfg = defaultValue;
	ddr3_cfg.reads_in_flight = 32;   // adjust as needed
	DDR3_Controller_1GB ddr3_ctrl <- mkDDR3Controller_1GB(ddr3_cfg, ddr_buf, clocked_by ddr_buf, reset_by ddr3ref_rst_n);
	// The DRAM controller wrapper and the user logic both run on the PCIe
	// user clock/reset.
	DRAMControllerIfc dramController <- mkDRAMController(ddr3_ctrl.user, clocked_by pcieCtrl.user.user_clk, reset_by pcieCtrl.user.user_rst);

	HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, dramController.user, clocked_by pcieCtrl.user.user_clk, reset_by pcieCtrl.user.user_rst);





	//ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds);

	// Interfaces ////
	interface PcieImportPins pcie_pins = pcie.pins;

	interface DDR3_Pins_1GB pins_ddr3 = ddr3_ctrl.ddr3;

	// LEDs are currently tied to 0; the crossing wire above is disabled.
	method Bit#(4) led;
		//return leddata;
		return 0;
	endmethod
endmodule

// Bluesim top level: simulation PCIe controller, simulated DDR3 and the user
// logic all run on the simulator's default clock.
module mkProjectTop_bsim (Empty);
	Clock curclk <- exposeCurrentClock;

	PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim;
	
	let ddr3_ctrl_user <- mkDDR3Simulator;
	DRAMControllerIfc dramController <- mkDRAMController(ddr3_ctrl_user);
	//mkConnection(dramController.ddr3_cli, ddr3_ctrl_user);

	HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, dramController.user);
endmodule
$(BDBMPCIEINCLUDE) -o obj/main $(LIB) -pedantic -g -O2 13 | bsim: 14 | echo "building for bluesim" 15 | mkdir -p obj 16 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g -pedantic 17 | -------------------------------------------------------------------------------- /examples/sortreduce/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "bdbmpcie.h" 9 | //#include "dmasplitter.h" 10 | 11 | double timespec_diff_sec( timespec start, timespec end ) { 12 | double t = end.tv_sec - start.tv_sec; 13 | t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L); 14 | return t; 15 | } 16 | 17 | int main(int argc, char** argv) { 18 | BdbmPcie* pcie = BdbmPcie::getInstance(); 19 | srand(time(NULL)); 20 | 21 | 22 | pcie->userWriteWord(24, 0); // dram write start off 23 | 24 | //uint32_t istripe = (1024*1024*256/16)/64; 25 | //uint32_t bufferwords = (1024*1024*256)/64; // 256MB scratchpad 26 | uint32_t buffermb = 256; 27 | uint32_t outstripemb = 1; 28 | uint32_t bufferwords = (1024*1024*buffermb)/64; // 256MB scratchpad 29 | uint32_t istripe = (1024*1024*outstripemb)/16/64; // each output stripe adding up to ... 
30 | uint32_t stripecnt = buffermb/outstripemb; 31 | 32 | pcie->userWriteWord(0, bufferwords); // output stripe offset 33 | pcie->userWriteWord(4, bufferwords*2); // output stripe limit 34 | pcie->userWriteWord(16, istripe*16); // output stripe words 35 | 36 | for ( int b = 0; b < 16; b++ ) { 37 | printf( "Writing buffer %d\n",b ); fflush(stdout); 38 | uint32_t kvcnt = istripe*8; // 8 kvpairs per DRAM word 39 | for ( int s = 0; s < bufferwords/16/istripe; s++ ) { 40 | uint32_t* buffer = (uint32_t*)malloc(kvcnt*sizeof(uint32_t)); 41 | for ( int i = 0; i < kvcnt; i++ ){ 42 | //buffer[i] = rand() % (1<<24); 43 | buffer[i] = rand() % (1<<24); 44 | } 45 | //std::sort(buffer, buffer+kvcnt); 46 | buffer[kvcnt/2] = 0xffffffff; 47 | 48 | for ( int i = 0; i < kvcnt; i++ ){ 49 | if ( buffer[i] == 0xffffffff ) { 50 | pcie->userWriteWord(20, 0xffffffff); 51 | pcie->userWriteWord(20, buffer[i]); 52 | } else { 53 | pcie->userWriteWord(20, 1); 54 | pcie->userWriteWord(20, buffer[i]); 55 | } 56 | } 57 | free(buffer); 58 | } 59 | } 60 | 61 | for ( int b = 0; b < 16; b++ ) { 62 | pcie->userWriteWord(0, b); 63 | pcie->userWriteWord(4, (bufferwords/16)*b); 64 | pcie->userWriteWord(8, (bufferwords/16)*(b+1)); 65 | pcie->userWriteWord(12, istripe); 66 | } 67 | 68 | 69 | 70 | //sleep(5); 71 | uint32_t donestripes = 0; 72 | uint32_t elapsed = 0; 73 | while (donestripes < stripecnt ) { 74 | uint32_t gdonestripes = pcie->userReadWord(4); 75 | while (elapsed == 0 || gdonestripes <= donestripes) { 76 | elapsed = pcie->userReadWord(0); 77 | gdonestripes = pcie->userReadWord(4); 78 | } 79 | donestripes = gdonestripes; 80 | printf( "elapsed: %d cycles (%lf s) for %d\n", elapsed, ((double)elapsed)/250000000.0, donestripes ); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /examples/sortreduce/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export 
BDBM_BSIM_PID=$! 5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/sortreduce/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/sortreduce/user-ip.tcl: -------------------------------------------------------------------------------- 1 | set ddr3dir ../../../dram/$boardname/ 2 | 3 | ############# DDR3 Stuff 4 | read_ip $ddr3dir/core/ddr3_0/ddr3_0.xci 5 | read_verilog [ glob $ddr3dir/*.v ] 6 | read_xdc $ddr3dir/dram.xdc 7 | ############# end Flash Stuff 8 | 9 | -------------------------------------------------------------------------------- /examples/streaming/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | import BRAM::*; 6 | import BRAMFIFO::*; 7 | 8 | import PcieCtrl::*; 9 | import Serializer::*; 10 | 11 | import StreamKernel::*; 12 | import LZAHCompression::*; 13 | 14 | interface HwMainIfc; 15 | endinterface 16 | 17 | module mkHwMain#(PcieUserIfc pcie) 18 | (HwMainIfc); 19 | 20 | Clock curClk <- exposeCurrentClock; 21 | Reset curRst <- exposeCurrentReset; 22 | 23 | Clock pcieclk = pcie.user_clk; 24 | Reset pcierst = pcie.user_rst; 25 | 26 | FIFO#(DMAWord) inputQ <- mkSizedBRAMFIFO(512); // 8KBs 27 | FIFO#(DMAWord) outputQ <- mkSizedBRAMFIFO(512); // 8KBs 28 | Reg#(Bit#(16)) outputCntUp <- mkReg(0); 29 | Reg#(Bit#(16)) outputCntDn <- mkReg(0); 30 | 31 | 32 | /* 33 | StreamKernelIfc kernel <- mkStreamKernelTest; 34 | DeSerializerIfc#(128, 2) des <- mkDeSerializer; 35 | SerializerIfc#(256, 2) ser <- mkSerializer; 36 | rule desIn; 37 | 
inputQ.deq; 38 | des.put(inputQ.first); 39 | endrule 40 | rule relayIn; 41 | let w <- des.get; 42 | kernel.enq(w); 43 | endrule 44 | rule serOut; 45 | kernel.deq; 46 | ser.put(kernel.first); 47 | endrule 48 | rule relayOut; 49 | let w <- ser.get; 50 | outputQ.enq(w); 51 | outputCntUp <= outputCntUp + 1; 52 | //$display( "outputCntUp %d %d\n", outputCntUp+1, outputCntDn); 53 | endrule 54 | */ 55 | 56 | SerializerIfc#(128,2) comp_ser <- mkSerializer; 57 | DeSerializerIfc#(64,2) comp_des <- mkDeSerializer; 58 | FIFO#(Bit#(64)) compr <- mkLZAHCompressor; 59 | rule serIn; 60 | inputQ.deq; 61 | comp_ser.put(inputQ.first); 62 | endrule 63 | rule relayIn; 64 | let w <- comp_ser.get; 65 | compr.enq(w); 66 | endrule 67 | rule serOut; 68 | compr.deq; 69 | comp_des.put(compr.first); 70 | endrule 71 | rule relayOut; 72 | let w <- comp_des.get; 73 | outputQ.enq(w); 74 | outputCntUp <= outputCntUp + 1; 75 | //$display( "outputCntUp %d %d\n", outputCntUp+1, outputCntDn); 76 | endrule 77 | 78 | BRAM2Port#(Bit#(6),DMAWord) page <- mkBRAM2Server(defaultValue); // tag, total words,words recv 79 | 80 | FIFO#(Bit#(8)) streamReadQ <- mkSizedBRAMFIFO(1024); // streamid, page offset 81 | FIFO#(Bit#(8)) streamWriteQ <- mkSizedBRAMFIFO(1024); // streamid, page offset 82 | 83 | Reg#(Bit#(32)) streamReadCnt <- mkReg(0); 84 | Reg#(Bit#(32)) streamWriteCnt <- mkReg(0); 85 | rule getCmd; 86 | let w <- pcie.dataReceive; 87 | let a = w.addr; 88 | let d = w.data; 89 | // PCIe IO is done at 4 byte granularities 90 | // lower 2 bits are always zero 91 | let off = (a>>2); 92 | // off == (in|out)<<8, d == page offset 93 | if ( off == 0 ) begin 94 | streamReadQ.enq(truncate(d)); 95 | end else begin 96 | streamWriteQ.enq(truncate(d)); 97 | //$write("writeQ enqued %x\n", d); 98 | end 99 | endrule 100 | 101 | FIFO#(IOReadReq) reqQ <- mkFIFO; 102 | rule readStat; 103 | let r <- pcie.dataReq; 104 | let a = r.addr; 105 | // PCIe IO is done at 4 byte granularities 106 | // lower 2 bits are always zero 107 | let 
offset = (a>>2); 108 | 109 | if ( offset == 0 ) begin 110 | pcie.dataSend(r, streamReadCnt); 111 | end else if ( offset == 1 ) begin 112 | pcie.dataSend(r, streamWriteCnt); 113 | end else if ( offset >= 2 ) begin 114 | page.portB.request.put(BRAMRequest{write:False,responseOnWrite:False,address:truncate(offset),datain:?}); 115 | reqQ.enq(r); 116 | end else begin 117 | pcie.dataSend(r, 32'hcccccccc); 118 | end 119 | endrule 120 | rule relayPageRead; 121 | let r <- page.portB.response.get(); 122 | let req = reqQ.first; 123 | reqQ.deq; 124 | pcie.dataSend(req,truncate(r)); 125 | endrule 126 | 127 | rule dmaReadReq; 128 | streamReadQ.deq; 129 | let poff = streamReadQ.first; 130 | pcie.dmaReadReq( (zeroExtend(poff)<<10), 64); // offset, words 131 | streamReadCnt <= streamReadCnt + 1; 132 | endrule 133 | Reg#(Bit#(32)) dmaReadWords <- mkReg(0); 134 | rule dmaReadDatal; 135 | DMAWord rd <- pcie.dmaReadWord; 136 | page.portA.request.put(BRAMRequest{write:True,responseOnWrite:False,address:truncate(dmaReadWords),datain:rd}); 137 | dmaReadWords <= dmaReadWords + 1; 138 | 139 | inputQ.enq(rd); 140 | endrule 141 | 142 | Reg#(Bit#(16)) curOutLeftUp <- mkReg(0); 143 | Reg#(Bit#(16)) curOutLeftDn <- mkReg(0); 144 | rule dmaWriteReq (outputCntUp - outputCntDn >= 64 && curOutLeftUp-curOutLeftDn < 128); 145 | streamWriteQ.deq; 146 | let woff = streamWriteQ.first; 147 | pcie.dmaWriteReq((zeroExtend(woff)<<10), 64); 148 | 149 | curOutLeftUp <= curOutLeftUp + 64; 150 | outputCntDn <= outputCntDn + 64; 151 | $write("Starting DMA Write\n" ); 152 | streamWriteCnt <= streamWriteCnt + 1; 153 | endrule 154 | rule dmaWriteData(curOutLeftUp != curOutLeftDn); 155 | curOutLeftDn <= curOutLeftDn + 1; 156 | 157 | //outputCntDn <= outputCntDn + 1; 158 | 159 | outputQ.deq; 160 | pcie.dmaWriteData(outputQ.first); 161 | $write("DMA Write\n" ); 162 | endrule 163 | endmodule 164 | -------------------------------------------------------------------------------- /examples/streaming/Makefile: 
-------------------------------------------------------------------------------- 1 | LIBPATH =../../ 2 | BOARD=vc707 3 | BUILDTOOLS=$(LIBPATH)/buildtools/ 4 | 5 | BLIBPATH=$(LIBPATH)/../bluelib/src/ 6 | 7 | CUSTOMBSV=-D INSTREAMS=2 -D OUTSTREAMS=2 -p +:$(BLIBPATH)/ 8 | CUSTOMCPP_BSIM= $(BLIBPATH)/bdpi.cpp 9 | 10 | include $(BUILDTOOLS)/Makefile.base 11 | 12 | 13 | -------------------------------------------------------------------------------- /examples/streaming/StreamKernel.bsv: -------------------------------------------------------------------------------- 1 | package StreamKernel; 2 | 3 | import FIFO::*; 4 | import FIFOF::*; 5 | import Clocks::*; 6 | import Vector::*; 7 | 8 | import BRAM::*; 9 | import BRAMFIFO::*; 10 | 11 | import PcieCtrl::*; 12 | 13 | typedef Bit#(256) StreamWord; 14 | 15 | interface StreamKernelIfc; 16 | method Action enq(StreamWord data); 17 | method StreamWord first; 18 | method Action deq; 19 | endinterface 20 | 21 | module mkStreamKernelTest (StreamKernelIfc); 22 | FIFO#(StreamWord) inQ <- mkFIFO; 23 | FIFO#(StreamWord) outQ <- mkFIFO; 24 | 25 | rule proct; 26 | inQ.deq; 27 | let d = inQ.first & 256'hffffffff_ffffffff_ffffffff_ffffffff__ffffffff_ffffffff_ffffffff_00000000; 28 | d = d | 256'hdeadbeef; 29 | outQ.enq(d); 30 | //outQ.enq(inQ.first & 256'hffffffff_ffffffff_ffffffff_ffffffff__ffffffff_ffffffff_ffffffff_ffffffff); 31 | $write("kernel proct\n"); 32 | endrule 33 | 34 | method Action enq(StreamWord data); 35 | inQ.enq(data); 36 | endmethod 37 | method StreamWord first; 38 | return outQ.first; 39 | endmethod 40 | method Action deq; 41 | outQ.deq; 42 | endmethod 43 | endmodule 44 | 45 | endpackage: StreamKernel 46 | -------------------------------------------------------------------------------- /examples/streaming/Top.bsv: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | import Clocks :: *; 5 | import ClockImport::*; 6 | import DefaultValue :: *; 7 | 8 | import PcieImport :: 
*; 9 | import PcieCtrl :: *; 10 | import PcieCtrl_bsim :: *; 11 | 12 | import Clocks :: *; 13 | import FIFO::*; 14 | 15 | import HwMain::*; 16 | 17 | //import Platform :: *; 18 | 19 | //import NullReset :: *; 20 | //import IlaImport :: *; 21 | 22 | interface TopIfc; 23 | (* always_ready *) 24 | interface PcieImportPins pcie_pins; 25 | (* always_ready *) 26 | method Bit#(4) led; 27 | endinterface 28 | 29 | (* no_default_clock, no_default_reset *) 30 | module mkProjectTop #( 31 | Clock pcie_clk_p, Clock pcie_clk_n, Clock emcclk, 32 | Clock sys_clk_p, Clock sys_clk_n, 33 | Reset pcie_rst_n 34 | ) 35 | (TopIfc); 36 | 37 | 38 | PcieImportIfc pcie <- mkPcieImport(pcie_clk_p, pcie_clk_n, pcie_rst_n, emcclk); 39 | Clock pcie_clk_buf = pcie.sys_clk_o; 40 | Reset pcie_rst_n_buf = pcie.sys_rst_n_o; 41 | 42 | ClockGenIfc clk_200mhz_import <- mkClockIBUFDSImport(sys_clk_p, sys_clk_n); 43 | Clock sys_clk_200mhz = clk_200mhz_import.gen_clk; 44 | ClockGenIfc sys_clk_200mhz_buf_import <- mkClockBUFGImport(clocked_by sys_clk_200mhz); 45 | Clock sys_clk_200mhz_buf = sys_clk_200mhz_buf_import.gen_clk; 46 | Reset rst200 <- mkAsyncReset( 4, pcie_rst_n, sys_clk_200mhz_buf); 47 | 48 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset); 49 | /* 50 | ClockGenerator7Params clk_params = defaultValue(); 51 | clk_params.clkin1_period = 10.000; // 100 MHz reference 52 | clk_params.clkin_buffer = False; // necessary buffer is instanced above 53 | clk_params.reset_stages = 0; // no sync on reset so input clock has pll as only load 54 | clk_params.clkfbout_mult_f = 10.000; // 1000 MHz VCO 55 | clk_params.clkout0_divide_f = 4; // 250MHz clock 56 | clk_params.clkout1_divide = 8; // 125MHz clock 57 | ClockGenerator7 clk_gen <- mkClockGenerator7(clk_params, clocked_by sys_clk_buf, reset_by sys_rst_n_buf); 58 | Clock clk250 = clk_gen.clkout0; 59 | Reset rst250 <- mkAsyncReset( 4, sys_rst_n_buf, clk250); 60 | 61 | Clock clk125 = clk_gen.clkout0; 62 | Reset 
rst125 <- mkAsyncReset( 4, sys_rst_n_buf, clk125); 63 | */ 64 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by pcieCtrl.user.user_clk, reset_by pcieCtrl.user.user_rst); 65 | 66 | //ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds); 67 | 68 | // Interfaces //// 69 | interface PcieImportPins pcie_pins = pcie.pins; 70 | 71 | method Bit#(4) led; 72 | //return leddata; 73 | return 0; 74 | endmethod 75 | endmodule 76 | 77 | module mkProjectTop_bsim (Empty); 78 | Clock curclk <- exposeCurrentClock; 79 | 80 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim; 81 | 82 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user); 83 | endmodule 84 | -------------------------------------------------------------------------------- /examples/streaming/cpp/Makefile: -------------------------------------------------------------------------------- 1 | LIBPATH=../../../ 2 | #LIBOBJ=$(LIBPATH)/cpp/obj/ 3 | 4 | BDBMPCIEINCLUDE= -I$(LIBPATH)/cpp/ 5 | BDBMPCIECPP= $(LIBPATH)/cpp/bdbmpcie.cpp $(LIBPATH)/cpp/ShmFifo.cpp 6 | LIB= -lrt -lpthread 7 | 8 | 9 | all: 10 | echo "building for pcie" 11 | mkdir -p obj 12 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/main $(LIB) -pedantic -g -O2 13 | bsim: 14 | echo "building for bluesim" 15 | mkdir -p obj 16 | g++ main.cpp $(BDBMPCIECPP) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g -pedantic 17 | -------------------------------------------------------------------------------- /examples/streaming/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "bdbmpcie.h" 5 | //#include "dmasplitter.h" 6 | 7 | double timespec_diff_sec( timespec start, timespec end ) { 8 | double t = end.tv_sec - start.tv_sec; 9 | t += ((double)(end.tv_nsec - start.tv_nsec)/1000000000L); 10 | return t; 11 | } 12 | 13 | int main(int argc, char** argv) { 14 | BdbmPcie* pcie = BdbmPcie::getInstance(); 15 | //DMASplitter* dma = DMASplitter::getInstance(); 16 | 17 | //uint32_t 
size = 128*128; 18 | 19 | /* 20 | if ( argc > 1 ) { 21 | size = atoi(argv[1]); 22 | } 23 | */ 24 | unsigned int d = pcie->readWord(0); 25 | printf( "Magic: %x\n", d ); 26 | fflush(stdout); 27 | d = pcie->readWord(32); 28 | printf( "Dma Addr 0: %x\n", d ); 29 | fflush(stdout); 30 | 31 | for ( int i = 0; i < 4; i++ ) { 32 | printf( "+++ %x\n", pcie->userReadWord((2+i)*4) ); 33 | } 34 | 35 | printf( "r %x\n", pcie->userReadWord(0) ); 36 | printf( "w %x\n", pcie->userReadWord(4) ); 37 | 38 | //uint8_t* dmabuf = (uint8_t*)dma->dmaBuffer(); 39 | uint8_t* dmabuf = (uint8_t*)pcie->dmaBuffer(); 40 | for ( uint32_t i = 0; i < 32*1024/4; i++ ) { 41 | ((uint32_t*)dmabuf)[i] = i; 42 | //dmabuf[i] = (char)i; 43 | } 44 | for ( uint32_t i = 0; i < 4*1024/4; i++ ) { 45 | ((uint32_t*)dmabuf)[i] = i; 46 | //dmabuf[i] = (char)i; 47 | } 48 | /* 49 | for ( int i = 0; i < 16; i++ ) { 50 | dmabuf[i] = 0xaa; 51 | } 52 | */ 53 | /* 54 | for ( int i = 0; i < 32; i++ ) { 55 | printf( "++ %d %x\n", i, ((uint32_t*)dmabuf)[i] ); 56 | } 57 | */ 58 | /* 59 | for ( int i = 0; i < 8; i++ ) { 60 | pcie->userWriteWord(1*4,4+i); 61 | } 62 | for ( int i = 0; i < 8; i++ ) { 63 | pcie->userWriteWord(0,i); 64 | } 65 | */ 66 | int pagecnt = 4; 67 | timespec start; 68 | timespec now; 69 | clock_gettime(CLOCK_REALTIME, & start); 70 | for ( int i = 0; i < pagecnt; i++ ) { 71 | pcie->userWriteWord(1*4,4+(i%4)); 72 | pcie->userWriteWord(0,(i%4)); 73 | } 74 | //sleep(1); 75 | /* 76 | for ( int i = 0; i < 8; i++ ) { 77 | printf( "r %x\n", pcie->userReadWord(0) ); 78 | printf( "w %x\n", pcie->userReadWord(4) ); 79 | } 80 | */ 81 | printf( "----\n" ); 82 | /* 83 | for ( int i = 0; i < 8; i++ ) { 84 | printf( "r %x\n", pcie->userReadWord(0) ); 85 | printf( "w %x\n", pcie->userReadWord(4) ); 86 | } 87 | */ 88 | //sleep(2); 89 | uint32_t pages = 0; 90 | int sleepcnt = 0; 91 | while (pages < pagecnt) { 92 | pages = pcie->userReadWord(4); 93 | if ( pages >= pagecnt ) break; 94 | 95 | sleepcnt ++; 96 | if ( sleepcnt % 
10000 == 0 ) { 97 | printf( "Pages-- %d\n", pages ); 98 | printf( "!! %x\n", pcie->readWord(4) ); 99 | printf( ">> %x\n", ((uint32_t*)dmabuf)[1024/4*4] ); 100 | } 101 | usleep(10); 102 | } 103 | clock_gettime(CLOCK_REALTIME, & now); 104 | double diff = timespec_diff_sec(start, now); 105 | printf( "Elapsed: %f\n", diff ); 106 | 107 | printf( "r %x\n", pcie->userReadWord(0) ); 108 | printf( "w %x\n", pcie->userReadWord(4) ); 109 | /* 110 | for ( int i = 0; i < 32; i++ ) { 111 | printf( "-- %d %x\n", i, ((uint32_t*)dmabuf)[i+1024/4*4] ); 112 | } 113 | */ 114 | 115 | 116 | int incorrects = 0; 117 | for ( uint32_t i = 0; i < 1024*4/4; i++ ) { 118 | uint32_t d = ((uint32_t*)dmabuf)[i+1024/4*4]; 119 | if ( i%8 == 0 ) { 120 | if (d != 0xdeadbeef) { 121 | printf ( "Data incorrect! %x != %x\n", 0xdeadbeef, d ); 122 | incorrects ++; 123 | } 124 | } else { 125 | if (d != i) { 126 | printf ( "Data incorrect! %x != %x\n", i, d ); 127 | incorrects++; 128 | } 129 | } 130 | } 131 | 132 | printf( "Incorrect datas: %d\n", incorrects ); 133 | 134 | /* 135 | for ( int i = 2; i < 16; i++ ) { 136 | printf( "Data in BRAM: %x\n", pcie->userReadWord(i*4) ); 137 | } 138 | */ 139 | 140 | printf( "DebugCode: %x\n", pcie->readWord(4) ); 141 | 142 | } 143 | -------------------------------------------------------------------------------- /examples/streaming/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$! 
5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | if [ "$1" == "gdb" ] 9 | then 10 | gdb ./sw 11 | else 12 | ./sw | tee res.txt 13 | fi 14 | kill -9 $BDBM_BSIM_PID 15 | rm /dev/shm/bdbm$BDBM_BSIM_PID 16 | -------------------------------------------------------------------------------- /examples/streaming/sw: -------------------------------------------------------------------------------- 1 | cpp/obj/bsim -------------------------------------------------------------------------------- /examples/streaming/user-ip.tcl: -------------------------------------------------------------------------------- 1 | ## None 2 | -------------------------------------------------------------------------------- /examples/test/hw/DmaSplitter.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | 6 | import BRAM::*; 7 | import BRAMFIFO::*; 8 | 9 | import PcieCtrl::*; 10 | -------------------------------------------------------------------------------- /examples/test/hw/HwMain.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | 6 | import BRAM::*; 7 | import BRAMFIFO::*; 8 | 9 | import PcieCtrl::*; 10 | 11 | import DMASplitter::*; 12 | 13 | interface HwMainIfc; 14 | endinterface 15 | 16 | module mkHwMain#(PcieUserIfc pcie) 17 | (HwMainIfc); 18 | 19 | Clock curClk <- exposeCurrentClock; 20 | Reset curRst <- exposeCurrentReset; 21 | 22 | Clock pcieclk = pcie.user_clk; 23 | Reset pcierst = pcie.user_rst; 24 | 25 | DMASplitterIfc#(4) dma <- mkDMASplitter(pcie); 26 | 27 | rule getFlashCmd; 28 | dma.deq; 29 | Bit#(128) d = dma.first; 30 | endrule 31 | 32 | rule handleFlashWriteReady; 33 | //dma.enq({32'h1,32'h2,zeroExtend(data)}); // 32'h1 for testing purposes 34 | endrule 35 | 36 | 37 | endmodule 38 | 
-------------------------------------------------------------------------------- /examples/test/hw/Makefile: -------------------------------------------------------------------------------- 1 | BSCFLAGS = -show-schedule -aggressive-conditions --wait-for-license 2 | 3 | BSCFLAGS_SYNTH = -bdir ./build/obj -vdir ./build/verilog/top -simdir ./build/obj -info-dir ./build -fdir ./build 4 | BSCFLAGS_BSIM = -bdir ./build_bsim/obj -vdir ./build_bsim/verilog/top -simdir ./build_bsim/obj -info-dir ./build_bsim -fdir ./build_bsim 5 | # BSV sources come from the library's src/ directory; the Bluesim C++ support files live in cpp/ 6 | BSVPATH =../../../src/: 7 | BSIM_CPPFILES =../../../cpp/PcieBdpi.cpp \ 8 | ../../../cpp/ShmFifo.cpp 9 | 10 | DEBUGFLAGS = -D BSIM 11 | 12 | 13 | ## change txdiffctrl in auroraintra to 1100 14 | 15 | 16 | # all: compile to Verilog, copy the Bluespec primitives, then run Vivado; the steps on the cd line are chained with && so a failure stops the build 17 | all: 18 | mkdir -p build/obj 19 | mkdir -p build/verilog/top 20 | bsc $(BSCFLAGS) $(BSCFLAGS_SYNTH) -remove-dollar -p +:$(BSVPATH) -verilog -u -g mkProjectTop Top.bsv 21 | cp vivado-impl.tcl ./build/impl.tcl 22 | cd build && cd verilog/top && ../../../verilogcopy.sh && cd ../../ && vivado -mode batch -source impl.tcl 23 | tar czf c.tgz build/ 24 | mv c.tgz build/ 25 | 26 | bsim: HwMain.bsv Top.bsv 27 | mkdir -p build_bsim 28 | mkdir -p build_bsim/obj 29 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -p +:$(BSVPATH) -sim -u -g mkProjectTop_bsim Top.bsv 30 | bsc $(BSCFLAGS) $(BSCFLAGS_BSIM) $(DEBUGFLAGS) -sim -e mkProjectTop_bsim -o build_bsim/obj/bsim build_bsim/obj/*.ba $(BSIM_CPPFILES) 31 | 32 | clean: 33 | rm -rf build 34 | rm -rf build_bsim 35 | # none of these targets produces a file named after itself 36 | .PHONY: all bsim clean core 37 | 38 | core: 39 | cd ../../../core/ ; vivado -mode batch -source core_gen_pcie.tcl -nolog -nojournal 40 | -------------------------------------------------------------------------------- /examples/test/hw/Platform.bsv: -------------------------------------------------------------------------------- 1 | package Platform; 2 | 3 | import FIFO::*; 4 | import Clocks::*; 5 | 6 | import AuroraImportArtix7 :: *; 7 | 8 | interface ControllerPlatformIfc; 9 | endinterface 10 | 11 | module
mkPlatform#(AuroraIfc aurora0, Clock clk100, Reset rst100) 12 | (ControllerPlatformIfc); 13 | 14 | Reg#(Bit#(32)) auroraDummy <- mkReg(0); 15 | Reg#(Bit#(1)) auroraStat <- mkReg(0); 16 | 17 | FIFO#(Tuple2#(DataIfc, PacketType)) auroraRQ <- mkFIFO(); 18 | rule mirrorAuroraR; 19 | let d <- aurora0.receive; 20 | aurora0.send(tpl_1(d), tpl_2(d)); 21 | //auroraRQ.enq(d); 22 | endrule 23 | 24 | endmodule 25 | 26 | endpackage: Platform 27 | -------------------------------------------------------------------------------- /examples/test/hw/Top.bsv: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | import Clocks :: *; 5 | import DefaultValue :: *; 6 | import Xilinx :: *; 7 | import XilinxCells :: *; 8 | 9 | import PcieImport :: *; 10 | import PcieCtrl :: *; 11 | import PcieCtrl_bsim :: *; 12 | 13 | import Clocks :: *; 14 | import FIFO::*; 15 | 16 | import HwMain::*; 17 | 18 | //import Platform :: *; 19 | 20 | //import NullReset :: *; 21 | //import IlaImport :: *; 22 | 23 | interface TopIfc; 24 | (* always_ready *) 25 | interface PcieImportPins pcie_pins; 26 | (* always_ready *) 27 | method Bit#(4) led; 28 | endinterface 29 | 30 | (* no_default_clock, no_default_reset *) 31 | module mkProjectTop #( 32 | Clock sys_clk_p, Clock sys_clk_n, Clock emcclk, 33 | Reset sys_rst_n 34 | ) 35 | (TopIfc); 36 | 37 | 38 | PcieImportIfc pcie <- mkPcieImport(sys_clk_p, sys_clk_n, sys_rst_n, emcclk); 39 | Clock sys_clk_buf = pcie.sys_clk_o; 40 | Reset sys_rst_n_buf = pcie.sys_rst_n_o; 41 | 42 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl(pcie.user, clocked_by pcie.user_clk, reset_by pcie.user_reset); 43 | 44 | ClockGenerator7Params clk_params = defaultValue(); 45 | clk_params.clkin1_period = 10.000; // 100 MHz reference 46 | clk_params.clkin_buffer = False; // necessary buffer is instanced above 47 | clk_params.reset_stages = 0; // no sync on reset so input clock has pll as only load 48 | clk_params.clkfbout_mult_f = 10.000; // 1000 MHz VCO 49 | 
clk_params.clkout0_divide_f = 4; // 250MHz clock 50 | clk_params.clkout1_divide = 8; // 125MHz clock 51 | ClockGenerator7 clk_gen <- mkClockGenerator7(clk_params, clocked_by sys_clk_buf, reset_by sys_rst_n_buf); 52 | Clock clk250 = clk_gen.clkout0; 53 | Reset rst250 <- mkAsyncReset( 4, sys_rst_n_buf, clk250); 54 | // clkout1 is the output configured with divide 8 above (125MHz); clkout0 is the 250MHz output 55 | Clock clk125 = clk_gen.clkout1; 56 | Reset rst125 <- mkAsyncReset( 4, sys_rst_n_buf, clk125); 57 | // user logic is clocked and reset from the 125MHz domain 58 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user, clocked_by clk125, reset_by rst125); 59 | 60 | //ReadOnly#(Bit#(4)) leddata <- mkNullCrossingWire(noClock, pcieCtrl.leds); 61 | 62 | // Interfaces //// 63 | interface PcieImportPins pcie_pins = pcie.pins; 64 | 65 | method Bit#(4) led; 66 | //return leddata; 67 | return 0; 68 | endmethod 69 | endmodule 70 | 71 | module mkProjectTop_bsim (Empty); 72 | Clock curclk <- exposeCurrentClock; 73 | 74 | PcieCtrlIfc pcieCtrl <- mkPcieCtrl_bsim; 75 | 76 | HwMainIfc hwmain <- mkHwMain(pcieCtrl.user); 77 | endmodule 78 | -------------------------------------------------------------------------------- /examples/test/hw/bsimres.txt: -------------------------------------------------------------------------------- 1 | hardware shm_open /bdbm14777 returned 5 with errno 0 2 | AuroraOutQ: flowControl packet: 64 3 | send budget = 0 4 | Gearbox send out: 40000000000000000000000000000040 5 | Gearbox received: 40000000000000000000000000000040 6 | -------------------------------------------------------------------------------- /examples/test/hw/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./bsim/obj/bsim & 4 | export BDBM_BSIM_PID=$!
5 | echo "running sw" 6 | echo $BDBM_BSIM_PID 7 | sleep 1 8 | ../sw/obj/bsim 9 | kill -9 $BDBM_BSIM_PID 10 | -------------------------------------------------------------------------------- /examples/test/hw/verilogcopy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copies the Bluespec library Verilog primitives listed in VFILES from $BLUESPECDIR/Verilog into the directory the script was started from. 3 | VFILES=" 4 | SyncResetA.v 5 | SyncRegister.v 6 | SyncHandshake.v 7 | MakeResetA.v 8 | SizedFIFO.v 9 | Counter.v 10 | TriState.v 11 | FIFO2.v 12 | ResetInverter.v 13 | SyncFIFO.v 14 | ClockDiv.v 15 | ResetEither.v 16 | MakeReset.v 17 | SyncReset0.v 18 | BRAM2.v 19 | SyncWire.v 20 | " 21 | # Abort if BLUESPECDIR is unset or the directory is missing, instead of copying from the wrong place 22 | CURDIR=$(pwd) 23 | cd "${BLUESPECDIR:?BLUESPECDIR is not set}/Verilog" || exit 1 24 | for VFILE in $VFILES ; 25 | do 26 | echo "$VFILE" 27 | cp "$VFILE" "$CURDIR/" || exit 1 28 | done 29 | -------------------------------------------------------------------------------- /examples/test/hw/vivado-impl.tcl: -------------------------------------------------------------------------------- 1 | set_param general.maxThreads 8 2 | 3 | # NOTE: typical usage would be "vivado -mode tcl -source create_mkPcieTop_batch.tcl" 4 | # 5 | # STEP#0: define output directory area.
6 | # 7 | set pciedir ../../../../ 8 | set flashdir ../../../flash/ 9 | 10 | set outputDir ./hw 11 | file mkdir $outputDir 12 | # 13 | # STEP#1: setup design sources and constraints 14 | # 15 | #source board.tcl 16 | 17 | set partname {xc7vx485tffg1761-2} 18 | 19 | read_verilog [ glob {verilog/top/*.v} ] 20 | 21 | set_property part $partname [current_project] 22 | 23 | ############# Pcie Stuff 24 | read_ip $pciedir/core/pcie_7x_0/pcie_7x_0.xci 25 | read_verilog [ glob $pciedir/src/*.v ] 26 | read_xdc $pciedir/src/xilinx_pcie_7x_ep_x8g2_VC707.xdc 27 | ############## end Pcie Stuff 28 | 29 | ############# Flash Stuff 30 | #read_ip $flashdir/aurora_8b10b_fmc1/aurora_8b10b_fmc1.xci 31 | #read_verilog [ glob $flashdir/xilinx/*.v ] 32 | #read_xdc $flashdir/xilinx/aurora_8b10b_fmc1_exdes.xdc 33 | ############# end Flash Stuff 34 | 35 | 36 | #generate_target {Synthesis} [get_files ../../xilinx/vio_7series/vio_7series.xci] 37 | #read_ip ../../xilinx/vio_7series/vio_7series.xci 38 | # 39 | #generate_target {Synthesis} [get_files ../../xilinx/ila_7series/ila_7series.xci] 40 | #read_ip ../../xilinx/ila_7series/ila_7series.xci 41 | # 42 | #read_verilog [ glob {../../xilinx/nullreset/*.v} ] 43 | 44 | #read_xdc {../../xilinx/constraints/ac701.xdc} 45 | 46 | 47 | # STEP#2: run synthesis, report utilization and timing estimates, write checkpoint design 48 | # 49 | synth_design -name mkProjectTop -top mkProjectTop -part $partname -flatten rebuilt 50 | 51 | write_checkpoint -force $outputDir/mkprojecttop_post_synth 52 | report_timing_summary -verbose -file $outputDir/mkprojecttop_post_synth_timing_summary.rpt 53 | report_timing -sort_by group -max_paths 100 -path_type summary -file $outputDir/mkprojecttop_post_synth_timing.rpt 54 | report_utilization -verbose -file $outputDir/mkprojecttop_post_synth_utilization.txt 55 | report_datasheet -file $outputDir/mkprojecttop_post_synth_datasheet.txt 56 | write_verilog -force $outputDir/mkprojecttop_netlist.v 57 | write_debug_probes -force 
probes.ltx 58 | #report_power -file $outputDir/mkprojecttop_post_synth_power.rpt 59 | 60 | # 61 | # STEP#3: run placement and logic optimization, report utilization and timing estimates, write checkpoint design 62 | # 63 | 64 | 65 | opt_design 66 | # power_opt_design 67 | place_design 68 | phys_opt_design 69 | write_checkpoint -force $outputDir/mkprojecttop_post_place 70 | report_timing_summary -file $outputDir/mkprojecttop_post_place_timing_summary.rpt 71 | # 72 | # STEP#4: run router, report actual utilization and timing, write checkpoint design, run drc, write verilog and xdc out 73 | # 74 | route_design 75 | write_checkpoint -force $outputDir/mkprojecttop_post_route 76 | report_timing_summary -file $outputDir/mkprojecttop_post_route_timing_summary.rpt 77 | report_timing -sort_by group -max_paths 100 -path_type summary -file $outputDir/mkprojecttop_post_route_timing.rpt 78 | report_clock_utilization -file $outputDir/mkprojecttop_clock_util.rpt 79 | report_utilization -file $outputDir/mkprojecttop_post_route_util.rpt 80 | report_datasheet -file $outputDir/mkprojecttop_post_route_datasheet.rpt 81 | #report_power -file $outputDir/mkprojecttop_post_route_power.rpt 82 | #report_drc -file $outputDir/mkprojecttop_post_imp_drc.rpt 83 | #write_verilog -force $outputDir/mkprojecttop_impl_netlist.v 84 | write_xdc -no_fixed_only -force $outputDir/mkprojecttop_impl.xdc 85 | # 86 | # STEP#5: generate a bitstream 87 | # 88 | write_bitstream -force -bin_file $outputDir/mkProjectTop.bit 89 | -------------------------------------------------------------------------------- /examples/test/sw/Makefile: -------------------------------------------------------------------------------- 1 | BDBMPCIEDIR=../../../ 2 | # host-side library sources (ShmFifo.cpp, bdbmpcie.cpp) both live in the library's cpp/ directory 3 | BDBMPCIEFILES=$(BDBMPCIEDIR)/cpp/ShmFifo.cpp $(BDBMPCIEDIR)/cpp/bdbmpcie.cpp flashmanager.cpp 4 | BDBMPCIEINCLUDE= -I $(BDBMPCIEDIR)/src/ -I $(BDBMPCIEDIR)/cpp/ 5 | LIB= -lrt -lpthread 6 | 7 | 8 | all: 9 | echo "building for pcie" 10 | mkdir -p obj 11 | g++ main.cpp
$(BDBMPCIEFILES) $(BDBMPCIEINCLUDE) -o obj/main $(LIB) 12 | bsim: 13 | echo "building for bluesim" 14 | mkdir -p obj 15 | g++ main.cpp $(BDBMPCIEFILES) $(BDBMPCIEINCLUDE) -o obj/bsim $(LIB) -DBLUESIM -g 16 | -------------------------------------------------------------------------------- /examples/test/sw/flashmanager.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "flashmanager.h" 5 | 6 | FlashManager* 7 | FlashManager::m_pInstance = NULL; 8 | 9 | FlashManager* 10 | FlashManager::getInstance() { 11 | if ( m_pInstance == NULL ) m_pInstance = new FlashManager(); 12 | 13 | return m_pInstance; 14 | } 15 | 16 | void* flashManagerThread(void* arg) { 17 | BdbmPcie* pcie = BdbmPcie::getInstance(); 18 | void* dmabuffer = pcie->dmaBuffer(); 19 | unsigned int* ubuf = (unsigned int*)dmabuffer; 20 | 21 | FlashManager* flash = FlashManager::getInstance(); 22 | 23 | while(1) { 24 | pcie->waitInterrupt(); 25 | for ( int i = 0; i < 32; i++ ) { 26 | printf( " %x", ubuf[i] ); 27 | if ( i % 4 == 3 ) printf( "\n"); 28 | } 29 | } 30 | while(1) { 31 | usleep(1000); 32 | uint32_t stat = pcie->readWord(1024*4); 33 | if ( (stat>>24) > 0 ) { 34 | uint8_t type = (0xff & (stat>>16)); 35 | uint16_t val = (0xffff & stat); 36 | 37 | switch ( type ) { 38 | case 0: printf( "read done\n" ); break; 39 | case 1: printf( "write done\n" ); break; 40 | case 2: printf( "erase done!\n" ); break; 41 | case 3: printf( "erase failed!\n" ); break; 42 | case 4: { 43 | printf( "write ready\n" ); 44 | 45 | uint32_t* b = (uint32_t*)flash->storebuffer; 46 | for ( int i = 0; i < (8192+32)/4; i++ ) { 47 | int idx = i % 4; 48 | pcie->writeWord((1024+4+idx)*4, i); 49 | //printf( "Writing %x\n", b[i] ); 50 | } 51 | printf( "written\n" ); 52 | break; 53 | } 54 | } 55 | } 56 | } 57 | } 58 | 59 | FlashManager::FlashManager() { 60 | pthread_create(&flashThread, NULL, flashManagerThread, NULL); 61 | 62 | } 63 | 64 | /* 65 | 0: op 66 | 1: 
blockpagetag 67 | 2: buschip 68 | */ 69 | void FlashManager::eraseBlock(int bus, int chip, int block) { 70 | BdbmPcie* pcie = BdbmPcie::getInstance(); 71 | int page = 0; 72 | int tag = 0; 73 | uint32_t blockpagetag = (block<<16) | (page<<8) | tag; 74 | uint32_t buschip = (bus<<8) | chip; 75 | pcie->writeWord((1024+2)*4, buschip); 76 | pcie->writeWord((1024+1)*4, blockpagetag); 77 | pcie->writeWord(1024*4, 0); // triggers erase 78 | 79 | } 80 | void FlashManager::writePage(int bus, int chip, int block, int page, void* buffer) { 81 | BdbmPcie* pcie = BdbmPcie::getInstance(); 82 | int tag = 0; 83 | uint32_t blockpagetag = (block<<16) | (page<<8) | tag; 84 | uint32_t buschip = (bus<<8) | chip; 85 | pcie->writeWord((1024+2)*4, buschip); 86 | pcie->writeWord((1024+1)*4, blockpagetag); 87 | pcie->writeWord(1024*4, 2); // triggers write 88 | this->storebuffer = buffer; 89 | } 90 | void FlashManager::readPage(int bus, int chip, int block, int page, void* buffer) { 91 | BdbmPcie* pcie = BdbmPcie::getInstance(); 92 | int tag = 0; 93 | uint32_t blockpagetag = (block<<16) | (page<<8) | tag; 94 | uint32_t buschip = (bus<<8) | chip; 95 | pcie->writeWord((1024+2)*4, buschip); 96 | pcie->writeWord((1024+1)*4, blockpagetag); 97 | pcie->writeWord(1024*4, 1); // triggers read 98 | this->storebuffer = buffer; 99 | 100 | } 101 | -------------------------------------------------------------------------------- /examples/test/sw/flashmanager.h: -------------------------------------------------------------------------------- 1 | #include "bdbmpcie.h" 2 | 3 | #ifndef __FLASHMANAGER__H__ 4 | #define __FLASHMANAGER__H__ 5 | class FlashManager { 6 | public: 7 | FlashManager(); 8 | void eraseBlock(int bus, int chip, int block); 9 | void writePage(int bus, int chip, int block, int page, void* buffer); 10 | void readPage(int bus, int chip, int block, int page, void* buffer); 11 | 12 | static FlashManager* getInstance(); 13 | 14 | private: 15 | pthread_t flashThread; 16 | static FlashManager* 
m_pInstance; 17 | 18 | public: 19 | //FIXME use dma Buffer instead 20 | void* storebuffer; 21 | }; 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /examples/test/sw/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "bdbmpcie.h" 5 | #include "flashmanager.h" 6 | 7 | // Flash test driver: reads the magic word, erases block (1,1,1), then writes page 0 of that block from pageBufferW. 8 | int main() { 9 | BdbmPcie* pcie = BdbmPcie::getInstance(); 10 | void* dmabuffer = pcie->dmaBuffer(); 11 | unsigned int* ubuf = (unsigned int*)dmabuffer; 12 | 13 | pcie->writeWord(0,0); 14 | unsigned int d = pcie->readWord(0); 15 | printf( "Magic: %x\n", d ); 16 | 17 | 18 | FlashManager* flash = FlashManager::getInstance(); 19 | // page buffers are 8192 data bytes plus 32 extra bytes; only the first 8192 bytes of the write buffer are filled 20 | uint32_t* pageBufferW = (uint32_t*)malloc(8192+32); 21 | uint32_t* pageBufferR = (uint32_t*)malloc(8192+32); 22 | for ( int i = 0; i < 8192/4; i++ ) { 23 | pageBufferW[i] = i; 24 | } 25 | printf( "Sending erase message:\n" ); 26 | flash->eraseBlock(1,1,1); 27 | //sleep(1); 28 | flash->writePage(1,1,1,0, pageBufferW); 29 | 30 | sleep(5); 31 | // exit() ends the program here, so the read-back code below never runs 32 | exit(1); 33 | 34 | 35 | printf( "\t\tSending read cmd\n" ); 36 | 37 | 38 | flash->readPage(1,1,1,0, pageBufferR); 39 | 40 | 41 | printf( "\t\tStarting read\n" ); 42 | sleep(1); 43 | 44 | for ( int i = 0; i < (8192+32)/4; i++ ) { 45 | int idx = i % 4; 46 | uint32_t r = pcie->readWord((1024+1+idx)*4); 47 | printf( "%d:%x ", i, r ); 48 | if ( i % 8 == 0 ) printf( "\n" ); 49 | } 50 | 51 | 52 | 53 | /* 54 | for ( int i = 0; i < 1024; i++ ) { 55 | pcie->writeWord((1024+3)*4, 0); 56 | } 57 | 58 | pcie->writeWord((1024+1)*4, 0); 59 | 60 | pcie->writeWord(1024*4, 4); 61 | for ( int i = 0; i < 32; i++ ) { 62 | pcie->writeWord(1024*4, 8); 63 | } 64 | sleep(1); 65 | pcie->writeWord((1024+1)*4, 0); 66 | 67 | 68 | for ( int i = 0; i < 4096; i++ ) { 69 | if ( ubuf[i] != 0 ) { 70 | printf( "%d: %x\n", i, ubuf[i] ); 71 | } 72 | } 73 | */ 74 | sleep(2); 75 | } 76 |
-------------------------------------------------------------------------------- /src/ClockImport.bsv: -------------------------------------------------------------------------------- 1 | package ClockImport; 2 | import Clocks ::*; 3 | 4 | /* BVI wrapper for the IBUFDS primitive: clk_p drives port I, clk_n drives port IB, and port O is exposed as gen_clk (declared same-family as clk_p). No default clock or reset. */ import "BVI" IBUFDS = 5 | module mkClockIBUFDSImport#(Clock clk_p, Clock clk_n)(ClockGenIfc); 6 | default_clock no_clock; 7 | default_reset no_reset; 8 | 9 | parameter CAPACITANCE = "DONT_CARE"; 10 | parameter DIFF_TERM = "FALSE"; 11 | parameter DQS_BIAS = "FALSE"; 12 | parameter IBUF_DELAY_VALUE = "0"; 13 | parameter IBUF_LOW_PWR = "TRUE"; 14 | parameter IFD_DELAY_VALUE = "AUTO"; 15 | parameter IOSTANDARD = "DEFAULT"; 16 | 17 | input_clock clk_p(I) = clk_p; 18 | input_clock clk_n(IB) = clk_n; 19 | 20 | output_clock gen_clk(O); 21 | 22 | path(I, O); 23 | path(IB, O); 24 | 25 | same_family(clk_p, gen_clk); 26 | endmodule 27 | 28 | /* BVI wrapper for IBUFDS_GTE2 with the same clock wiring as above; port CEB is tied to 1'b0. */ import "BVI" IBUFDS_GTE2 = 29 | module mkClockIBUFDS_GTE2Import#(Clock clk_p, Clock clk_n)(ClockGenIfc); 30 | default_clock no_clock; 31 | default_reset no_reset; 32 | 33 | input_clock clk_p(I) = clk_p; 34 | input_clock clk_n(IB) = clk_n; 35 | 36 | port CEB = 1'b0; 37 | 38 | output_clock gen_clk(O); 39 | 40 | path(I, O); 41 | path(IB, O); 42 | 43 | same_family(clk_p, gen_clk); 44 | endmodule 45 | 46 | /* BVI wrapper for BUFG: the module's default clock feeds port I (its gate is marked unused) and port O is exposed as gen_clk, same-family as the input clock. */ import "BVI" BUFG = 47 | module mkClockBUFGImport(ClockGenIfc); 48 | default_clock clk(I, (*unused*)GATE); 49 | default_reset no_reset; 50 | 51 | path(I, O); 52 | 53 | output_clock gen_clk(O); 54 | same_family(clk, gen_clk); 55 | endmodule 56 | 57 | 58 | endpackage: ClockImport 59 | -------------------------------------------------------------------------------- /src/CompletionFIFO.bsv: -------------------------------------------------------------------------------- 1 | package CompletionFIFO; 2 | 3 | import Vector::*; 4 | import FIFO::*; 5 | import BRAM::*; 6 | 7 | import BRAMFIFO::*; 8 | 9 | interface CompletionFIFOIfc#(type burstWord, numeric type burstSz, numeric type tagSz); 10 | method Action 
enq(burstWord w, Bit#(tagSz) tag); 11 | method Tuple2#(burstWord, Bit#(tagSz)) first; 12 | method Action deq; 13 | endinterface 14 | 15 | /* Placeholder implementation: enq and deq have empty bodies and first returns a don't-care value (?), so nothing is stored or reordered yet. */ module mkCompletionFIFO (CompletionFIFOIfc#(burstWord, burstSz, tagSz)); 16 | method Action enq(burstWord w, Bit#(tagSz) tag); 17 | endmethod 18 | method Tuple2#(burstWord, Bit#(tagSz)) first; 19 | return ?; 20 | endmethod 21 | method Action deq; 22 | endmethod 23 | endmodule 24 | 25 | endpackage: CompletionFIFO 26 | -------------------------------------------------------------------------------- /src/DMACircularQueue.bsv: -------------------------------------------------------------------------------- 1 | import FIFO::*; 2 | import FIFOF::*; 3 | import Clocks::*; 4 | import Vector::*; 5 | 6 | import BRAM::*; 7 | import BRAMFIFO::*; 8 | 9 | import PcieCtrl::*; 10 | 11 | import MergeN::*; 12 | 13 | /* A queue word is twice the width of a PCIe word. */ typedef TMul#(2,PcieWordSz) DMAQueueWordSz; 14 | typedef Bit#(DMAQueueWordSz) DMAQueueWord; 15 | 16 | /* getCmd yields 128-bit commands, enqStat takes an 8-bit address plus a 32-bit value, enq takes a DMAQueueWord; first/deq are marked TODO. */ interface DMACircularQueueIfc#(numeric type bufferSz); 17 | method ActionValue#(Bit#(128)) getCmd; 18 | method Action enqStat(Bit#(8) addr, Bit#(32) data); 19 | 20 | method Action enq(DMAQueueWord word); 21 | 22 | // TODO 23 | method ActionValue#(DMAQueueWord) first; 24 | method Action deq; 25 | endinterface 26 | 27 | // bufferSz is log(bytes) 28 | module mkDMACircularQueue#(PcieUserIfc pcie) (DMACircularQueueIfc#(bufferSz)) 29 | provisos(Add#(a__, bufferSz, 32)); 30 | 31 | Clock curClk <- exposeCurrentClock; 32 | Reset curRst <- exposeCurrentReset; 33 | 34 | Clock pcieclk = pcie.user_clk; 35 | Reset pcierst = pcie.user_rst; 36 | 37 | Reg#(Bit#(32)) writeByteCount <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 38 | Reg#(Bit#(32)) readByteCount <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 39 | 40 | 41 | FIFO#(IOWrite) userWriteQ <- mkFIFO(clocked_by pcieclk, reset_by pcierst); 42 | rule getWriteReq; 43 | IOWrite d <- pcie.dataReceive; 44 | userWriteQ.enq(d); 45 | endrule 46 | 47 | SyncFIFOIfc#(Bit#(32)) enqSyncQ <- 
mkSyncFIFOFromCC(32,pcieclk); 48 | Vector#(16,Reg#(Bit#(32))) statReg <- replicateM(mkReg(0, clocked_by pcieclk, reset_by pcierst)); 49 | /* NOTE(review): userReadReqQ is created without clocked_by/reset_by, unlike the other FIFOs in this module, and dataReq is bound with '=' instead of '<-' (PcieCtrl_bsim declares dataReq as an ActionValue method) - confirm both. */ FIFO#(IOReadReq) userReadReqQ <- mkFIFO; 50 | rule getReadReq; 51 | let req = pcie.dataReq; 52 | userReadReqQ.enq(req); 53 | endrule 54 | /* Answers host register reads: word address 0 pops enqSyncQ, addresses 1-15 return statReg[addr]; other addresses get no response. */ rule procUserR; 55 | let req = userReadReqQ.first; 56 | userReadReqQ.deq; 57 | 58 | /* NOTE(review): no 'd' is bound in this rule; the request is named 'req', so req.addr was presumably intended. */ let addr = (d.addr>>2); 59 | if ( addr == 0 ) begin 60 | enqSyncQ.deq; 61 | pcie.dataSend(req, enqSyncQ.first); 62 | end else if ( addr < 16 ) begin 63 | pcie.dataSend(req, statReg[addr]); 64 | end 65 | endrule 66 | 67 | 68 | Reg#(Bool) started <- mkReg(False, clocked_by pcieclk, reset_by pcierst); 69 | Vector#(4,Reg#(Bit#(32))) cmdbuffer <- replicateM(mkReg(0), clocked_by pcieclk, reset_by pcierst); 70 | SyncFIFOIfc#(Bit#(128)) cmdQ <- mkSyncFIFOToCC(32,pcieclk,pcierst); 71 | /* Decodes host register writes by word offset (addr>>2): offset 0 pushes {cmdbuffer[3],cmdbuffer[2],cmdbuffer[1],data} to cmdQ, offsets 1-3 latch cmdbuffer, 16 sets started, 17 updates readByteCount. */ rule procUserW; 72 | let d = userWriteQ.first; 73 | userWriteQ.deq; 74 | $display( "User write request @ %d %x", d.addr>>2, d.data ); 75 | Bit#(8) toffset = truncate(d.addr>>2); 76 | 77 | if ( toffset == 0 ) begin 78 | cmdQ.enq({cmdbuffer[3],cmdbuffer[2],cmdbuffer[1],d.data}); 79 | end else if ( toffset < 4 ) begin 80 | cmdbuffer[toffset] <= d.data; 81 | end 82 | if ( toffset == 16 ) begin 83 | started <= True; 84 | end 85 | if ( toffset == 17 ) begin 86 | readByteCount <= d.data; 87 | end 88 | endrule 89 | 90 | 91 | /* NOTE(review): enqSyncQ is already declared above as SyncFIFOIfc#(Bit#(32)); this second declaration reuses the name with Bit#(256). enqStat enqueues a 32-bit value and enq enqueues a DMAQueueWord into 'enqSyncQ', so two separately named queues look intended. */ SyncFIFOIfc#(Bit#(256)) enqSyncQ <- mkSyncFIFOFromCC(32,pcieclk); 92 | FIFO#(PcieWord) enqQ <- mkSizedFIFO(32,clocked_by pcieclk, reset_by pcierst); 93 | Reg#(Bit#(8)) enqCountUp <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 94 | Reg#(Bit#(8)) enqCountDown <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 95 | 96 | Reg#(Bit#(8)) dmaCountRemain <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 97 | Reg#(Bit#(8)) dmaCurTag <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 98 | FIFO#(Bit#(8)) availWriteTagQ <- mkSizedFIFO(32, clocked_by pcieclk, reset_by pcierst); 99 | 100 | Reg#(Bit#(8)) availTagCounter <- mkReg(0, 
clocked_by pcieclk, reset_by pcierst); 101 | /* Seeds availWriteTagQ with tags 0..31 after reset. */ rule fillAvailWriteTag(availTagCounter <32); 102 | availTagCounter <= availTagCounter + 1; 103 | availWriteTagQ.enq(availTagCounter); 104 | endrule 105 | 106 | 107 | Reg#(Bit#(32)) writeOffset <- mkReg(0, clocked_by pcieclk, reset_by pcierst); 108 | /* Starts an 8-word DMA write at writeOffset (truncated to bufferSz bits) once enqCountUp-enqCountDown exceeds 8, and advances writeOffset by 128. */ rule initDMAWrite (dmaCountRemain == 0 && enqCountUp-enqCountDown > 8); 109 | let tag = availWriteTagQ.first; 110 | availWriteTagQ.deq; 111 | 112 | Bit#(bufferSz) offset = truncate(writeOffset); 113 | pcie.dmaWriteReq(zeroExtend(offset), 8, tag); 114 | writeOffset <= writeOffset+128; 115 | //pcie.dmaWriteData(enqQ.first,tag); 116 | //enqQ.deq; 117 | 118 | dmaCountRemain <= 8; 119 | //enqCountDown <= enqCountDown + 1; 120 | dmaCurTag <= tag; 121 | 122 | $display( "dma write starting @ %d (%d)", offset, tag ); 123 | endrule 124 | /* Sends one word of the current burst per firing; the tag is returned to availWriteTagQ with the last word. */ rule dmaWriteWord (dmaCountRemain > 0); 125 | dmaCountRemain <= dmaCountRemain - 1; 126 | enqCountDown <= enqCountDown + 1; 127 | 128 | pcie.dmaWriteData(enqQ.first,dmaCurTag); 129 | enqQ.deq; 130 | 131 | if ( dmaCountRemain == 1 ) begin 132 | availWriteTagQ.enq(dmaCurTag); 133 | end 134 | endrule 135 | 136 | 137 | 138 | 139 | 140 | Reg#(Maybe#(PcieWord)) serializerBuffer <- mkReg(tagged Invalid, clocked_by pcieclk, reset_by pcierst); 141 | /* NOTE(review): the text of this rule is truncated in this copy (the listing jumps from line 141 to line 153); restore it from the original file before editing. */ rule serializeEnq (started && writeByteCount-readByteCount<(1<>valueOf(PcieWordSz)); 153 | DMAWord down = truncate(d); 154 | enqQ.enq(down); 155 | serializerBuffer <= tagged Valid truncate(up); 156 | end 157 | endrule 158 | 159 | method ActionValue#(Bit#(128)) getCmd; 160 | cmdQ.deq; 161 | return cmdQ.first; 162 | endmethod 163 | /* NOTE(review): statReg is clocked by pcieclk but written here from a method of this module's own clock domain - confirm the intended crossing. */ method Action enqStat(Bit#(8) addr, Bit#(32) data); 164 | if ( addr == 0 ) begin 165 | enqSyncQ.enq(data); 166 | end else if(addr<16) begin 167 | statReg[addr] <= data; 168 | end 169 | endmethod 170 | 171 | method Action enq(DMAQueueWord word); 172 | enqSyncQ.enq(word); 173 | endmethod 174 | 175 | // TODO 176 | method ActionValue#(DMAQueueWord) first; 177 | return ?; 178 | endmethod 179 | 
method Action deq; 180 | endmethod 181 | endmodule 182 | 183 |
--------------------------------------------------------------------------------
/src/DMAReadHelper.bsv:
--------------------------------------------------------------------------------
import Clocks::*;
import FIFO::*;
import BRAMFIFO::*;
import FIFOF::*;
import Vector::*;

import PcieCtrl::*;


// Streams a region of the host DMA buffer to the FPGA as 128-bit words.
interface DMAReadHelperIfc;
	// Queue a read starting at byte `offset` of the host buffer.
	// NOTE: the second argument is a BYTE count despite being named `words`
	// (the implementation consumes it as bytes, 16 bytes per delivered word).
	// The name is kept so existing users of the interface are unaffected.
	method Action readReq(Bit#(32) offset, Bit#(32) words);
	// Next 128-bit word, delivered in the order the data was requested.
	method ActionValue#(Bit#(128)) read;
endinterface

// Splits each readReq into tagged PCIe read requests of at most 128 bytes,
// collects the returned words per tag and hands them out in request order.
// All DMA logic runs in the PCIe user clock domain; readReq/read cross from
// and to the caller's clock through SyncFIFOs.
module mkDMAReadHelper#(PcieUserIfc pcie) (DMAReadHelperIfc);
	Clock pcieclk = pcie.user_clk;
	Reset pcierst = pcie.user_rst;
	Clock curclk <- exposeCurrentClock;
	Reset currst <- exposeCurrentReset;

	SyncFIFOIfc#(Tuple2#(Bit#(32), Bit#(32))) readCmdQ <- mkSyncFIFO(4, curclk, currst, pcieclk);
	Reg#(Bit#(32)) dmaReadHostOff <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(32)) dmaReadLeftBytes <- mkReg(0, clocked_by pcieclk, reset_by pcierst); // host->fpga

	/**************************************
	** DMA Host -> FPGA Start
	**************************************/
	//TODO changing dmaReadTagCount require changing curReadTag
	Integer dmaReadTagCount = 8;
	FIFO#(Bit#(8)) dmaReadFreeTagQ <- mkSizedFIFO(dmaReadTagCount, clocked_by pcieclk, reset_by pcierst);
	// Words still expected for each outstanding tag (0 = tag idle).
	Vector#(8, Reg#(Bit#(8))) vDmaReadTagWordsLeft <- replicateM(mkReg(0, clocked_by pcieclk, reset_by pcierst));
	// Per-tag landing FIFOs; 8 entries hold one full 128-byte request.
	Vector#(8, FIFO#(Bit#(128))) vDmaReadWords <- replicateM(mkSizedFIFO(8, clocked_by pcieclk, reset_by pcierst));
	//ScatterNIfc#(16, Bit#(128)) dmaReadWordsS <- mkScatterN;//TODO use this
	// (tag, word count) of every issued request, in issue order.
	FIFO#(Tuple2#(Bit#(8),Bit#(8))) dmaReadTagOrderQ <- mkSizedFIFO(dmaReadTagCount, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(8)) dmaReadTagInit <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bool) dmaReadTagInitDone <- mkReg(False, clocked_by pcieclk, reset_by pcierst);

	// Seeds the free-tag queue with tags 0 .. dmaReadTagCount-1 after reset.
	rule initDmaTagR(dmaReadTagInit < fromInteger(dmaReadTagCount));
		dmaReadTagInit <= dmaReadTagInit + 1;
		dmaReadFreeTagQ.enq(dmaReadTagInit);
		if ( dmaReadTagInit + 1 >= fromInteger(dmaReadTagCount) ) begin
			dmaReadTagInitDone <= True;
		end
	endrule

	// Issues the next read request (at most 128 bytes) of the current command.
	rule sendDMARead ( dmaReadTagInitDone && dmaReadLeftBytes > 0 );
		dmaReadFreeTagQ.deq;
		Bit#(8) freeTag = dmaReadFreeTagQ.first;

		Bit#(32) chunkBytes = (dmaReadLeftBytes >= 128) ? 128 : dmaReadLeftBytes;
		// Round up to whole 16-byte words. Previously a tail shorter than 16
		// bytes produced a 0-word request and a 0 count in dmaReadTagOrderQ,
		// which made curReadTagCnt underflow in startReorderRead and left the
		// tag outstanding forever. Since dmaReadLeftBytes > 0 here, `words`
		// is now always in 1..8.
		Bit#(8) words = truncate((chunkBytes + 15) >> 4);
		pcie.dmaReadReq(dmaReadHostOff, zeroExtend(words), freeTag);

		dmaReadLeftBytes <= dmaReadLeftBytes - chunkBytes;
		dmaReadHostOff <= dmaReadHostOff + chunkBytes;
		vDmaReadTagWordsLeft[freeTag] <= words;
		dmaReadTagOrderQ.enq(tuple2(freeTag,words));
	endrule

	FIFO#(Tuple2#(Bit#(8), Bit#(128))) dmaReadWordsQ <- mkFIFO(clocked_by pcieclk, reset_by pcierst);
	// Accepts returned words; the tag is recycled with the last expected word.
	// Words arriving for a tag that expects none are dropped.
	rule getDMARead ( dmaReadTagInitDone );
		let d_ <- pcie.dmaReadWord;

		let word = d_.word;
		let tag = d_.tag;
		if ( vDmaReadTagWordsLeft[tag] == 1 ) begin
			vDmaReadTagWordsLeft[tag] <= 0;
			dmaReadFreeTagQ.enq(tag);
			dmaReadWordsQ.enq(tuple2(tag, word));
		end else if ( vDmaReadTagWordsLeft[tag] == 0 ) begin
		end else begin
			vDmaReadTagWordsLeft[tag] <= vDmaReadTagWordsLeft[tag] - 1;
			dmaReadWordsQ.enq(tuple2(tag, word));
		end
	endrule
	// Files each word into the landing FIFO of its tag.
	rule relayDmaReadWords;
		dmaReadWordsQ.deq;
		let d = dmaReadWordsQ.first;
		let tag = tpl_1(d);
		let word = tpl_2(d);
		vDmaReadWords[tag].enq(word);
	endrule

	SyncFIFOIfc#(Bit#(128)) dmaReadWordsQ2 <- mkSyncFIFO(16, pcieclk, pcierst, curclk);
	Reg#(Bit#(3)) curReadTag <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(8)) curReadTagCnt <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	// Picks the next request in issue order and forwards its first word.
	rule startReorderRead ( curReadTagCnt == 0 );
		dmaReadTagOrderQ.deq;
		let d = dmaReadTagOrderQ.first;
		let tag = tpl_1(d);
		let cnt = tpl_2(d);
		curReadTag <= truncate(tag);
		curReadTagCnt <= cnt-1;
		dmaReadWordsQ2.enq(vDmaReadWords[tag].first);
		vDmaReadWords[tag].deq;
	endrule
	// Forwards the remaining words of the request selected above.
	rule reorderRead( curReadTagCnt > 0 );
		curReadTagCnt <= curReadTagCnt - 1;
		dmaReadWordsQ2.enq(vDmaReadWords[curReadTag].first);
		vDmaReadWords[curReadTag].deq;
	endrule

	// Loads the next queued command once the previous one is fully issued.
	rule dmaReadCmd (dmaReadLeftBytes == 0 );
		readCmdQ.deq;
		let c = readCmdQ.first;
		dmaReadLeftBytes <= tpl_2(c);
		dmaReadHostOff <= tpl_1(c);
	endrule
	/**************************************
	** DMA Host -> FPGA End
	**************************************/

	method Action readReq(Bit#(32) offset, Bit#(32) bytes);
		readCmdQ.enq(tuple2(offset,bytes));
	endmethod
	method ActionValue#(Bit#(128)) read;
		dmaReadWordsQ2.deq();
		return dmaReadWordsQ2.first;
	endmethod
endmodule
--------------------------------------------------------------------------------
/src/DMAWriteHelper.bsv:
--------------------------------------------------------------------------------
import Clocks::*;
import FIFO::*;
import BRAMFIFO::*;
import FIFOF::*;
import Vector::*;

import PcieCtrl::*;


// Streams 128-bit words from the FPGA into host buffers via DMA writes.
interface DMAWriteHelperIfc;
	// Register a host buffer: byte offset `off`, size `bytes`.
	method Action addHostBuffer(Bit#(32) off, Bit#(32) bytes);
	// Valid: append one data word. Invalid: request a flush of buffered data.
	method Action write(Maybe#(Bit#(128)) write);
	// One entry per completed buffer: (ended by flush?, buffer offset, bytes written).
	method ActionValue#(Tuple3#(Bool, Bit#(32),Bit#(32))) bufferDone;
endinterface

// Buffers incoming words in a BRAM FIFO and writes them to the current host
// buffer in bursts of up to 8 words. DMA logic runs in the PCIe user clock
// domain; the interface methods cross domains through SyncFIFOs.
module mkDMAWriteHelper#(PcieUserIfc pcie) (DMAWriteHelperIfc);
	Clock pcieclk = pcie.user_clk;
	Reset pcierst = pcie.user_rst;
	Clock curclk <- exposeCurrentClock;
	Reset currst <- exposeCurrentReset;

	Reg#(Maybe#(Bit#(32))) dmaWriteHostStartOff <- mkReg(tagged Invalid, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(32)) dmaWriteHostOff <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(32)) dmaWriteLeftWords <- mkReg(0, clocked_by pcieclk, reset_by pcierst); // fpga->host
	SyncFIFOIfc#(Maybe#(Bit#(128))) writeSyncQ <- mkSyncFIFO(32, curclk, currst, pcieclk);
	SyncFIFOIfc#(Tuple2#(Bit#(32),Bit#(32))) writeBufferQ <- mkSyncFIFO(8, curclk, currst, pcieclk);
	FIFO#(Bit#(128)) writeBufferDataQ <- mkSizedBRAMFIFO(512, clocked_by pcieclk, reset_by pcierst); // 8 KB
	Reg#(Bool) writeBufferFlush <- mkReg(False, clocked_by pcieclk, reset_by pcierst);
	// Words pushed into / popped from writeBufferDataQ; their difference is
	// the number of words currently buffered.
	Reg#(Bit#(16)) writeBufferCntUp <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(16)) writeBufferCntDn <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	FIFO#(Tuple3#(Bool,Bit#(32),Bit#(32))) writeDoneQ <- mkFIFO(clocked_by pcieclk, reset_by pcierst);
	SyncFIFOIfc#(Tuple3#(Bool,Bit#(32),Bit#(32))) writeDoneQ2 <- mkSyncFIFO(32, pcieclk, pcierst, curclk);
	rule relayWriteDone;
		writeDoneQ.deq;
		writeDoneQ2.enq(writeDoneQ.first);
	endrule


	/*****************************************************
	** DMA FPGA -> Host Start
	**************************************/
	Integer dmaWriteTagCount = 32;
	FIFO#(Bit#(8)) dmaWriteFreeTagQ <- mkSizedFIFO(dmaWriteTagCount, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(8)) dmaWriteTagInit <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bool) dmaWriteTagInitDone <- mkReg(False, clocked_by pcieclk, reset_by pcierst);
	// Seeds the free-tag queue with tags 32 .. 32+dmaWriteTagCount-1.
	rule initDmaTagW(dmaWriteTagInit < fromInteger(dmaWriteTagCount) && dmaWriteTagInitDone == False);
		dmaWriteTagInit <= dmaWriteTagInit + 1;
		dmaWriteFreeTagQ.enq(32+dmaWriteTagInit);//FIXME to not overlap with read!
		if ( dmaWriteTagInit >= fromInteger(dmaWriteTagCount) - 1 ) begin
			dmaWriteTagInitDone <= True;
		end
	endrule
	// Moves caller data into the BRAM buffer; an Invalid word raises the flush
	// flag, which pauses this rule until genPcieFlush has handled it.
	rule relayWriteSync ( writeBufferFlush == False );
		writeSyncQ.deq;
		let d = writeSyncQ.first;
		if ( isValid(d) ) begin
			writeBufferDataQ.enq(fromMaybe(?,d));
			writeBufferCntUp <= writeBufferCntUp +1;
		end else begin
			writeBufferFlush <= True;
		end
	endrule



	Reg#(Bit#(16)) dmaCurWriteLeft <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(8)) dmaCurTag <- mkReg(0, clocked_by pcieclk, reset_by pcierst);
	Reg#(Bit#(32)) dmaCurBufferWriteCnt <- mkReg(0, clocked_by pcieclk, reset_by pcierst);

	// Starts a regular burst when more than 8 words are buffered.
	// NOTE(review): the threshold is "> 8", so exactly 8 buffered words wait
	// for a 9th word or a flush; left unchanged because genPcieFlush's
	// "<= 8" condition is paired with it.
	rule genPcieWrite ( dmaCurWriteLeft == 0 && dmaWriteLeftWords > 0 && writeBufferCntUp-writeBufferCntDn > 8);
			//|| writeBufferFlush ) );
		Bit#(8) writeTag = dmaWriteFreeTagQ.first;
		dmaWriteFreeTagQ.deq;
		dmaCurTag <= writeTag;

		// Never write past the end of the host buffer: if fewer than 8 words
		// of space remain (buffer size not a multiple of 128 bytes) shorten
		// the burst. The fixed 8-word burst used before made
		// dmaWriteLeftWords underflow in that case. For multiples of 128
		// bytes this is identical to the old behavior.
		Bit#(32) burstWords = (dmaWriteLeftWords < 8) ? dmaWriteLeftWords : 8;

		pcie.dmaWriteReq(dmaWriteHostOff, truncate(burstWords), writeTag);
		dmaWriteHostOff <= dmaWriteHostOff + (burstWords << 4);

		dmaCurWriteLeft <= truncate(burstWords);
		dmaWriteLeftWords <= dmaWriteLeftWords - burstWords;
		if ( dmaWriteLeftWords == burstWords ) begin
			// This burst fills the buffer: report it as done (not flushed).
			writeDoneQ.enq(tuple3(False, fromMaybe(?,dmaWriteHostStartOff), dmaCurBufferWriteCnt+burstWords));
			dmaCurBufferWriteCnt <= 0;
		end else begin
			dmaCurBufferWriteCnt <= dmaCurBufferWriteCnt + burstWords;
		end
	endrule

	// Handles a flush request: writes whatever is still buffered (at most 8
	// words) and closes the current host buffer.
	// NOTE(review): the flushed word count is not limited to the space left in
	// the host buffer (dmaWriteLeftWords) - confirm callers size buffers so
	// this cannot overrun.
	rule genPcieFlush ( dmaCurWriteLeft == 0 && dmaWriteLeftWords > 0 && writeBufferCntUp-writeBufferCntDn <= 8 && writeBufferFlush );
		let qcount = writeBufferCntUp-writeBufferCntDn;

		// Take a tag only when a burst is actually issued. The tag is handed
		// back by doPcieWrite with the last data word, so dequeuing one for
		// an empty flush (as the code did before) leaked it permanently.
		if (qcount > 0) begin
			Bit#(8) writeTag = dmaWriteFreeTagQ.first;
			dmaWriteFreeTagQ.deq;
			dmaCurTag <= writeTag;
			pcie.dmaWriteReq(dmaWriteHostOff, truncate(qcount), writeTag);
		end
		dmaCurWriteLeft <= qcount;
		dmaWriteHostOff <= dmaWriteHostOff + (zeroExtend(qcount)*16);

		//we are flushing, and no more data left in queue
		dmaWriteLeftWords <= 0;
		writeBufferFlush <= False;

		dmaWriteHostStartOff <= tagged Invalid;

		writeDoneQ.enq(tuple3(True, fromMaybe(?,dmaWriteHostStartOff), dmaCurBufferWriteCnt+zeroExtend(qcount)));
		dmaCurBufferWriteCnt <= 0;
	endrule

	// Sends one data word of the current burst; returns the tag to the free
	// queue together with the last word.
	rule doPcieWrite( dmaCurWriteLeft > 0 );
		writeBufferDataQ.deq;
		let d = writeBufferDataQ.first;

		pcie.dmaWriteData(d, dmaCurTag);

		dmaCurWriteLeft <= dmaCurWriteLeft - 1;
		writeBufferCntDn <= writeBufferCntDn +1;
		if ( dmaCurWriteLeft == 1 ) begin
			dmaWriteFreeTagQ.enq(dmaCurTag);
		end
	endrule
	// Loads the next host buffer once the current one is used up.
	// NOTE(review): a buffer smaller than 16 bytes yields 0 words here and is
	// consumed without a bufferDone entry.
	rule procHostBuffer ( dmaWriteLeftWords == 0 );
		writeBufferQ.deq;
		let d = writeBufferQ.first;
		let off = tpl_1(d);
		let bytes = tpl_2(d);

		dmaWriteHostStartOff <= tagged Valid off;
		dmaWriteHostOff <= off;
		dmaWriteLeftWords <= (bytes>>4); // 16 byte words
	endrule

	method Action addHostBuffer(Bit#(32) off, Bit#(32) bytes);
		writeBufferQ.enq(tuple2(off,bytes));
	endmethod
	method Action write(Maybe#(Bit#(128)) data);
		writeSyncQ.enq(data);
	endmethod
	method ActionValue#(Tuple3#(Bool, Bit#(32),Bit#(32))) bufferDone;
		writeDoneQ2.deq;
		let d = writeDoneQ2.first;
		// Word count is converted to bytes for the caller.
		return tuple3(tpl_1(d), tpl_2(d), (tpl_3(d)<<4));
	endmethod
endmodule
-------------------------------------------------------------------------------- /src/PcieCtrl_bsim.bsv: -------------------------------------------------------------------------------- 1 | package PcieCtrl_bsim; 2 | 3 | import FIFO::*; 4 | 5 | import PcieCtrl::*; 6 | 7 | import "BDPI" function Bool bdpiIOReady(); 8 | import "BDPI" function ActionValue#(Bit#(64)) bdpiIOData(); 9 | import "BDPI" function ActionValue#(Bool) bdpiIOReadRespReady(); 10 | import "BDPI" function ActionValue#(Bool) bdpiIOReadResp(Bit#(64) data); 11 | 12 | import "BDPI" function ActionValue#(Bool) bdpiDmaWriteData(Bit#(32) addr, Bit#(64) 
data1, Bit#(64) data2); 13 | import "BDPI" function ActionValue#(Bool) bdpiDmaReadReq(Bit#(32) addr, Bit#(10) words); 14 | import "BDPI" function ActionValue#(Bool) bdpiDmaReadReady(); 15 | import "BDPI" function ActionValue#(Bit#(32)) bdpiDmaReadData(); 16 | import "BDPI" function Bool bdpiInterruptReady(); 17 | import "BDPI" function Action bdpiAssertInterrupt(); 18 | 19 | 20 | /* Simulation model of the PCIe controller: MMIO and DMA traffic is exchanged with the host-side process through the BDPI functions imported above, and the user interface runs on this module's own clock and reset. NOTE(review): dmaWriteReq, dmaWriteData and dmaReadReq below take no tag argument and dmaReadWord returns a bare DMAWord, while DMAReadHelper and DMAWriteHelper pass a tag and read .word/.tag - confirm this model still matches PcieUserIfc. */ module mkPcieCtrl_bsim (PcieCtrlIfc); 21 | Integer dma_buf_offset = valueOf(DMABufOffset); //must match one in driver 22 | Integer io_userspace_offset = valueOf(IoUserSpaceOffset); 23 | 24 | Clock curClk <- exposeCurrentClock; 25 | Reset curRst <- exposeCurrentReset; 26 | 27 | Reg#(Bit#(10)) dmaWriteWordCount <- mkReg(0); 28 | Reg#(Bit#(32)) dmaWriteWordAddr <- mkReg(0); 29 | Reg#(Bit#(32)) dmaWriteWordOff <- mkReg(0); 30 | 31 | Reg#(Bit#(10)) dmaReadWordCount <- mkReg(0); 32 | FIFO#(DMAWord) dmaReadWordQ <- mkSizedFIFO(16); 33 | 34 | Reg#(DMAWord) dmaReadBuffer <- mkReg(0); 35 | Reg#(Bit#(8)) dmaReadBufferOff <- mkReg(0); 36 | /* Assembles four 32-bit values from bdpiDmaReadData into one DMAWord (each new value enters at the top, older ones shift down) and enqueues it; dmaReadWordCount counts the words still expected. */ rule receiveDmaRead ( dmaReadWordCount > 0 ); 37 | let isready <- bdpiDmaReadReady(); 38 | if ( isready ) begin 39 | let d <- bdpiDmaReadData(); 40 | if ( dmaReadBufferOff >= 3 ) begin 41 | dmaReadWordCount <= dmaReadWordCount - 1; 42 | dmaReadWordQ.enq( {d, dmaReadBuffer[127:32]} ); 43 | dmaReadBufferOff <= 0; 44 | end else begin 45 | dmaReadBuffer <= {d,dmaReadBuffer[127:32]}; 46 | dmaReadBufferOff <= dmaReadBufferOff + 1; 47 | end 48 | end 49 | endrule 50 | 51 | 52 | FIFO#(IOWrite) ioWriteQ <- mkFIFO; 53 | FIFO#(IOReadReq) ioReadReqQ <- mkFIFO; 54 | /* Polls bdpiIOData on every firing. The 64-bit value carries data in bits 31:0, the address starting at bit 32, a write flag at bit 56 and a not-ready flag at bit 63. Accesses at or above io_userspace_offset are forwarded to the user queues with the offset removed; reads below it are answered immediately with 32'hc001d00d and writes below it are dropped. */ rule receiveIO; 55 | let d <- bdpiIOData(); 56 | 57 | Bit#(32) data = truncate(d); 58 | Bit#(20) addr = truncate(d>>32); 59 | Bit#(1) write = truncate(d>>(32+24)); 60 | Bit#(1) notready = truncate(d>>(32+31)); 61 | 62 | if ( 0 == notready ) begin 63 | if ( write == 1 ) begin 64 | if ( addr >= fromInteger(io_userspace_offset) ) begin 65 | ioWriteQ.enq(IOWrite{addr: addr-fromInteger(io_userspace_offset), data:data}); 66 
| end else begin 67 | //TODO save to BRAM 68 | end 69 | //$display( "IOwrite addr:%x, data:%x", addr, data ); 70 | end 71 | else begin 72 | IOReadReq rr = ?; 73 | rr.addr = addr - fromInteger(io_userspace_offset); 74 | if ( addr >= fromInteger(io_userspace_offset) ) begin 75 | ioReadReqQ.enq(rr); 76 | end else begin 77 | let dd <- bdpiIOReadResp({0,addr, 32'hc001d00d}); 78 | end 79 | //$display( "IOread addr: %x", addr); 80 | end 81 | end 82 | $fflush(stdout); 83 | endrule 84 | 85 | FIFO#(IOReadReq) ioReadReqReturnQ <- mkFIFO; 86 | FIFO#(Bit#(32)) ioReadReqDataQ <- mkFIFO; 87 | /* Returns queued read responses through bdpiIOReadResp once bdpiIOReadRespReady reports ready, adding io_userspace_offset back onto the address. */ rule relayIOReadResp; 88 | let isready <- bdpiIOReadRespReady; 89 | if ( isready ) begin 90 | let ioreq = ioReadReqReturnQ.first; 91 | let data = ioReadReqDataQ.first; 92 | let d <- bdpiIOReadResp({0,ioreq.addr + fromInteger(io_userspace_offset), data}); 93 | ioReadReqReturnQ.deq; 94 | ioReadReqDataQ.deq; 95 | end 96 | endrule 97 | 98 | 99 | 100 | interface PcieUserIfc user; 101 | interface Clock user_clk = curClk; 102 | interface Reset user_rst = curRst; 103 | method ActionValue#(IOWrite) dataReceive; 104 | ioWriteQ.deq; 105 | return ioWriteQ.first; 106 | endmethod 107 | method ActionValue#(IOReadReq) dataReq; 108 | ioReadReqQ.deq; 109 | let d = ioReadReqQ.first; 110 | return d; 111 | endmethod 112 | method Action dataSend(IOReadReq ioreq, Bit#(32) data );// if (bdpiIOReadRespReady() ); 113 | ioReadReqReturnQ.enq(ioreq); 114 | ioReadReqDataQ.enq(data); 115 | //let d <- bdpiIOReadResp({0,ioreq.addr, data}); 116 | //$display( "IOread resp addr: %x data: %x\n", ioreq.addr, data ); 117 | endmethod 118 | 119 | /* Only one DMA write burst at a time: a new request is accepted when the previous word count has reached zero. */ method Action dmaWriteReq(Bit#(32) addr, Bit#(10) words ) if ( dmaWriteWordCount == 0); 120 | dmaWriteWordCount <= words; 121 | dmaWriteWordAddr <= addr; 122 | dmaWriteWordOff <= 0; 123 | endmethod 124 | /* Passes one word to bdpiDmaWriteData as two 64-bit halves (low half first) at the current burst address. */ method Action dmaWriteData(DMAWord data) if ( dmaWriteWordCount > 0 ); 125 | Bool r <- bdpiDmaWriteData(dmaWriteWordAddr+dmaWriteWordOff, truncate(data), truncate(data>>64)); 126 | 
dmaWriteWordOff <= dmaWriteWordOff + 16; 127 | dmaWriteWordCount <= dmaWriteWordCount - 1; 128 | //$display("dma data %x",data); 129 | endmethod 130 | /* Only one DMA read at a time: accepted when no words of a previous read are outstanding. */ method Action dmaReadReq(Bit#(32) addr, Bit#(10) words) if ( dmaReadWordCount == 0 ); 131 | dmaReadWordCount <= words; 132 | 133 | let d <- bdpiDmaReadReq(addr,words); 134 | endmethod 135 | method ActionValue#(DMAWord) dmaReadWord; 136 | dmaReadWordQ.deq; 137 | return dmaReadWordQ.first; 138 | endmethod 139 | 140 | method Action assertInterrupt if ( bdpiInterruptReady() ); 141 | bdpiAssertInterrupt(); 142 | endmethod 143 | /* Empty body: does nothing in this simulation model. */ method Action assertUptrain; 144 | endmethod 145 | 146 | /* Always returns 0 in this simulation model. */ method Bit#(32) debug_data; 147 | return 0; 148 | endmethod 149 | 150 | endinterface 151 | endmodule 152 | 153 | endpackage: PcieCtrl_bsim 154 | -------------------------------------------------------------------------------- /src/PcieImport.bsv: -------------------------------------------------------------------------------- 1 | package PcieImport; 2 | 3 | typedef 128 PcieInterfaceSz; 4 | typedef TDiv#(PcieInterfaceSz, 8) PcieKeepSz; 5 | 6 | (* always_enabled, always_ready *) 7 | interface PcieImportPins; 8 | (* prefix = "", result = "RXN" *) 9 | method Action rxn_in(Bit#(8) rxn_i); 10 | (* prefix = "", result = "RXP" *) 11 | method Action rxp_in(Bit#(8) rxp_i); 12 | 13 | (* prefix = "", result = "TXN" *) 14 | method Bit#(8) txn_out(); 15 | (* prefix = "", result = "TXP" *) 16 | method Bit#(8) txp_out(); 17 | 18 | endinterface 19 | 20 | interface PcieImportUser; 21 | method Bit#(1) user_link_up; 22 | method Bit#(16) cfg_completer_id; 23 | 24 | method Bit#(32) debug_data; 25 | 26 | method Action assertInterrupt(Bit#(1) value); 27 | method Action assertUptrain(Bit#(1) value); 28 | 29 | method Action sendData(Bit#(PcieInterfaceSz) word); 30 | method Action sendKeep(Bit#(PcieKeepSz) keep); 31 | method Action sendLast(Bit#(1) last); 32 | 33 | method ActionValue#(Bit#(PcieInterfaceSz)) receiveData; 34 | method ActionValue#(Bit#(PcieKeepSz)) 
receiveKeep; 35 | method ActionValue#(Bit#(1)) receiveLast; 36 | method ActionValue#(Bit#(22)) receiveUser; 37 | endinterface 38 | 39 | /* Everything the imported core exposes: two clock/reset pairs (sys and user), the serial pins and the user-side methods. */ interface PcieImportIfc; 40 | interface Clock sys_clk_o; 41 | interface Reset sys_rst_n_o; 42 | interface Clock user_clk; 43 | interface Reset user_reset; 44 | interface PcieImportPins pins; 45 | interface PcieImportUser user; 46 | endinterface 47 | 48 | /* BVI wrapper around the Verilog module xilinx_pcie_2_1_ep_7x. There is no default clock or reset; the user-side action methods are clocked by the core's user_clk output and reset by user_reset. The two schedule blocks at the end declare all pin methods, and all user methods, conflict-free among themselves. */ import "BVI" xilinx_pcie_2_1_ep_7x = 49 | module mkPcieImport#(Clock sys_clk_p, Clock sys_clk_n, Reset sys_rst_n, Clock emcclk) (PcieImportIfc); 50 | 51 | default_clock no_clock; 52 | default_reset no_reset; 53 | 54 | input_clock (sys_clk_p) = sys_clk_p; 55 | input_clock (sys_clk_n) = sys_clk_n; 56 | 57 | input_reset (sys_rst_n) = sys_rst_n; 58 | input_clock (emcclk) = emcclk; 59 | 60 | output_clock sys_clk_o(sys_clk_o); 61 | output_reset sys_rst_n_o(sys_rst_n_o); 62 | output_clock user_clk(user_clk); 63 | output_reset user_reset(user_reset_n) clocked_by(user_clk); 64 | 65 | interface PcieImportPins pins; 66 | method rxn_in(pci_exp_rxn) enable((*inhigh*) rx_n_en_0) reset_by(no_reset) clocked_by(sys_clk_n); 67 | method rxp_in(pci_exp_rxp) enable((*inhigh*) rx_p_en_0) reset_by(no_reset) clocked_by(sys_clk_p); 68 | method pci_exp_txn txn_out() reset_by(no_reset) clocked_by(sys_clk_n); 69 | method pci_exp_txp txp_out() reset_by(no_reset) clocked_by(sys_clk_p); 70 | endinterface 71 | 72 | interface PcieImportUser user; 73 | method user_lnk_up user_link_up; 74 | method cfg_completer_id cfg_completer_id; 75 | 76 | method debug_data debug_data; 77 | 78 | method assertInterrupt(assert_interrupt_data) enable(assert_interrupt) ready(assert_interrupt_rdy) clocked_by(user_clk) reset_by(user_reset); 79 | /* NOTE(review): the data port is spelled asser_uptrain_data (no 't') and the ready signal is assert_interrupt_rdy, the same one assertInterrupt uses - confirm both against the Verilog port list. */ method assertUptrain(asser_uptrain_data) enable(assert_uptrain) ready(assert_interrupt_rdy) clocked_by(user_clk) reset_by(user_reset); 80 | 81 | /* Transmit side: sendData drives s_axis_tx_tvalid as its enable; sendKeep and sendLast have their own enable ports but share the s_axis_tx_tready ready signal. */ method sendData(s_axis_tx_tdata) enable(s_axis_tx_tvalid) ready(s_axis_tx_tready) clocked_by(user_clk) reset_by(user_reset); 82 | method 
sendKeep(s_axis_tx_tkeep) enable(tx_en_keep) ready(s_axis_tx_tready) clocked_by(user_clk) reset_by(user_reset); 83 | method sendLast(s_axis_tx_tlast) enable(tx_en_last) ready(s_axis_tx_tready) clocked_by(user_clk) reset_by(user_reset); 84 | 85 | /* Receive side: only receiveData drives m_axis_rx_tready as its enable; receiveKeep, receiveLast and receiveUser have their own enable ports but share the m_axis_rx_tvalid ready signal. */ method m_axis_rx_tdata receiveData enable(m_axis_rx_tready) ready(m_axis_rx_tvalid) clocked_by(user_clk) reset_by(user_reset); 86 | method m_axis_rx_tkeep receiveKeep enable(rx_en_keep) ready(m_axis_rx_tvalid) clocked_by(user_clk) reset_by(user_reset); 87 | method m_axis_rx_tlast receiveLast enable(rx_en_last) ready(m_axis_rx_tvalid) clocked_by(user_clk) reset_by(user_reset); 88 | method m_axis_rx_tuser receiveUser enable(rx_en_user) ready(m_axis_rx_tvalid) clocked_by(user_clk) reset_by(user_reset); 89 | endinterface 90 | 91 | schedule ( 92 | pins_rxn_in, pins_rxp_in, pins_txn_out, pins_txp_out 93 | ) CF ( 94 | pins_rxn_in, pins_rxp_in, pins_txn_out, pins_txp_out 95 | ); 96 | 97 | schedule ( 98 | user_receiveData, user_receiveKeep, user_receiveLast, user_receiveUser, 99 | user_cfg_completer_id, 100 | user_assertInterrupt, 101 | user_sendData, user_sendKeep, user_sendLast, user_user_link_up, 102 | user_debug_data, user_assertUptrain 103 | 104 | 105 | ) CF ( 106 | user_receiveData, user_receiveKeep, user_receiveLast, user_receiveUser, 107 | user_cfg_completer_id, 108 | user_assertInterrupt, 109 | user_sendData, user_sendKeep, user_sendLast, user_user_link_up, 110 | user_debug_data, user_assertUptrain 111 | ); 112 | endmodule 113 | 114 | endpackage: PcieImport 115 | -------------------------------------------------------------------------------- /src/ScatterN.bsv: -------------------------------------------------------------------------------- 1 | package ScatterN; 2 | 3 | // FIXME I think this one is incorrect 4 | 5 | import FIFO::*; 6 | import FIFOF::*; 7 | import Vector::*; 8 | 9 | interface ScatterGetIfc#(type t); 10 | method ActionValue#(t) get; 11 | endinterface 12 | 13 | interface ScatterNProtIfc#(numeric type n, 
numeric type bn, type t); 14 | interface Vector#(n, ScatterGetIfc#(t)) get; 15 | 16 | method Action enq(t data, Bit#(TLog#(bn)) dst); 17 | endinterface 18 | 19 | /* Recursive scatter network: enq routes data to one of n outputs selected by dst. For n greater than 2 it instantiates two sub-networks with TDiv#(n,2) outputs each, sends even dst values to sa[0] and odd ones to sa[1] (passing dst divided by 2 down), and maps output i to sub-network i%2, index i divided by 2. For n equal to 2 it uses two FIFOs selected by dst[0]; otherwise a single FIFO that ignores dst. The bn parameter keeps the width of dst fixed through the recursion. NOTE(review): the file header carries a FIXME saying the author suspects this module is incorrect - verify before use. */ module mkScatterNProt (ScatterNProtIfc#(n,bn,t)) 20 | provisos(Bits#(t,a__) 21 | ); 22 | 23 | if ( valueOf(n) > 2 ) begin 24 | Vector#(2,ScatterNProtIfc#(TDiv#(n,2), bn,t)) sa <- replicateM(mkScatterNProt); 25 | Integer dsz = valueOf(TLog#(n)); 26 | 27 | 28 | Vector#(n, ScatterGetIfc#(t)) get_; 29 | for ( Integer i = 0; i < valueOf(n); i=i+1 ) begin 30 | get_[i] = interface ScatterGetIfc; 31 | method ActionValue#(t) get; 32 | /* 33 | if ( i < valueOf(n)/2 ) begin 34 | let d <- sa[0].get[i%(valueOf(n)/2)].get; 35 | return d; 36 | end else begin 37 | let d <- sa[1].get[i-(valueOf(n)/2)].get; 38 | return d; 39 | end 40 | */ 41 | if ( i%2 == 0 ) begin 42 | let d <- sa[0].get[i/2].get; 43 | return d; 44 | end else begin 45 | let d <- sa[1].get[i/2].get; 46 | return d; 47 | end 48 | endmethod 49 | endinterface; 50 | end 51 | interface get = get_; 52 | 53 | 54 | method Action enq(t data, Bit#(TLog#(bn)) dst); 55 | //Bit#(TLog#(bn)) ndst = dst; 56 | //ndst[dsz-1] = 0; 57 | //if ( dst[dsz-1] == 0 ) begin 58 | if ( dst%2 == 0 ) begin 59 | sa[0].enq(data, dst/2); 60 | end else begin 61 | sa[1].enq(data, dst/2); 62 | end 63 | endmethod 64 | 65 | end else if ( valueOf(n) == 2 ) begin 66 | FIFO#(t) getQ1 <- mkFIFO; 67 | FIFO#(t) getQ2 <- mkFIFO; 68 | Vector#(n, ScatterGetIfc#(t)) get_; 69 | get_[0] = interface ScatterGetIfc; 70 | method ActionValue#(t) get; 71 | getQ1.deq; 72 | return getQ1.first; 73 | endmethod 74 | endinterface; 75 | get_[1] = interface ScatterGetIfc; 76 | method ActionValue#(t) get; 77 | getQ2.deq; 78 | return getQ2.first; 79 | endmethod 80 | endinterface; 81 | interface get = get_; 82 | method Action enq(t data, Bit#(TLog#(bn)) dst); 83 | if ( dst[0] == 0 ) begin 84 | getQ1.enq(data); 85 | end else begin 86 | getQ2.enq(data); 87 | end 88 | endmethod 89 | end else begin 90 | FIFO#(t) getQ <- mkFIFO; 
91 | Vector#(n, ScatterGetIfc#(t)) get_; 92 | get_[0] = interface ScatterGetIfc; 93 | method ActionValue#(t) get; 94 | getQ.deq; 95 | return getQ.first; 96 | endmethod 97 | endinterface; 98 | interface get = get_; 99 | method Action enq(t data, Bit#(TLog#(bn)) dst); 100 | getQ.enq(data); 101 | endmethod 102 | end 103 | endmodule 104 | 105 | /* Public interface: n output ports and an enq that takes the destination index. */ interface ScatterNIfc#(numeric type n, type t); 106 | interface Vector#(n, ScatterGetIfc#(t)) get; 107 | 108 | method Action enq(t data, Bit#(TLog#(n)) dst); 109 | endinterface 110 | 111 | /* Public wrapper: instantiates mkScatterNProt with bn equal to n and places a FIFO (inQ) in front of it; rule relenq forwards one (data, dst) pair per firing and each get port passes straight through to the inner network. */ module mkScatterN (ScatterNIfc#(n,t)) 112 | provisos(Bits#(t,a__) 113 | ); 114 | ScatterNProtIfc#(n,n,t) sa <- mkScatterNProt; 115 | 116 | FIFO#(Tuple2#(t,Bit#(TLog#(n)))) inQ <- mkFIFO; 117 | rule relenq; 118 | let data = tpl_1(inQ.first); 119 | let dst = tpl_2(inQ.first); 120 | sa.enq(data,dst); 121 | inQ.deq; 122 | endrule 123 | 124 | Vector#(n, ScatterGetIfc#(t)) get_; 125 | for ( Integer i = 0; i < valueOf(n); i=i+1 ) begin 126 | get_[i] = interface ScatterGetIfc; 127 | method ActionValue#(t) get; 128 | let d <- sa.get[i].get; 129 | return d; 130 | endmethod 131 | endinterface; 132 | end 133 | interface get = get_; 134 | method Action enq(t data, Bit#(TLog#(n)) dst); 135 | inQ.enq(tuple2(data,dst)); 136 | endmethod 137 | endmodule 138 | 139 | endpackage: ScatterN 140 | --------------------------------------------------------------------------------