├── resource.png ├── blockdiagram.png ├── src ├── libpynqmmult.so ├── include │ ├── cf_pthread.h │ ├── xlnk-perf.h │ ├── cf_trace.h │ ├── cf_pending.h │ ├── cf_alloc.h │ ├── cf_util.h │ ├── cf_context.h │ ├── sds_trace.h │ ├── sds_perf_instrumentation.h │ ├── cf_request.h │ ├── axi_lite_dm.h │ ├── zero_copy_dm.h │ ├── cf_sw_fifo.h │ ├── axi_dma_simple_dm.h │ ├── cf_atomic.h │ ├── axi_fifo_dm.h │ ├── cf_mem_stream.h │ ├── accel_info.h │ ├── axi_dma_sg_dm.h │ ├── axi_dma_2d_dm.h │ ├── sds_lib.h │ ├── xlnk_core_cf.h │ ├── cf_lib.h │ └── sds_incl.h ├── cf_stub.c ├── devreg.h ├── cf_stub.h ├── mmult_accel.h ├── Makefile ├── portinfo.h ├── devreg.c ├── mmult_accel.cpp ├── mmult.cpp └── portinfo.c ├── notebooks ├── pynqmmult.bit ├── pynqmmult.ipynb └── pynqmmult.tcl ├── README.md ├── LICENSE └── .gitignore /resource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkat0/pynqmmult/HEAD/resource.png -------------------------------------------------------------------------------- /blockdiagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkat0/pynqmmult/HEAD/blockdiagram.png -------------------------------------------------------------------------------- /src/libpynqmmult.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkat0/pynqmmult/HEAD/src/libpynqmmult.so -------------------------------------------------------------------------------- /notebooks/pynqmmult.bit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tkat0/pynqmmult/HEAD/notebooks/pynqmmult.bit -------------------------------------------------------------------------------- /src/include/cf_pthread.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_pthread 2 | #define D_cf_pthread 3 | 4 
| #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern void cf_pthread_context_init(void); 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | 14 | #endif /* D_cf_pthread */ 15 | -------------------------------------------------------------------------------- /src/cf_stub.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cf_stub.h" 4 | 5 | cf_request_handle_t _p0_request_0; 6 | cf_request_handle_t _p0_request_1; 7 | cf_request_handle_t _p0_request_2; 8 | 9 | size_t _p0_mmult_accel_0_num_out_C; 10 | 11 | -------------------------------------------------------------------------------- /src/devreg.h: -------------------------------------------------------------------------------- 1 | #ifndef _SDS_DEVREG_H 2 | #define _SDS_DEVREG_H 3 | /* File: C:/workspace2015.4/pynq-mmult/SDDebug/_sds/p0/.cf_work/devreg.h */ 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void _p0_cf_register(int); 9 | void _p0_cf_unregister(int); 10 | #ifdef __cplusplus 11 | }; 12 | #endif 13 | #endif /* _SDS_DEVREG_H_ */ 14 | -------------------------------------------------------------------------------- /src/cf_stub.h: -------------------------------------------------------------------------------- 1 | #ifndef STUBS_H_ 2 | #define STUBS_H_ 3 | 4 | #include "sds_incl.h" 5 | #include "portinfo.h" 6 | #include "accel_info.h" 7 | #include "sds_lib.h" 8 | 9 | extern cf_request_handle_t _p0_request_0; 10 | extern cf_request_handle_t _p0_request_1; 11 | extern cf_request_handle_t _p0_request_2; 12 | 13 | extern size_t _p0_mmult_accel_0_num_out_C; 14 | 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/mmult_accel.h: -------------------------------------------------------------------------------- 1 | #ifndef MMULT_ACC_H_ 2 | #define MMULT_ACC_H_ 3 | 4 | #define A_NROWS 32 5 | #define A_NCOLS 32 6 | #define B_NCOLS 32 7 | #define B_NROWS A_NCOLS 8 | 9 
| #pragma SDS data access_pattern(in_A:SEQUENTIAL, in_B:SEQUENTIAL, out_C:SEQUENTIAL) 10 | void mmult_accel (float in_A[A_NROWS*A_NCOLS], 11 | float in_B[A_NCOLS*B_NCOLS], 12 | float out_C[A_NROWS*B_NCOLS]); 13 | 14 | #endif /* MMULT_ACC_H_ */ 15 | 16 | -------------------------------------------------------------------------------- /src/include/xlnk-perf.h: -------------------------------------------------------------------------------- 1 | // 2 | // This header file declares the Xlnk performance benchmark APIs 3 | // 4 | 5 | #ifndef XLNK_PERF_H 6 | #define XLNK_PERF_H 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | void xlnkCounterMap(void); 12 | unsigned long xlnkGetGlobalCounter(void); 13 | unsigned long long xlnkGetGlobalCounter64(void); 14 | void xlnkSetGlobalCounter(unsigned long long val); 15 | 16 | #ifdef __cplusplus 17 | }; 18 | #endif 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /src/include/cf_trace.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_trace 2 | #define D_cf_trace 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #ifdef NDEBUG 11 | #define CF_TRACE(...) 12 | #else 13 | extern int cf_trace_enabled; 14 | #define CF_TRACE(...) 
do { if (cf_trace_enabled) cf_trace(__VA_ARGS__); } while(0) 15 | #endif 16 | 17 | extern void cf_trace( 18 | const char *fmt, 19 | ...); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif /* D_cf_trace */ 26 | -------------------------------------------------------------------------------- /src/include/cf_pending.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_pending 2 | #define D_cf_pending 3 | 4 | #include "cf_lib.h" 5 | #include "cf_atomic.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | typedef struct cf_pending_struct cf_pending_t; 12 | 13 | struct cf_pending_struct { 14 | cf_pending_t *next; 15 | cf_atomic_int_t on_pending; 16 | int (*run_pending)(cf_pending_t *self); 17 | }; 18 | 19 | extern void cf_add_pending(cf_pending_t *pending); 20 | extern void cf_run_pending(void); 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | #endif /* D_cf_pending */ 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pynqmmult 2 | "mmult" example using SDSoC for [PYNQ](https://github.com/Xilinx/PYNQ) board 3 | 4 | ![](blockdiagram.png) 5 | 6 | ![](resource.png) 7 | 8 | ## Future work 9 | 10 | * implementation of neural network on PYNQ 11 | * combination of [Chainer](https://github.com/pfnet/chainer) and PYNQ 12 | 13 | ## Thanks to 14 | 15 | * https://github.com/hackwa/pynqfire 16 | * [Adam Taylor’s MicroZed Chronicles, Part 160: Creating an SDSoC Platform for PYNQ](https://forums.xilinx.com/t5/Xcell-Daily-Blog/Adam-Taylor-s-MicroZed-Chronicles-Part-160-Creating-an-SDSoC/ba-p/738145) 17 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | CC = arm-linux-gnueabihf-gcc 2 | CPP = arm-linux-gnueabihf-g++ 3 | 4 | OBJSH = devreg.o
cf_stub.o mmult_accel.o portinfo.o 5 | LIBS = -l sds_lib 6 | INC = -I include/ 7 | 8 | all: 9 | $(CC) -fPIC $(INC) -c -g3 -gstabs devreg.c 10 | $(CC) -fPIC $(INC) -c -g3 -gstabs cf_stub.c 11 | $(CC) -fPIC $(INC) -c -g3 -gstabs mmult_accel.cpp 12 | $(CC) -fPIC $(INC) -c -g3 -gstabs portinfo.c 13 | $(CPP) -g3 -gstabs -shared -fPIC -rdynamic $(OBJSH) -Wl,--start-group \ 14 | $(LIBS) -Wl,--end-group -o libpynqmmult.so 15 | rm *.o 16 | 17 | clean: 18 | rm -f *.so *.o 19 | -------------------------------------------------------------------------------- /src/include/cf_alloc.h: -------------------------------------------------------------------------------- 1 | #ifndef CF_ALLOC_H 2 | #define CF_ALLOC_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct cf_alloc_attr_struct { 9 | int cacheable; /* 0 - non-cacheable, use with AFI only; 1 - cacheable, use with ACP only */ 10 | int physical_addr; 11 | }; 12 | 13 | void cf_free(void *memptr); 14 | 15 | void cf_set_mem_attr(cf_port_base_t *port, int attr); 16 | 17 | extern void *cf_alloc( 18 | size_t size, 19 | cf_alloc_attr_t *attr); 20 | 21 | extern void *cf_mmap( 22 | void *physicalAddr, 23 | size_t size, 24 | void *virtualAddr); 25 | 26 | extern void cf_munmap( 27 | void *virtualAddr); 28 | 29 | #ifdef __cplusplus 30 | }; 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /src/include/cf_util.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_util 2 | #define D_cf_util 3 | 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /* 11 | * Utility function for data movers that do not have native support 12 | * for receive with iovec. 
13 | */ 14 | int cf_util_receive_iov( 15 | cf_port_receive_t *port, 16 | cf_iovec_t *iov, 17 | unsigned int iovcnt, 18 | size_t *bytes_received, 19 | cf_request_handle_t *request); 20 | 21 | /* 22 | * Utility function for data movers that do not have native support 23 | * for send with iovec. 24 | */ 25 | int cf_util_send_iov( 26 | cf_port_send_t *port, 27 | cf_iovec_t *iov, 28 | unsigned int iovcnt, 29 | cf_request_handle_t *request); 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | 35 | #endif /* D_cf_util */ 36 | -------------------------------------------------------------------------------- /src/portinfo.h: -------------------------------------------------------------------------------- 1 | #ifndef _SDS_PORTINFO_H 2 | #define _SDS_PORTINFO_H 3 | /* File: C:/workspace2015.4/pynq-mmult/SDDebug/_sds/p0/.cf_work/portinfo.h */ 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct _p0_swblk_mmult_accel { 9 | cf_port_send_t cmd_mmult_accel; 10 | cf_port_send_t in_A; 11 | cf_port_send_t in_B; 12 | cf_port_receive_t out_C; 13 | }; 14 | 15 | extern struct _p0_swblk_mmult_accel _p0_swinst_mmult_accel_0; 16 | void _p0_cf_framework_open(int); 17 | void _p0_cf_framework_close(int); 18 | 19 | #ifdef __cplusplus 20 | }; 21 | #endif 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | void switch_to_next_partition(int); 26 | void init_first_partition(); 27 | void close_last_partition(); 28 | #ifdef __cplusplus 29 | }; 30 | #endif /* extern "C" */ 31 | #endif /* _SDS_PORTINFO_H_ */ 32 | -------------------------------------------------------------------------------- /src/include/cf_context.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_context 2 | #define D_cf_context 3 | 4 | #include "cf_lib.h" 5 | #include "cf_atomic.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | struct cf_context_struct { 12 | /* Status code of last request */ 13 | cf_status_t status; 14 | 15 | /* Context ID of 
this context. */ 16 | int context_id; 17 | 18 | /* List of unused request nodes */ 19 | cf_request_info_t *request_free_list; 20 | }; 21 | 22 | extern cf_context_t *cf_get_current_context(void); 23 | extern int cf_is_valid_context(int); 24 | extern void cf_context_expect_notification(cf_context_t *context, int on); 25 | extern void cf_context_wait_for_notification(cf_context_t *context); 26 | extern void cf_context_notify(int context_id); 27 | extern void cf_context_init(void); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif /* D_cf_context */ 34 | -------------------------------------------------------------------------------- /src/include/sds_trace.h: -------------------------------------------------------------------------------- 1 | /* 2 | * sdsoc_trace.h 3 | * 4 | * Created on: Sep 2, 2015 5 | * Author: sskalick 6 | */ 7 | 8 | #ifndef SDSOC_TRACE_H_ 9 | #define SDSOC_TRACE_H_ 10 | /* One trace record: a timestamp together with a type and an ID. */ 11 | typedef struct trace_entry_struct { 12 | long long unsigned timestamp; 13 | unsigned type; 14 | unsigned ID; 15 | } sds_trace_entry; 16 | /* Self-linked node (via next) pointing at trace entries; used/size presumably count filled vs. allocated slots in entries - TODO confirm against the implementation. */ 17 | typedef struct trace_list_struct { 18 | sds_trace_entry *entries; 19 | size_t used; 20 | size_t size; 21 | struct trace_list_struct *next; 22 | } sds_trace_list; 23 | 24 | void trace_list_add(long long unsigned timestamp, unsigned type, unsigned ID); 25 | void sds_trace_setup(void); 26 | void sds_trace_cleanup(void); 27 | void _sds_print_trace_entry(sds_trace_entry *entry); 28 | void _sds_print_trace(void); 29 | void _sds_trace_log_HW(unsigned ID, unsigned type); 30 | void _sds_trace_log_SW(unsigned ID, unsigned type); 31 | void sds_trace(unsigned ID, unsigned type); 32 | void sds_trace_stop(); 33 | 34 | #endif /* SDSOC_TRACE_H_ */ 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 tkat0 4 | 5 | Permission is hereby granted, free of charge, to any person
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/include/sds_perf_instrumentation.h: -------------------------------------------------------------------------------- 1 | #ifndef APF_PERF_INSTRUMENTATION_H 2 | #define APF_PERF_INSTRUMENTATION_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct sw_function_perf_info_struct { 9 | unsigned long long cycleCount; 10 | unsigned int entryCount; 11 | int isRoot; 12 | char *function_name; 13 | }; 14 | 15 | typedef struct sw_function_perf_info_struct sw_function_perf_info_t; 16 | 17 | struct hw_cluster_perf_info_struct { 18 | unsigned int latency; 19 | unsigned int dsp; 20 | unsigned int bram; 21 | unsigned int lut; 22 | unsigned int num_functions; 23 | unsigned int sw_latency; 24 | char *function_names[]; 25 | }; 26 | 27 | typedef struct hw_cluster_perf_info_struct hw_cluster_perf_info_t; 28 | 29 | struct resources_struct { 30 | unsigned int dsp; 31 | unsigned int bram; 32 | unsigned int lut; 33 | }; 34 | 35 | typedef struct resources_struct resources_t; 36 | 37 | void add_sw_perf_function(sw_function_perf_info_t *sw_function); 38 | void add_hw_perf_cluster(hw_cluster_perf_info_t *hw_cluster); 39 | void apf_perf_estimation_exit(void); 40 | void add_sw_estimates(void); //implementation generated by tools and linked in later 41 | void add_hw_estimates(void);//implementation generated by tools and linked in later 42 | void apf_perf_gen_report(void); 43 | 44 | #ifdef __cplusplus 45 | }; 46 | #endif 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | #*.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | 
lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /src/include/cf_request.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_request 2 | #define D_cf_request 3 | 4 | #include "cf_context.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #define CF_REQUEST_INFO_SIZE (sizeof(cf_request_info_t) + 8 * sizeof(void *)) 11 | 12 | typedef struct cf_request_handlers_struct cf_request_handlers_t; 13 | typedef struct cf_request_on_free_handler_struct cf_request_on_free_handler_t; 14 | 15 | typedef enum { 16 | CF_REQ_STATE_WAITING, 17 | CF_REQ_STATE_ACTIVE, 18 | CF_REQ_STATE_DONE 19 | } cf_request_state_t; 20 | 21 | struct cf_request_handlers_struct { 22 | int (*test_req)(cf_request_info_t *); 23 | void 
(*release_ref)(cf_request_info_t *); 24 | }; 25 | 26 | struct cf_request_on_free_handler_struct { 27 | cf_request_on_free_handler_t *next; 28 | void (*handler)(void *client_data); 29 | void *client_data; 30 | }; 31 | 32 | struct cf_request_info_struct { 33 | cf_status_t status; 34 | cf_atomic_int_t state; 35 | cf_context_t *context; 36 | cf_request_handlers_t *handlers; 37 | cf_request_on_free_handler_t *on_free_handlers; 38 | }; 39 | 40 | extern cf_request_info_t *cf_request_alloc( 41 | size_t size, 42 | cf_request_handlers_t *handlers); 43 | 44 | extern void cf_request_add_on_free_handler( 45 | cf_request_info_t *request, 46 | void (*handler)(void *client_data), 47 | void *client_data); 48 | 49 | extern void cf_request_free( 50 | cf_request_info_t *request); 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | 56 | #endif /* D_cf_request */ 57 | -------------------------------------------------------------------------------- /src/include/axi_lite_dm.h: -------------------------------------------------------------------------------- 1 | #ifndef AXI_LITE_DM_H 2 | #define AXI_LITE_DM_H 3 | 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct axi_lite_info_struct { 11 | accel_info_t *accel_info; 12 | char *reg_name; 13 | int status_reg_offset; 14 | int data_reg_offset; 15 | int status_reg_val; // this is the value to ensure that the data_reg is valid/ready 16 | int data_type; // 0 for writing the same offset with every member of the input buf, 1 for incrementing the base offset for each member of the input buf 17 | // two additional status vals to support IO registers 18 | int read_status_reg_offset; 19 | int read_status_reg_val; 20 | }; 21 | typedef struct axi_lite_info_struct axi_lite_info_t; 22 | 23 | void axi_lite_register (void *info); 24 | void axi_lite_unregister (void *info); 25 | 26 | int axi_lite_open (cf_port_base_t *port, cf_request_handle_t *request); 27 | int axi_lite_close (cf_port_base_t *port, 
cf_request_handle_t *request); 28 | 29 | 30 | int axi_lite_send (cf_port_send_t *port, 31 | void *buf, 32 | size_t len, 33 | cf_request_handle_t *request); 34 | 35 | int axi_lite_recv (cf_port_receive_t *port, 36 | void *buf, 37 | size_t len, 38 | size_t *num_recd, 39 | cf_request_handle_t *request); 40 | 41 | int axi_lite_recv_ref (cf_port_receive_t *port, 42 | void **buf, 43 | size_t *len, 44 | cf_request_handle_t *request); 45 | 46 | 47 | 48 | #ifdef __cplusplus 49 | }; 50 | #endif 51 | 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /src/include/zero_copy_dm.h: -------------------------------------------------------------------------------- 1 | #ifndef ZERO_COPY_DM_H 2 | #define ZERO_COPY_DM_H 3 | 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct zero_copy_info_struct { 11 | accel_info_t *accel_info; 12 | char *reg_name; 13 | int needs_cache_flush_invalidate; 14 | int dir_chan; // XLNK_{DMA_TO_DEV,DMA_FROM_DEV,BI_DIRECTIONAL} 15 | int status_reg_offset; 16 | int data_reg_offset; 17 | int status_reg_val; // this is the value to ensure that the data_reg is valid/ready 18 | int read_status_reg_offset; 19 | int read_status_reg_val; // this is the value to ensure that the data_reg is valid/ready 20 | }; 21 | typedef struct zero_copy_info_struct zero_copy_info_t; 22 | 23 | void zero_copy_register (void *info); 24 | void zero_copy_unregister (void *info); 25 | 26 | int zero_copy_open (cf_port_base_t *port, cf_request_handle_t *request); 27 | int zero_copy_close (cf_port_base_t *port, cf_request_handle_t *request); 28 | 29 | 30 | int zero_copy_send_ref_i (cf_port_send_t *port, 31 | void *buf, 32 | size_t len, 33 | cf_request_handle_t *request); 34 | 35 | int zero_copy_send_i (cf_port_send_t *port, 36 | void *buf, 37 | size_t len, 38 | cf_request_handle_t *request); 39 | 40 | int zero_copy_recv (cf_port_receive_t *port, 41 | void *buf, 42 | size_t len, 43 | size_t 
*num_recd, 44 | cf_request_handle_t *request); 45 | 46 | int zero_copy_recv_ref (cf_port_receive_t *port, 47 | void **buf, 48 | size_t *len, 49 | cf_request_handle_t *request); 50 | 51 | 52 | 53 | #ifdef __cplusplus 54 | }; 55 | #endif 56 | 57 | #endif 58 | 59 | -------------------------------------------------------------------------------- /src/include/cf_sw_fifo.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_sw_fifo 2 | #define D_cf_sw_fifo 3 | 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct cf_sw_fifo_struct { 11 | unsigned int read_offset; 12 | unsigned int write_offset; 13 | unsigned int start_offset; 14 | unsigned int end_offset; 15 | unsigned int item_size; 16 | } cf_sw_fifo_t; 17 | 18 | #define CF_SW_FIFO_INIT(OBJ, DATA) { \ 19 | (char *)(DATA) - (char *)&(OBJ), \ 20 | (char *)(DATA) - (char *)&(OBJ), \ 21 | (char *)(DATA) - (char *)&(OBJ), \ 22 | (char *)(DATA) + (sizeof (DATA)) - (char *)&(OBJ), \ 23 | sizeof *(DATA) \ 24 | } 25 | 26 | #define CF_SW_FIFO_INIT2(OBJ, DATA, SIZE) do { \ 27 | (OBJ).read_offset = (char *)(DATA) - (char *)&(OBJ); \ 28 | (OBJ).write_offset = (char *)(DATA) - (char *)&(OBJ); \ 29 | (OBJ).start_offset = (char *)(DATA) - (char *)&(OBJ); \ 30 | (OBJ).end_offset = (char *)(DATA) + (SIZE) - (char *)&(OBJ); \ 31 | (OBJ).item_size = sizeof *(DATA); \ 32 | } while(0) 33 | 34 | #define CF_SW_FIFO_OBJ(NAME, TYPE, LEN) \ 35 | struct { \ 36 | cf_sw_fifo_t fifo; \ 37 | TYPE data[(LEN) + 1]; \ 38 | } NAME 39 | 40 | 41 | extern size_t cf_sw_fifo_get_available( 42 | cf_sw_fifo_t *fifo, 43 | void **start); 44 | 45 | extern void cf_sw_fifo_set_available( 46 | cf_sw_fifo_t *fifo, 47 | void *end); 48 | 49 | extern void cf_sw_fifo_reserve( 50 | cf_sw_fifo_t *fifo, 51 | void **start, 52 | void **end); 53 | 54 | extern void cf_sw_fifo_commit( 55 | cf_sw_fifo_t *fifo, 56 | void *end); 57 | 58 | #ifdef __cplusplus 59 | } 60 | #endif 61 | 62 | 
#endif /* D_cf_sw_fifo */ 63 | -------------------------------------------------------------------------------- /src/include/axi_dma_simple_dm.h: -------------------------------------------------------------------------------- 1 | #ifndef AXI_DMA_SIMPLE_DM_H 2 | #define AXI_DMA_SIMPLE_DM_H 3 | 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct axi_dma_simple_info_struct { 11 | int device_id; 12 | int phys_base_addr; 13 | int addr_range; 14 | int virt_base_addr; 15 | int dir; // either DMA_TO_DEV or DMA_FROM_DEV 16 | }; 17 | typedef struct axi_dma_simple_info_struct axi_dma_simple_info_t; 18 | 19 | struct axi_dma_simple_channel_info_struct { 20 | axi_dma_simple_info_t *dma_info; 21 | int in_use; 22 | int needs_cache_flush_invalidate; 23 | }; 24 | typedef struct axi_dma_simple_channel_info_struct axi_dma_simple_channel_info_t; 25 | 26 | void axi_dma_simple_register (void *info); 27 | void axi_dma_simple_unregister (void *info); 28 | 29 | int axi_dma_simple_open (cf_port_base_t *port, cf_request_handle_t *request); 30 | int axi_dma_simple_close (cf_port_base_t *port, cf_request_handle_t *request); 31 | 32 | 33 | int axi_dma_simple_send_ref_i (cf_port_send_t *port, 34 | void *buf, 35 | size_t len, 36 | cf_request_handle_t *request); 37 | 38 | int axi_dma_simple_send_i (cf_port_send_t *port, 39 | void *buf, 40 | size_t len, 41 | cf_request_handle_t *request); 42 | 43 | int axi_dma_simple_recv_i (cf_port_receive_t *port, 44 | void *buf, 45 | size_t len, 46 | size_t *num_recd, 47 | cf_request_handle_t *request); 48 | 49 | int axi_dma_simple_recv_ref_i (cf_port_receive_t *port, 50 | void **buf, 51 | size_t *len, 52 | cf_request_handle_t *request); 53 | 54 | 55 | 56 | #ifdef __cplusplus 57 | }; 58 | #endif 59 | 60 | #endif 61 | 62 | -------------------------------------------------------------------------------- /src/devreg.c: -------------------------------------------------------------------------------- 1 | /* File: 
C:/workspace2015.4/pynq-mmult/SDDebug/_sds/p0/.cf_work/devreg.c */ 2 | #include "cf_lib.h" 3 | #include "cf_request.h" 4 | #include "devreg.h" 5 | 6 | #include "stdio.h" // for getting printf 7 | #include "xlnk_core_cf.h" 8 | #include "accel_info.h" 9 | #include "axi_dma_simple_dm.h" 10 | #include "axi_lite_dm.h" 11 | /* Three simple-DMA descriptors follow: 0 and 1 use dir XLNK_DMA_TO_DEV, 2 uses dir XLNK_DMA_FROM_DEV. */ 12 | axi_dma_simple_info_t _p0_dm_0 = { 13 | .device_id = 0, 14 | .phys_base_addr = 0x40400000, 15 | .addr_range = 0x10000, 16 | .dir = XLNK_DMA_TO_DEV, 17 | }; 18 | 19 | axi_dma_simple_info_t _p0_dm_1 = { 20 | .device_id = 1, 21 | .phys_base_addr = 0x40410000, 22 | .addr_range = 0x10000, 23 | .dir = XLNK_DMA_TO_DEV, 24 | }; 25 | 26 | axi_dma_simple_info_t _p0_dm_2 = { 27 | .device_id = 2, 28 | .phys_base_addr = 0x40420000, 29 | .addr_range = 0x10000, 30 | .dir = XLNK_DMA_FROM_DEV, 31 | }; 32 | /* Accelerator descriptor (device_id 3) that is registered alongside the three DMA descriptors above. */ 33 | accel_info_t _sds__p0_mmult_accel_0 = { 34 | .device_id = 3, 35 | .phys_base_addr = 0x43c00000, 36 | .addr_range = 0x10000, 37 | .ip_type = "axis_acc_adapter" 38 | }; 39 | /* Calls cf_xlnk_open(first). On a 0 result it registers the three DMA descriptors and the accelerator descriptor, then calls cf_xlnk_init(first). A negative result is reported on stderr; a positive result does nothing. */ 40 | void _p0_cf_register(int first) 41 | { 42 | int xlnk_init_done = cf_xlnk_open(first); 43 | if (xlnk_init_done == 0) { 44 | axi_dma_simple_register(&_p0_dm_0); 45 | axi_dma_simple_register(&_p0_dm_1); 46 | axi_dma_simple_register(&_p0_dm_2); 47 | accel_register(&_sds__p0_mmult_accel_0); 48 | cf_xlnk_init(first); 49 | } 50 | else if (xlnk_init_done <0) { 51 | fprintf(stderr, "ERROR: unable to open xlnk %d\n", xlnk_init_done); 52 | } 53 | else { /* positive return value: intentionally nothing to do */ 54 | } 55 | } 56 | /* Unregisters the three DMA descriptors and the accelerator descriptor, then calls xlnkClose(last, NULL). NOTE(review): this runs unconditionally, whatever cf_xlnk_open returned in _p0_cf_register - confirm that is intended. */ 57 | void _p0_cf_unregister(int last) 58 | { 59 | axi_dma_simple_unregister(&_p0_dm_0); 60 | axi_dma_simple_unregister(&_p0_dm_1); 61 | axi_dma_simple_unregister(&_p0_dm_2); 62 | accel_unregister(&_sds__p0_mmult_accel_0); 63 | xlnkClose(last,NULL); 64 | } 65 | 66 | -------------------------------------------------------------------------------- /src/include/cf_atomic.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_atomic 2 | #define D_cf_atomic 3 | 4 | #ifdef __cplusplus 5
| extern "C" { 6 | #endif 7 | 8 | #define CF_ATOMIC_FLAG_INIT 0 9 | 10 | typedef int cf_atomic_flag_t; 11 | typedef int cf_atomic_int_t; 12 | 13 | #define cf_memory_order_relaxed 0 14 | #define cf_memory_order_consume 1 15 | #define cf_memory_order_acquire 2 16 | #define cf_memory_order_release 3 17 | #define cf_memory_order_acq_rel 4 18 | #define cf_memory_order_seq_cst 5 19 | 20 | #if defined(__arm__) 21 | #define cf_atomic_thread_fence(ORDER) \ 22 | __asm__ __volatile__("dmb":::"memory") 23 | #else 24 | #define cf_atomic_thread_fence(ORDER) \ 25 | __asm__ __volatile__("mfence":::"memory") 26 | #endif 27 | 28 | #define cf_atomic_flag_test_and_set(FLAG) \ 29 | __sync_lock_test_and_set(&(FLAG), 1) 30 | 31 | #define cf_atomic_flag_clear(FLAG) \ 32 | __sync_lock_release(&(FLAG)) 33 | 34 | #define cf_atomic_fetch_add(OBJ, VAL) \ 35 | __sync_fetch_and_add((OBJ), (VAL)) 36 | 37 | #define cf_atomic_fetch_sub(OBJ, VAL) \ 38 | __sync_fetch_and_sub((OBJ), (VAL)) 39 | 40 | #define cf_atomic_load(OBJ) \ 41 | (__sync_synchronize(), *(OBJ)) 42 | 43 | #define cf_atomic_store(OBJ, VAL) \ 44 | (*(OBJ) = (VAL), __sync_synchronize()) 45 | 46 | #define cf_atomic_exchange(OBJ, DES) \ 47 | ({ \ 48 | typeof(OBJ) obj = (OBJ); \ 49 | typeof(*obj) des = (DES); \ 50 | typeof(*obj) expval; \ 51 | typeof(*obj) oldval = cf_atomic_load(obj); \ 52 | do { \ 53 | expval = oldval; \ 54 | oldval = __sync_val_compare_and_swap( \ 55 | obj, expval, des); \ 56 | } while (oldval != expval); \ 57 | oldval; \ 58 | }) 59 | 60 | #define cf_atomic_compare_exchange_strong(OBJ, EXP, DES) \ 61 | ({ \ 62 | typeof(OBJ) obj = (OBJ); \ 63 | typeof(EXP) exp = (EXP); \ 64 | typeof(*obj) expval = *exp; \ 65 | typeof(*obj) oldval = __sync_val_compare_and_swap( \ 66 | obj, expval, (DES)); \ 67 | *exp = oldval; \ 68 | oldval == expval; \ 69 | }) 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif /* D_cf_atomic */ 76 | -------------------------------------------------------------------------------- 
/src/include/axi_fifo_dm.h: -------------------------------------------------------------------------------- 1 | #ifndef AXI_FIFO_DM_H 2 | #define AXI_FIFO_DM_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include "xlnk_core_cf.h" 8 | 9 | #define AXI_FIFO_DM_TO_DEV 0 10 | #define AXI_FIFO_DM_FROM_DEV 1 11 | 12 | typedef struct axi_fifo_info_struct axi_fifo_info_t; 13 | typedef struct axi_fifo_channel_info_struct axi_fifo_channel_info_t; 14 | typedef struct axi_fifo_transaction_info_struct axi_fifo_transaction_info_t; 15 | 16 | struct axi_fifo_info_struct { 17 | char *name; 18 | int device_id; 19 | int phys_base_addr; 20 | int virt_base_addr; // Computed when registering device 21 | int addr_range; 22 | int num_channels; 23 | int dir_chan0; 24 | int dir_chan1; 25 | int poll_mode_chan0; 26 | int poll_mode_chan1; 27 | int irq_send; 28 | int irq_recv; 29 | }; 30 | 31 | struct axi_fifo_channel_info_struct { 32 | char *name; 33 | //xlnk_handle_t fifochan; 34 | axi_fifo_info_t *fifoinfo; 35 | }; 36 | 37 | /* info used for fifo_submit - adds port ID & flag to the channel info flag 38 | can be dynamic because the same submit function is used for kernel alloc, 39 | physical and virtual buffers */ 40 | struct axi_fifo_transaction_info_struct { 41 | struct axi_fifo_channel_info_struct *fifo_channel_info; 42 | int port_id; // ID of stream port - 0 if the fifo channel serves just 1 43 | // port, else a port ID 44 | int flag; 45 | xlnk_handle_t fifohandle; 46 | }; 47 | 48 | int axi_fifo_register (axi_fifo_info_t *axi_fifo_info); 49 | 50 | int axi_fifo_unregister (axi_fifo_info_t *axi_fifo_info); 51 | 52 | int axi_fifo_open (cf_port_base_t *port, cf_request_handle_t *request); 53 | int axi_fifo_close (cf_port_base_t *port, cf_request_handle_t *request); 54 | 55 | 56 | int axi_fifo_send (cf_port_send_t *port, 57 | void *buf, 58 | size_t len, 59 | cf_request_handle_t *request); 60 | 61 | int axi_fifo_recv (cf_port_receive_t *port, 62 | void *buf, 63 | size_t len, 64 | 
size_t *num_recd, 65 | cf_request_handle_t *request); 66 | 67 | int axi_fifo_recv_ref (cf_port_receive_t *port, 68 | void **buf, 69 | size_t *len, 70 | cf_request_handle_t *request); 71 | 72 | #ifdef __cplusplus 73 | } 74 | #endif 75 | 76 | #endif /* AXI_FIFO_DM_H */ 77 | -------------------------------------------------------------------------------- /src/include/cf_mem_stream.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_mem_stream 2 | #define D_cf_mem_stream 3 | 4 | #include "cf_sw_fifo.h" 5 | #include "cf_util.h" 6 | #include "cf_atomic.h" 7 | #include "cf_pending.h" 8 | 9 | #ifdef __cplusplus 10 | extern "C" { 11 | #endif 12 | 13 | 14 | typedef struct cf_mem_stream_struct { 15 | cf_pending_t pending; 16 | cf_atomic_int_t state; 17 | cf_sw_fifo_t send; 18 | cf_sw_fifo_t receive; 19 | } cf_mem_stream_t; 20 | 21 | 22 | #define CF_MEM_STREAM_OBJ(NAME, TYPE, LEN) \ 23 | struct { \ 24 | cf_mem_stream_t stream; \ 25 | cf_request_info_t *data_send[(LEN) + 1]; \ 26 | cf_request_info_t *data_receive[(LEN) + 1]; \ 27 | } NAME = { \ 28 | { \ 29 | { 0 }, \ 30 | 0, \ 31 | CF_SW_FIFO_INIT(NAME.stream.send, NAME.data_send), \ 32 | CF_SW_FIFO_INIT(NAME.stream.receive, NAME.data_receive), \ 33 | } \ 34 | } 35 | 36 | 37 | #define CF_MEM_STREAM_SEND_INIT(INFO) { \ 38 | { \ 39 | (INFO), \ 40 | &cf_mem_stream_send_open, \ 41 | &cf_mem_stream_send_close \ 42 | }, \ 43 | &cf_mem_stream_send_buffer, \ 44 | &cf_mem_stream_send_buffer, \ 45 | &cf_util_send_iov \ 46 | } 47 | 48 | 49 | #define CF_MEM_STREAM_RECEIVE_INIT(INFO) { \ 50 | { \ 51 | (INFO), \ 52 | &cf_mem_stream_receive_open, \ 53 | &cf_mem_stream_receive_close \ 54 | }, \ 55 | &cf_mem_stream_receive_reference, \ 56 | &cf_mem_stream_receive_buffer, \ 57 | &cf_util_receive_iov \ 58 | } 59 | 60 | 61 | /* Open sender side */ 62 | extern int cf_mem_stream_send_open( 63 | cf_port_base_t *port, 64 | cf_request_handle_t *request); 65 | 66 | 67 | /* Close sender side */ 68 | extern 
int cf_mem_stream_send_close( 69 | cf_port_base_t *port, 70 | cf_request_handle_t *request); 71 | 72 | 73 | /* Send buffer */ 74 | extern int cf_mem_stream_send_buffer( 75 | cf_port_send_t *port, 76 | void *buf, 77 | size_t len, 78 | cf_request_handle_t *request); 79 | 80 | 81 | /* Open receive side */ 82 | extern int cf_mem_stream_receive_open( 83 | cf_port_base_t *port, 84 | cf_request_handle_t *request); 85 | 86 | 87 | /* Close receive side */ 88 | extern int cf_mem_stream_receive_close( 89 | cf_port_base_t *port, 90 | cf_request_handle_t *request); 91 | 92 | 93 | /* Receive reference to buffer */ 94 | extern int cf_mem_stream_receive_reference( 95 | cf_port_receive_t *port, 96 | void **buf, 97 | size_t *len, 98 | cf_request_handle_t *request); 99 | 100 | 101 | /* Receive buffer */ 102 | extern int cf_mem_stream_receive_buffer( 103 | cf_port_receive_t *port, 104 | void *buf, 105 | size_t len, 106 | size_t *bytes_received, 107 | cf_request_handle_t *request); 108 | 109 | #ifdef __cplusplus 110 | } 111 | #endif 112 | 113 | #endif /* D_cf_mem_stream */ 114 | -------------------------------------------------------------------------------- /src/include/accel_info.h: -------------------------------------------------------------------------------- 1 | #ifndef ACCEL_INFO_H 2 | #define ACCEL_INFO_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | struct accel_info_struct { 8 | int device_id; /* ID of the uio device */ 9 | int phys_base_addr; /* passed in to accel_register */ 10 | int addr_range; 11 | int virt_base_addr; /* this cannot be computed at compile time, and must be generated by a call to accel_open */ 12 | int wait_flag; /* for checking if the accelerator is DONE */ 13 | unsigned int done_reg_offset; 14 | unsigned int done_reg_mask; 15 | char *ip_type; /* axis_accelerator_adapter or generic_axi_lite or other */ 16 | }; 17 | typedef struct accel_info_struct accel_info_t; 18 | 19 | /* accel_register creates the uio devices in sysfs */ 20 | void 
accel_register(void *info); 21 | void accel_unregister(void *info); 22 | 23 | /* accel_open opens the uio device and mmaps its base address to set the virtual_base_addr */ 24 | int accel_open(void *info); 25 | 26 | /* accel_close closes the uio device and munmaps it */ 27 | void accel_close(void *info); 28 | 29 | /* accel_wait waits for the accel to be done */ 30 | void accel_wait(void *info); 31 | 32 | /* accel_set_wait_flag sets the wait flag for the accel */ 33 | void accel_set_wait_flag(void *info); 34 | 35 | /* Check if adapter has space. Call before initiating transfer when necessary */ 36 | int accel_adapter_has_space(void *info, unsigned int offset); 37 | 38 | /* accel_get_reg_info passes back the offsets related to the named register 39 | * input 1 = void * pointer to the accel_info struct 40 | * input 2 = char * representing the register name 41 | * output 1 = offset of the register for reading/writing data 42 | * output 2 = offset of an optional status register to check before writing data 43 | * output 3 = value of the status register that indicates the main register is ready for write 44 | * output 4 = offset of an optional status register to check before reading data 45 | * output 5 = value of the status register that indicates the main register is ready for read 46 | */ 47 | void accel_get_reg_info(void *, char *, int *, int *, int *, int *, int *); 48 | 49 | /* accel_get_start_seq passes back an int array representing the command sequence to use for starting the accelerator 50 | * input 1 = number of input scalars 51 | * input 2 = number of input arrays 52 | * input 3 = number of output scalars 53 | * input 4 = number of output arrays 54 | * input 5 = run mode - 0 for single execution and 1 for continuous run 55 | * output 1 = int array representing the command sequence 56 | * output 2 = number of elements in the array of output 1 57 | */ 58 | void accel_get_start_seq(int , int , int , int , int , int **, int *); 59 | 60 | /* accel_release_start_seq
frees the array passed to the caller by accel_get_start_seq */ 61 | void accel_release_start_seq(int **); 62 | #ifdef __cplusplus 63 | }; 64 | #endif 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /src/mmult_accel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mmult_accel.h" 5 | 6 | void mmult_kernel(float in_A[A_NROWS][A_NCOLS], 7 | float in_B[A_NCOLS][B_NCOLS], 8 | float out_C[A_NROWS*B_NCOLS]) 9 | { 10 | #pragma HLS INLINE self 11 | #pragma HLS array_partition variable=in_A block factor=16 dim=2 12 | #pragma HLS array_partition variable=in_B block factor=16 dim=1 13 | 14 | int index_a, index_b, index_d; 15 | 16 | for (index_a = 0; index_a < A_NROWS; index_a++) { 17 | for (index_b = 0; index_b < B_NCOLS; index_b++) { 18 | #pragma HLS PIPELINE II=1 19 | float result = 0; 20 | for (index_d = 0; index_d < A_NCOLS; index_d++) { 21 | // multiply accumulate broken into individual operators 22 | // so that AutoESL can infer two FP operators 23 | float product_term = in_A[index_a][index_d] * in_B[index_d][index_b]; 24 | result += product_term; 25 | } 26 | out_C[index_a * B_NCOLS + index_b] = result; 27 | } 28 | } 29 | } 30 | 31 | void mmult_accel (float in_A[A_NROWS*A_NCOLS], 32 | float in_B[A_NCOLS*B_NCOLS], 33 | float out_C[A_NROWS*B_NCOLS]) 34 | { 35 | int i, j; 36 | float a_buf[A_NROWS][A_NCOLS]; 37 | float b_buf[A_NCOLS][B_NCOLS]; 38 | 39 | // Transfer matrix A from multi-buffer into local RAM 40 | for(i=0; i 60 | #include 61 | #include "cf_stub.h" 62 | void _p0_mmult_accel_0(float in_A[1024], float in_B[1024], float out_C[1024]); 63 | void _p0_mmult_accel_0(float in_A[1024], float in_B[1024], float out_C[1024]) 64 | { 65 | switch_to_next_partition(0); 66 | int start_seq[3]; 67 | start_seq[0] = 0x00000000; 68 | start_seq[1] = 0x00010000; 69 | start_seq[2] = 0x00020000; 70 | cf_request_handle_t _p0_swinst_mmult_accel_0_cmd; 71 | 
cf_send_i(&(_p0_swinst_mmult_accel_0.cmd_mmult_accel), start_seq, 3*sizeof(int), &_p0_swinst_mmult_accel_0_cmd); 72 | cf_wait(_p0_swinst_mmult_accel_0_cmd); 73 | 74 | cf_send_i(&(_p0_swinst_mmult_accel_0.in_A), in_A, 1024 * 4, &_p0_request_0); 75 | cf_send_i(&(_p0_swinst_mmult_accel_0.in_B), in_B, 1024 * 4, &_p0_request_1); 76 | 77 | cf_receive_i(&(_p0_swinst_mmult_accel_0.out_C), out_C, 1024 * 4, &_p0_mmult_accel_0_num_out_C, &_p0_request_2); 78 | 79 | cf_wait(_p0_request_0); 80 | cf_wait(_p0_request_1); 81 | cf_wait(_p0_request_2); 82 | } 83 | 84 | 85 | -------------------------------------------------------------------------------- /src/include/axi_dma_sg_dm.h: -------------------------------------------------------------------------------- 1 | #ifndef AXI_DMA_SG_DM_H 2 | #define AXI_DMA_SG_DM_H 3 | #include "xlnk_core_cf.h" 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct axi_dma_sg_info_struct { // info used for dma_register 11 | char *name; // actually the type-name; example - xilinx-axidma - used for dma_register 12 | int seq_num; // sequence number needed by xlnk; must be unique and "complete in 0..n" 13 | int base_addr; 14 | int addr_range; 15 | int num_channels; // must be 1 or 2, and if 2, the first channel must be the send channel 16 | int dir_chan0; // either DMA_TO_DEV or DMA_FROM_DEV if num_channels is 1, otherwise this field is DMA_TO_DEV 17 | int dir_chan1; // ignored if num_channels is 1, otherwise this field is DMA_FROM_DEV 18 | int poll_mode_chan0; // this info is needed at registration time by the dma driver 19 | int poll_mode_chan1; // this info is needed at registration time by the dma driver 20 | int irq_send, irq_recv; // irq num must be consistent with hw connection and must be 0 if the channel is not present 21 | }; 22 | 23 | struct axi_dma_sg_channel_info_struct { // info used for dma_open and dma_close 24 | // struct axi_dma_sg_info_struct *dma_info; 25 | char *name; 26 | // example - 
xilinx-axidma.0chan0 ; 27 | // extend the name in dma_info with the seq_num & chan0 for first channel or chan1 for second channel 28 | xlnk_handle_t dmachan; 29 | int state ; // 0 for not-opened and 1 for opened 30 | }; 31 | 32 | struct axi_dma_sg_transaction_info_struct { // info used for dma_submit - adds port ID & flag to the channel info 33 | // flag can be dynamic because the same submit function is used for kernel alloc, physical and virtual buffers 34 | struct axi_dma_sg_channel_info_struct *dma_channel_info; 35 | int port_id; // ID of stream port - 0 if the dma channel serves just 1 port, else a port ID 36 | // int nappwords; // used only for send must be equal to 5 37 | // int appwords[5] ; // example - { 0, ID, 0, 0, 0} 38 | int flag; // for kernel allocated, physical, coherent, poll mode etc., as defined by xlnk - must match the dma_info_struct values 39 | xlnk_handle_t dmahandle; // wait handle 40 | }; 41 | 42 | typedef struct axi_dma_sg_info_struct axi_dma_sg_info_t; 43 | typedef struct axi_dma_sg_channel_info_struct axi_dma_sg_channel_info_t; 44 | typedef struct axi_dma_sg_transaction_info_struct axi_dma_sg_transaction_info_t; 45 | 46 | void axi_dma_sg_register (void *info); 47 | void axi_dma_sg_unregister (void *info); 48 | 49 | int axi_dma_sg_open (cf_port_base_t *port, cf_request_handle_t *request); 50 | int axi_dma_sg_close (cf_port_base_t *port, cf_request_handle_t *request); 51 | 52 | 53 | int axi_dma_sg_send_i (cf_port_send_t *port, 54 | void *buf, 55 | size_t len, 56 | cf_request_handle_t *request); 57 | 58 | int axi_dma_sg_recv_i (cf_port_receive_t *port, 59 | void *buf, 60 | size_t len, 61 | size_t *num_recd, 62 | cf_request_handle_t *request); 63 | 64 | int axi_dma_sg_recv_ref (cf_port_receive_t *port, 65 | void **buf, 66 | size_t *len, 67 | cf_request_handle_t *request); 68 | 69 | #ifdef __cplusplus 70 | }; 71 | #endif 72 | 73 | #endif 74 | 75 | --------------------------------------------------------------------------------
/src/include/axi_dma_2d_dm.h: -------------------------------------------------------------------------------- 1 | #ifndef AXI_DMA_2D_DM_H 2 | #define AXI_DMA_2D_DM_H 3 | 4 | #include "cf_lib.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | struct axi_dma_2d_bd_struct { 10 | unsigned int next_desc_ptr; // 31:6 -> ptr, 5:0 -> reserved 11 | unsigned int reserved1; 12 | unsigned int buffer_addr; 13 | unsigned int reserved2; 14 | unsigned int attr_bytes; // 31:28 -> ARUSER, 27:24 -> ARCACHE; TX ONLY -> 19:16 -> TUSER, 12:8 -> TID, 4:0 -> TDEST 15 | unsigned int vsize_stride; // 31:19->Vsize, 15:0 -> stride 16 | unsigned int ctrl_hsize; // 27 -> SOP, 26 -> EOP, 15:0 -> Hsize 17 | unsigned int status; // 31 -> Cmp, 30 -> DE, 29 -> SE, 28-> IE; RX ONLY -> 27->SOP, 26->EOP, 19:16 -> TUSER, 12:8->TID, 4:0->TDEST 18 | }; 19 | 20 | struct axi_dma_2d_info_struct { 21 | int device_id; 22 | int phys_base_addr; 23 | int addr_range; 24 | int intr_num; 25 | int uio_fd; 26 | int virt_base_addr; 27 | int dir; // either DMA_TO_DEV or DMA_FROM_DEV 28 | struct axi_dma_2d_bd_struct *bd_head; 29 | struct axi_dma_2d_bd_struct *bd_tail; 30 | struct axi_dma_2d_bd_struct *current_bd; 31 | unsigned int bd_head_phy; 32 | unsigned int bd_tail_phy; 33 | unsigned int current_bd_phy; 34 | int first_op; 35 | }; 36 | typedef struct axi_dma_2d_info_struct axi_dma_2d_info_t; 37 | 38 | struct axi_dma_2d_channel_info_struct { 39 | axi_dma_2d_info_t *dma_info; 40 | int in_use; 41 | int needs_cache_flush_invalidate; 42 | }; 43 | typedef struct axi_dma_2d_channel_info_struct axi_dma_2d_channel_info_t; 44 | 45 | void axi_dma_2d_register (void *info); 46 | void axi_dma_2d_unregister (void *info); 47 | 48 | int axi_dma_2d_open (cf_port_base_t *port, cf_request_handle_t *request); 49 | int axi_dma_2d_close (cf_port_base_t *port, cf_request_handle_t *request); 50 | 51 | 52 | int axi_dma_2d_send_ref_i (cf_port_send_t *port, 53 | void *buf, 54 | size_t len, 55 | cf_request_handle_t *request); 56 | 57 
| int axi_dma_2d_send_i (cf_port_send_t *port, 58 | void *buf, 59 | size_t len, 60 | cf_request_handle_t *request); 61 | 62 | int axi_dma_2d_send_iov_i(cf_port_send_t *port, 63 | cf_iovec_t *iov, 64 | unsigned int iovcnt, 65 | cf_request_handle_t *request); 66 | 67 | int axi_dma_2d_send2d_i (cf_port_send_t *port, 68 | void *buf, 69 | size_t xsize, 70 | size_t numlines, 71 | size_t xstride, 72 | cf_request_handle_t *request); 73 | 74 | int axi_dma_2d_recv_i (cf_port_receive_t *port, 75 | void *buf, 76 | size_t len, 77 | size_t *num_recd, 78 | cf_request_handle_t *request); 79 | 80 | int axi_dma_2d_recv_iov_i(cf_port_receive_t *port, 81 | cf_iovec_t *iov, 82 | unsigned int iovcnt, 83 | size_t *bytes_received, 84 | cf_request_handle_t *request); 85 | 86 | int axi_dma_2d_recv2d_i (cf_port_receive_t *port, 87 | void *buf, 88 | size_t xsize, 89 | size_t numlines, 90 | size_t xstride, 91 | cf_request_handle_t *request); 92 | 93 | int axi_dma_2d_recv_ref_i (cf_port_receive_t *port, 94 | void **buf, 95 | size_t *len, 96 | cf_request_handle_t *request); 97 | 98 | 99 | 100 | #ifdef __cplusplus 101 | }; 102 | #endif 103 | 104 | #endif 105 | 106 | -------------------------------------------------------------------------------- /src/include/sds_lib.h: -------------------------------------------------------------------------------- 1 | #ifndef D_sds_lib 2 | #define D_sds_lib 3 | 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | /* wait for the first request list in the queue identified by id, to complete */ 9 | extern void sds_wait( unsigned int id); 10 | 11 | /* allocate a physically contiguous array of size bytes for DMA transfers */ 12 | extern void *sds_alloc( size_t size); 13 | 14 | /* allocate a physically contiguous array of size bytes for DMA transfers. 
15 | Same as sds_alloc() */ 16 | extern void *sds_alloc_cacheable( size_t size); 17 | 18 | /* allocate a physically contiguous array of size bytes for DMA transfers, but 19 | mark the pages as non-cacheable */ 20 | extern void *sds_alloc_non_cacheable( size_t size); 21 | 22 | /* free an array allocated through sds_alloc */ 23 | extern void sds_free(void *memptr); 24 | 25 | /* Create a virtual address mapping to access a memory of size size bytes located at physical address physical_addr 26 | physical_addr: physical address to be mapped 27 | size: size of physical address to be mapped 28 | virtual_addr: If a non-null value is passed in, it is considered to be 29 | the virtual-address already mapped to the physical_addr, and cf_mmap keeps track of the mapping 30 | If a null value is passed in, cf_mmap invokes mmap() to generate the virtual address, and 31 | virtual_addr is assigned this value */ 32 | extern void *sds_mmap( void *physical_addr, size_t size, void *virtual_addr); 33 | 34 | /* register a handle between a given userspace virtual address and an FD that 35 | corresponds to a handle to a GEM-allocated buffer */ 36 | extern int sds_register_dmabuf(void *virtual_addr, int fd); 37 | 38 | /* unregister a handle between a given userspace virtual address and an FD that 39 | corresponds to a handle to a GEM-allocated buffer that was previously 40 | registered by using sds_register_dmabuf() */ 41 | extern int sds_unregister_dmabuf(void *virtual_addr, int fd); 42 | 43 | /* unmaps a virtual address mapped associated with a physical address using sds_mmap() */ 44 | extern void sds_munmap( void *virtual_addr); 45 | 46 | /* returns the value associated with a free-running counter used for fine grain time-interval measurements 47 | The counter increments on every processor clock, and wraps to 0 */ 48 | extern unsigned long long sds_clock_counter(void); 49 | 50 | /* 32 bit version of sds_clock_counter() */ 51 | extern unsigned long sds_clock_counter32(void); 52 | 53 | /* 
stops the global counter, sets the global counter to the given value, then starts it running again from the given value */ 54 | extern void sds_set_counter(unsigned long long val); 55 | 56 | /* do not use - this is for internal use only, and will be removed from this header */ 57 | /* function called by automatically generated stub code to insert a handle onto a queue 58 | The user then calls sds_wait with the same id to wait for the request to complete 59 | parameters: id: queue ID 60 | req: opaque pointer to a request list allocated by the function 61 | num: number of items in the request list 62 | */ 63 | extern void sds_insert_req( unsigned int id, void *req, int num); 64 | 65 | /* Trace Event Types */ 66 | #define EVENT_START 0x04 67 | #define EVENT_STOP 0x05 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif /* D_sds_lib */ 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/include/xlnk_core_cf.h: -------------------------------------------------------------------------------- 1 | #ifndef XLNK_CORE_CF_H 2 | #define XLNK_CORE_CF_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | // the following are for xlnkDMARegister 8 | #define XLNK_DMA_TO_DEV 0 9 | #define XLNK_DMA_FROM_DEV 1 10 | #define XLNK_BI_DIRECTIONAL 2 11 | // the following are from xlnk-os.h (but the DMA_TO and DMA_FROM flags are not 12 | #define XLNK_FLAG_COHERENT 0x00000001 13 | #define XLNK_FLAG_KERNEL_BUFFER 0x00000002 14 | #define XLNK_FLAG_DMAPOLLING 0x00000004 15 | #define XLNK_FLAG_PHYSICAL_ADDR 0x00000100 16 | #define XLNK_FLAG_VIRTUAL_ADDR 0x00000200 17 | 18 | #define CF_FLAG_CACHE_FLUSH_INVALIDATE 0x00000001 19 | #define CF_FLAG_PHYSICALLY_CONTIGUOUS 0x00000002 20 | #define CF_FLAG_DMAPOLLING 0x00000004 21 | 22 | typedef unsigned int xlnk_handle_t; 23 | 24 | extern void *xlnkAllocBufInternal(size_t len, int cacheable); 25 | extern void *xlnk_mmap2(void *phy_addr, unsigned int size, void *virt_addr); 26 | extern
void xlnk_munmap2(void *buf); 27 | extern int xlnk_munmap(unsigned int virt_addr, unsigned int size); 28 | extern void *xlnkAllocBuf(size_t len); 29 | extern void xlnkFreeBuf(void *buf); 30 | extern unsigned int xlnkGetBufPhyAddr(void *buf); 31 | extern unsigned int xlnkGetBufPhyAddrAndCacheable(void *buf, int *cacheable); 32 | extern void xlnkFlushCache(void *buf, int size); 33 | extern void xlnkInvalidateCache(void *addr, int size); 34 | 35 | 36 | extern int xlnkDMARequest(char *name, xlnk_handle_t *dmachan); 37 | extern int xlnkDMARelease(xlnk_handle_t dmachan); 38 | extern int xlnkDMASubmit(xlnk_handle_t dmachan, 39 | void *buf, 40 | unsigned int len, 41 | unsigned int dmadir, 42 | unsigned int nappwords_i, 43 | unsigned int *appwords_i, 44 | unsigned int nappwords_o, 45 | unsigned int flag, 46 | xlnk_handle_t *dmahandle); 47 | extern int xlnkDMAWait(xlnk_handle_t dmahandle, 48 | unsigned int nappwords_o, 49 | unsigned int *appwords_o); 50 | extern int xlnkDmaRegister(char *name, 51 | unsigned int id, 52 | unsigned long base, 53 | unsigned int size, 54 | unsigned int chan_num, 55 | unsigned int chan0_dir, 56 | unsigned int chan0_irq, 57 | unsigned int chan0_poll_mode, 58 | unsigned int chan0_include_dre, 59 | unsigned int chan0_data_width, 60 | unsigned int chan1_dir, 61 | unsigned int chan1_irq, 62 | unsigned int chan1_poll_mode, 63 | unsigned int chan1_include_dre, 64 | unsigned int chan1_data_width); 65 | extern void xlnkDmaUnregister(unsigned long base); 66 | extern int xlnkDevRegister(char *name, 67 | unsigned int id, 68 | unsigned long base, 69 | unsigned int size, 70 | unsigned int irq0, 71 | unsigned int irq1, 72 | unsigned int irq2, 73 | unsigned int irq3); 74 | void xlnkDevUnregister(unsigned long base); 75 | 76 | // return 0 if device registration has to be done, 1 if device registration has been done already and <0 for error 77 | int cf_xlnk_open(int); 78 | // second half of xlnkOpen, to be called after device registration if cfXlnkOpen returns 1 79 
| void cf_xlnk_init(int); 80 | 81 | void xlnkClose(int, void *); 82 | unsigned int xlnkUioMap(int uio_id, unsigned int phys_base, unsigned int addr_range); 83 | void xlnkUioUnMap(unsigned int virt_base, unsigned int addr_range); 84 | void xlnkUioWrite32(void *base, unsigned int offset, unsigned int data); 85 | unsigned int xlnkUioRead32(void *base, unsigned int offset); 86 | unsigned long xlnkGetGlobalCounter(void); 87 | unsigned long long xlnkGetGlobalCounter64(void); 88 | void xlnkSetGlobalCounter(unsigned long long val); 89 | 90 | #ifdef __cplusplus 91 | }; 92 | #endif 93 | #endif 94 | 95 | -------------------------------------------------------------------------------- /src/mmult.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mmult_accel.h" 5 | 6 | #define NUM_TESTS 1024 7 | 8 | #include "sds_lib.h" 9 | #define TIME_STAMP_INIT unsigned long long clock_start, clock_end; clock_start = sds_clock_counter(); 10 | #define TIME_STAMP_SW { clock_end = sds_clock_counter(); printf("Average number of processor cycles for golden version: %llu \n", (clock_end-clock_start)/NUM_TESTS); clock_start = sds_clock_counter(); } 11 | #define TIME_STAMP_ACCEL { clock_end = sds_clock_counter(); printf("Average number of processor cycles for hardware version: %llu \n", (clock_end-clock_start)/NUM_TESTS); } 12 | 13 | static void mmult_init(float *tin1Buf, float *tin2Buf, float *toutBufSw, float *toutBufHw) 14 | { 15 | int i, j; 16 | 17 | for (i = 0; i < A_NROWS; i++) { 18 | for (j = 0; j < A_NCOLS; j++) { 19 | tin1Buf[i * A_NCOLS + j] = 1+i*A_NCOLS+j; 20 | } 21 | } 22 | for (i = 0; i < B_NROWS; i++) { 23 | for (j = 0; j < B_NCOLS; j++) { 24 | tin2Buf[i * B_NCOLS + j] = rand() % (A_NCOLS * B_NCOLS); 25 | } 26 | } 27 | for (i = 0; i < A_NROWS; i++) { 28 | for (j = 0; j < B_NCOLS; j++) { 29 | toutBufSw[i * B_NCOLS + j] = 0.0; 30 | toutBufHw[i * B_NCOLS + j] = 0.0; 31 | } 32 | } 33 | } 34 | 35 | void 
mmult_golden(float *in_arr, float *in_arr2, float *out_arr) 36 | { 37 | for (int row = 0; row < A_NROWS; row++) { 38 | for (int col = 0; col < B_NCOLS; col++) { 39 | float result = 0.0; 40 | for (int k = 0; k < A_NCOLS; k++) { 41 | result += in_arr[row*A_NCOLS+k] * in_arr2[k*B_NCOLS+col]; 42 | } 43 | out_arr[row*A_NCOLS+col] = result; 44 | } 45 | } 46 | } 47 | 48 | static int mmult_result_check(float *toutBufSw, float *toutBufHw) 49 | { 50 | int i; 51 | 52 | for (i = 0; i < A_NROWS * B_NCOLS; i++) { 53 | if (toutBufSw[i] != toutBufHw[i]) { 54 | printf("Mismatch: data index=%d d=%f, dout=%f\n", i, toutBufSw[i], toutBufHw[i]); 55 | return 0; 56 | } 57 | } 58 | return 1; 59 | } 60 | 61 | void _p0_mmult_accel_0(float in_A[1024], float in_B[1024], float out_C[1024]); 62 | int mmult_test(float *tin1Buf, float *tin2Buf, float *toutBufSw, float *toutBufHw) 63 | { 64 | int i; 65 | 66 | printf("Testing mmult ...\n"); 67 | 68 | mmult_init(tin1Buf, tin2Buf, toutBufSw, toutBufHw); 69 | 70 | TIME_STAMP_INIT 71 | 72 | for (i = 0; i < NUM_TESTS; i++) { 73 | mmult_golden(tin1Buf, tin2Buf, toutBufSw); 74 | } 75 | 76 | TIME_STAMP_SW 77 | 78 | for (i = 0; i < NUM_TESTS; i++) 79 | _p0_mmult_accel_0(tin1Buf, tin2Buf, toutBufHw); 80 | 81 | TIME_STAMP_ACCEL 82 | 83 | return mmult_result_check(toutBufSw, toutBufHw); 84 | } 85 | 86 | 87 | int main(int argc, char* argv[]){ 88 | int test_passed = 0; 89 | float *tin1Buf, *tin2Buf, *toutBufSw, *toutBufHw; 90 | 91 | printf("Testing with A_NROWS = A_NCOLS = B_NCOLS = B_NROWS = %d\n", A_NROWS); 92 | 93 | tin1Buf = (float *)sds_alloc(A_NROWS * A_NCOLS * sizeof(float)); 94 | tin2Buf = (float *)sds_alloc(A_NCOLS * B_NCOLS * sizeof(float)); 95 | toutBufHw = (float *)sds_alloc(A_NROWS * B_NCOLS * sizeof(float)); 96 | toutBufSw = (float *)sds_alloc(A_NROWS * B_NCOLS * sizeof(float)); 97 | 98 | if (!tin1Buf || !tin2Buf || !toutBufHw || !toutBufSw) { 99 | if (tin1Buf) sds_free(tin1Buf); 100 | if (tin2Buf) sds_free(tin2Buf); 101 | if (toutBufHw) 
sds_free(toutBufHw); 102 | if (toutBufSw) sds_free(toutBufSw); 103 | return 2; 104 | } 105 | 106 | test_passed = mmult_test(tin1Buf, tin2Buf, toutBufSw, toutBufHw); 107 | 108 | sds_free(tin1Buf); 109 | sds_free(tin2Buf); 110 | sds_free(toutBufHw); 111 | sds_free(toutBufSw); 112 | 113 | printf("TEST %s\n", test_passed ? "PASSED" : "FAILED"); 114 | 115 | return (test_passed ? 0 : -1); 116 | } 117 | -------------------------------------------------------------------------------- /src/portinfo.c: --------------------------------------------------------------------------------
/* File: C:/workspace2015.4/pynq-mmult/SDDebug/_sds/p0/.cf_work/portinfo.c */
#include "cf_lib.h"
#include "cf_request.h"
#include "devreg.h"

#include "portinfo.h"

#include "stdio.h" // for printf

#include "xlnk_core_cf.h"
#include "accel_info.h"
#include "axi_dma_simple_dm.h"
#include "axi_lite_dm.h"

/* DMA and accelerator descriptors defined in another translation unit. */
extern axi_dma_simple_info_t _p0_dm_0;
extern axi_dma_simple_info_t _p0_dm_1;
extern axi_dma_simple_info_t _p0_dm_2;
extern accel_info_t _sds__p0_mmult_accel_0;

/* Channel info for the cmd_mmult_accel port (served by the axi_lite_* callbacks). */
axi_lite_info_t _p0_swinst_mmult_accel_0_cmd_mmult_accel_info = {
  .accel_info = &_sds__p0_mmult_accel_0,
  .reg_name = "0x28"
};

/* Channel info for the in_A port; bound to _p0_dm_1. */
axi_dma_simple_channel_info_t _p0_swinst_mmult_accel_0_in_A_info = {
  .dma_info = &_p0_dm_1,
  .in_use = 0,
  .needs_cache_flush_invalidate = 0
};

/* Channel info for the in_B port; bound to _p0_dm_0. */
axi_dma_simple_channel_info_t _p0_swinst_mmult_accel_0_in_B_info = {
  .dma_info = &_p0_dm_0,
  .in_use = 0,
  .needs_cache_flush_invalidate = 0
};

/* Channel info for the out_C port; bound to _p0_dm_2. */
axi_dma_simple_channel_info_t _p0_swinst_mmult_accel_0_out_C_info = {
  .dma_info = &_p0_dm_2,
  .in_use = 0,
  .needs_cache_flush_invalidate = 0
};

/*
 * Port table for the mmult_accel instance: one command port, two send ports
 * (in_A, in_B) and one receive port (out_C), each wired to its channel info
 * and to the open/close/transfer callbacks of its data mover.
 */
struct _p0_swblk_mmult_accel _p0_swinst_mmult_accel_0 = {
  .cmd_mmult_accel = { .base = {
      .channel_info = &_p0_swinst_mmult_accel_0_cmd_mmult_accel_info,
      .open_i = &axi_lite_open,
      .close_i = &axi_lite_close },
    .send_i = &axi_lite_send },
  .in_A = { .base = {
      .channel_info = &_p0_swinst_mmult_accel_0_in_A_info,
      .open_i = &axi_dma_simple_open,
      .close_i = &axi_dma_simple_close },
    .send_i = &axi_dma_simple_send_i },
  .in_B = { .base = {
      .channel_info = &_p0_swinst_mmult_accel_0_in_B_info,
      .open_i = &axi_dma_simple_open,
      .close_i = &axi_dma_simple_close },
    .send_i = &axi_dma_simple_send_i },
  .out_C = { .base = {
      .channel_info = &_p0_swinst_mmult_accel_0_out_C_info,
      .open_i = &axi_dma_simple_open,
      .close_i = &axi_dma_simple_close },
    .receive_ref_i = 0,
    .receive_i = &axi_dma_simple_recv_i },
};

/* Open a single port by calling its open_i callback with no request handle. */
void _p0_cf_open_port (cf_port_base_t *port)
{
  port->open_i(port, NULL);
}

/*
 * Bring up partition 0: initialise the framework context, register the
 * partition (forwarding `first` to _p0_cf_register), open the accelerator,
 * then open each of its four ports.
 */
void _p0_cf_framework_open(int first)
{
  cf_context_init();
  xlnkCounterMap();
  _p0_cf_register(first);
  cf_get_current_context();
  accel_open(&_sds__p0_mmult_accel_0);
  _p0_cf_open_port( &_p0_swinst_mmult_accel_0.cmd_mmult_accel.base );
  _p0_cf_open_port( &_p0_swinst_mmult_accel_0.in_A.base );
  _p0_cf_open_port( &_p0_swinst_mmult_accel_0.in_B.base );
  _p0_cf_open_port( &_p0_swinst_mmult_accel_0.out_C.base );
}

/*
 * Tear down partition 0: close the four ports, close the accelerator, then
 * unregister the partition (forwarding `last` to _p0_cf_unregister).
 */
void _p0_cf_framework_close(int last)
{
  cf_close_i( &_p0_swinst_mmult_accel_0.cmd_mmult_accel, NULL);
  cf_close_i( &_p0_swinst_mmult_accel_0.in_A, NULL);
  cf_close_i( &_p0_swinst_mmult_accel_0.in_B, NULL);
  cf_close_i( &_p0_swinst_mmult_accel_0.out_C, NULL);
  accel_close(&_sds__p0_mmult_accel_0);
  _p0_cf_unregister(last);
}

#define TOTAL_PARTITIONS 1
int current_partition_num = 0;

/* Per-partition open/close entry points, indexed by partition number. */
struct {
  void (*open)(int);
  void (*close)(int);
}

_ptable[TOTAL_PARTITIONS]  = {
  {.open = &_p0_cf_framework_open, .close= &_p0_cf_framework_close},
};

/*
 * Switch from the current partition to `partition_num`.
 *
 * On Linux: closes the current partition, writes /mnt/_sds/_p<N>_.bin to
 * /dev/xdevcfg through the shell, opens the requested partition and records
 * it as current.  Does nothing when `partition_num` is already current.
 * On other platforms the function is a no-op.
 *
 * `partition_num` must be in [0, TOTAL_PARTITIONS).  Out-of-range values are
 * rejected with a message on stderr: they would otherwise index past the end
 * of _ptable and call through an invalid function pointer.
 */
void switch_to_next_partition(int partition_num)
{
#ifdef __linux__
  if (partition_num < 0 || partition_num >= TOTAL_PARTITIONS) {
    fprintf(stderr, "switch_to_next_partition: invalid partition %d\n",
            partition_num);
    return;
  }
  if (current_partition_num == partition_num) {
    return;
  }

  char buf[128];
  /* Bounded formatting; build the command before tearing anything down. */
  int len = snprintf(buf, sizeof buf,
                     "cat /mnt/_sds/_p%d_.bin > /dev/xdevcfg", partition_num);
  if (len < 0 || len >= (int)sizeof buf) {
    fprintf(stderr, "switch_to_next_partition: command too long\n");
    return;
  }

  _ptable[current_partition_num].close(0);
  /*
   * The original ignored the shell status.  The open below still runs so the
   * call sequence is unchanged, but a failed load is now reported.
   */
  if (system(buf) != 0) {
    fprintf(stderr, "switch_to_next_partition: '%s' failed\n", buf);
  }
  _ptable[partition_num].open(0);
  current_partition_num = partition_num;
#else
  (void)partition_num;
#endif
}

void init_first_partition(void) __attribute__ ((constructor));
void close_last_partition(void) __attribute__ ((destructor));

/* Constructor: open partition 0 as the first partition, then set up tracing. */
void init_first_partition(void)
{
  current_partition_num = 0;
  _ptable[current_partition_num].open(1);

  sds_trace_setup();
}


/*
 * Destructor: finish performance estimation (only when built with PERF_EST),
 * clean up tracing, then close the current partition as the last one.
 */
void close_last_partition(void)
{
#ifdef PERF_EST
  apf_perf_estimation_exit();
#endif
  sds_trace_cleanup();
  _ptable[current_partition_num].close(1);
  current_partition_num = 0;
}

-------------------------------------------------------------------------------- /notebooks/pynqmmult.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### mmult on PYNQ\n", 8 | "##### Source : https://github.com/tkat0/pynqmmult\n", 9 | "\n", 10 | "* SDSoC付属の内積演算サンプルをPYNQで動かしてみた\n", 11 | "* ドライバは共有ライブラリとしてビルドし、CFFIを利用してPythonから呼び出す\n", 12 | "* CMAでアロケートした連続領域をnumpyのndarrayとして扱えるようにした\n", 13 | "* 開発環境は、SDSoC 2015.4" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import numpy as np\n", 25 | "import cffi\n", 26 | "from pynq import Overlay\n", 27 | "# load Base Overlay\n", 28 | "Overlay(\"/home/xilinx/pynq/bitstream/base.bit\").download()\n", 29 | "\n", 30 | "from pynq.drivers import xlnk" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [ 40 | { 41 | 
"name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "000121e4 B _p0_mmult_accel_0_num_out_C\n", 45 | "00012168 D _p0_swinst_mmult_accel_0\n", 46 | "00012124 D _p0_swinst_mmult_accel_0_cmd_mmult_accel_info\n", 47 | "00012144 D _p0_swinst_mmult_accel_0_in_A_info\n", 48 | "00012150 D _p0_swinst_mmult_accel_0_in_B_info\n", 49 | "0001215c D _p0_swinst_mmult_accel_0_out_C_info\n", 50 | "00012104 D _sds__p0_mmult_accel_0\n", 51 | "00001318 T mmult_accel(float*, float*, float*)\n", 52 | "000014c0 T _p0_mmult_accel_0(float*, float*, float*)\n", 53 | "---\n", 54 | "000121e4 B _p0_mmult_accel_0_num_out_C\n", 55 | "00012168 D _p0_swinst_mmult_accel_0\n", 56 | "00012124 D _p0_swinst_mmult_accel_0_cmd_mmult_accel_info\n", 57 | "00012144 D _p0_swinst_mmult_accel_0_in_A_info\n", 58 | "00012150 D _p0_swinst_mmult_accel_0_in_B_info\n", 59 | "0001215c D _p0_swinst_mmult_accel_0_out_C_info\n", 60 | "00012104 D _sds__p0_mmult_accel_0\n", 61 | "00001318 T _Z11mmult_accelPfS_S_\n", 62 | "000014c0 T _Z17_p0_mmult_accel_0PfS_S_\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "# SDSoCでビルドした共有ライブラリから、HW化した関数の名前を把握する\n", 68 | "# SW版の関数名は、mmult_accelだが、HW化すると_p0_mmult_accel_0となる。\n", 69 | "# さらに、コンパイラの名前マングリングにより_Z17_p0_mmult_accel_0PfS_S_になるよう\n", 70 | "!nm -C ../src/libpynqmmult.so | grep mmult_accel\n", 71 | "!echo ---\n", 72 | "!nm -D ../src/libpynqmmult.so | grep mmult_accel" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "### Call Accelerator" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "class Mmult():\n", 91 | " def __init__(self):\n", 92 | " self.bitfile = \"./pynqmmult.bit\"\n", 93 | " self.libfile = \"../src/libpynqmmult.so\"\n", 94 | " self.ffi = cffi.FFI()\n", 95 | " self.ffi.cdef(\"void _Z17_p0_mmult_accel_0PfS_S_(float*, float*, float*);\")\n", 96 | " self.lib = self.ffi.dlopen(self.libfile)\n", 
97 | " Overlay(self.bitfile).download()\n", 98 | " \n", 99 | " def __call__(self, a, b, c):\n", 100 | " # a,b,c is CData Object\n", 101 | " self.lib._Z17_p0_mmult_accel_0PfS_S_(a,b,c)\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "mmult = Mmult()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "# SDSoCにより高位合成したHWにDMA(not SG)経由で読み書きするメモリ領域は連続領域である必要がある\n", 124 | "# 連続領域は、CMAのAPIにより、確保する\n", 125 | "# 連続領域をndarrayとして扱う\n", 126 | "\n", 127 | "memmanager = xlnk.xlnk()\n", 128 | "ffi = cffi.FFI()\n", 129 | "\n", 130 | " # TODO 現状32x32の配列のみ対応。汎用化する\n", 131 | "def init_contiguous_ndarray(size=(32,32), dtype=\"float\"):\n", 132 | " buf = memmanager.cma_alloc(32*32, data_type=dtype)\n", 133 | " cbuf = ffi.buffer(buf, 32*32 * ffi.sizeof(dtype))\n", 134 | " return np.frombuffer(cbuf, dtype=np.float32).reshape(size), buf" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "A (32, 32) \n", 149 | "[[ 1. 1. 1. ..., 1. 1. 1.]\n", 150 | " [ 1. 1. 1. ..., 1. 1. 1.]\n", 151 | " [ 1. 1. 1. ..., 1. 1. 1.]\n", 152 | " ..., \n", 153 | " [ 1. 1. 1. ..., 1. 1. 1.]\n", 154 | " [ 1. 1. 1. ..., 1. 1. 1.]\n", 155 | " [ 1. 1. 1. ..., 1. 1. 1.]]\n", 156 | "B (32, 32) \n", 157 | "[[ 2. 2. 2. ..., 2. 2. 2.]\n", 158 | " [ 2. 2. 2. ..., 2. 2. 2.]\n", 159 | " [ 2. 2. 2. ..., 2. 2. 2.]\n", 160 | " ..., \n", 161 | " [ 2. 2. 2. ..., 2. 2. 2.]\n", 162 | " [ 2. 2. 2. ..., 2. 2. 2.]\n", 163 | " [ 2. 2. 2. ..., 2. 2. 2.]]\n", 164 | "C (32, 32) \n", 165 | "[[ 0. 0. 0. ..., 0. 0. 0.]\n", 166 | " [ 0. 0. 0. ..., 0. 0. 0.]\n", 167 | " [ 0. 0. 0. ..., 0. 0. 
0.]\n", 168 | " ..., \n", 169 | " [ 0. 0. 0. ..., 0. 0. 0.]\n", 170 | " [ 0. 0. 0. ..., 0. 0. 0.]\n", 171 | " [ 0. 0. 0. ..., 0. 0. 0.]]\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "a, pa = init_contiguous_ndarray()\n", 177 | "b, pb = init_contiguous_ndarray()\n", 178 | "c, pc = init_contiguous_ndarray()\n", 179 | "\n", 180 | "a += 1\n", 181 | "b += 2\n", 182 | "\n", 183 | "print(\"A\", a.shape, type(a))\n", 184 | "print(a)\n", 185 | "print(\"B\", b.shape, type(b))\n", 186 | "print(b)\n", 187 | "print(\"C\", c.shape, type(c))\n", 188 | "print(c)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 7, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "mmult(pa, pb, pc)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 8, 205 | "metadata": { 206 | "collapsed": false 207 | }, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "C (32, 32) \n", 214 | "[[ 64. 64. 64. ..., 64. 64. 64.]\n", 215 | " [ 64. 64. 64. ..., 64. 64. 64.]\n", 216 | " [ 64. 64. 64. ..., 64. 64. 64.]\n", 217 | " ..., \n", 218 | " [ 64. 64. 64. ..., 64. 64. 64.]\n", 219 | " [ 64. 64. 64. ..., 64. 64. 64.]\n", 220 | " [ 64. 64. 64. ..., 64. 64. 
64.]]\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "print(\"C\", c.shape, type(c))\n", 226 | "print(c)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "### Test" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 9, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "array([[ 64., 64., 64., ..., 64., 64., 64.],\n", 247 | " [ 64., 64., 64., ..., 64., 64., 64.],\n", 248 | " [ 64., 64., 64., ..., 64., 64., 64.],\n", 249 | " ..., \n", 250 | " [ 64., 64., 64., ..., 64., 64., 64.],\n", 251 | " [ 64., 64., 64., ..., 64., 64., 64.],\n", 252 | " [ 64., 64., 64., ..., 64., 64., 64.]], dtype=float32)" 253 | ] 254 | }, 255 | "execution_count": 9, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "# For comparison\n", 262 | "np.dot(a,b)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 10, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | "OK\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "# SWとHWの計算結果の全ての要素が一致することを確認\n", 282 | "if np.alltrue(c == np.dot(a, b)):\n", 283 | " print(\"OK\")\n", 284 | "else:\n", 285 | " print(\"NG\")" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### Benchmarks" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 11, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "name": "stdout", 304 | "output_type": "stream", 305 | "text": [ 306 | "100 loops, best of 3: 33.6 µs per loop\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "# HW\n", 312 | "t_hw = %timeit -n 100 -o mmult(pa, pb, pc)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 12, 318 | "metadata": { 
319 | "collapsed": false 320 | }, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "100 loops, best of 3: 7.36 ms per loop\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "# SW\n", 332 | "t_sw = %timeit -n 100 -o np.dot(a, b)" 333 | ] 334 | } 335 | ], 336 | "metadata": { 337 | "kernelspec": { 338 | "display_name": "Python 3", 339 | "language": "python", 340 | "name": "python3" 341 | }, 342 | "language_info": { 343 | "codemirror_mode": { 344 | "name": "ipython", 345 | "version": 3 346 | }, 347 | "file_extension": ".py", 348 | "mimetype": "text/x-python", 349 | "name": "python", 350 | "nbconvert_exporter": "python", 351 | "pygments_lexer": "ipython3", 352 | "version": "3.4.3+" 353 | } 354 | }, 355 | "nbformat": 4, 356 | "nbformat_minor": 0 357 | } 358 | -------------------------------------------------------------------------------- /src/include/cf_lib.h: -------------------------------------------------------------------------------- 1 | #ifndef D_cf_lib 2 | #define D_cf_lib 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | typedef int cf_status_t; 12 | typedef struct cf_context_struct cf_context_t; 13 | typedef struct cf_request_handle_struct *cf_request_handle_t; 14 | typedef struct cf_request_info_struct cf_request_info_t; 15 | typedef struct cf_port_base_struct cf_port_base_t; 16 | typedef struct cf_port_receive_struct cf_port_receive_t; 17 | typedef struct cf_port_send_struct cf_port_send_t; 18 | typedef struct cf_port_addressable_struct cf_port_addressable_t; 19 | typedef struct cf_alloc_attr_struct cf_alloc_attr_t; 20 | typedef struct cf_iovec_struct cf_iovec_t; 21 | 22 | 23 | /* 24 | * Error codes 25 | */ 26 | enum { 27 | CF_STATUS_OKAY, 28 | CF_STATUS_PENDING, 29 | CF_STATUS_EMPTY, 30 | CF_STATUS_FULL, 31 | CF_STATUS_INVALID, 32 | CF_STATUS_NOMEM, 33 | CF_STATUS_ALREADY_OPEN, 34 | CF_STATUS_NOT_OPEN, 35 | CF_STATUS_PORT_BUSY, 36 | CF_STATUS_EOS 37 | }; 38 
| 39 | 40 | /* 41 | * cf_addressable_map flags 42 | */ 43 | enum { 44 | CF_MAP_READ = 1, /* Enable read access */ 45 | CF_MAP_WRITE = 2, /* Enable write access */ 46 | CF_MAP_ATOMIC = 4 /* Enable atomic operations */ 47 | }; 48 | 49 | 50 | /* 51 | * Common port object 52 | */ 53 | struct cf_port_base_struct { 54 | void *channel_info; 55 | int (*open_i)( 56 | cf_port_base_t *port, 57 | cf_request_handle_t *request); 58 | int (*close_i)( 59 | cf_port_base_t *port, 60 | cf_request_handle_t *request); 61 | }; 62 | 63 | 64 | /* 65 | * Stream receiver port 66 | */ 67 | struct cf_port_receive_struct { 68 | cf_port_base_t base; 69 | int (*receive_ref_i)( 70 | cf_port_receive_t *port, 71 | void **buf, 72 | size_t *len, 73 | cf_request_handle_t *request); 74 | int (*receive_i)( 75 | cf_port_receive_t *port, 76 | void *buf, 77 | size_t len, 78 | size_t *bytes_received, 79 | cf_request_handle_t *request); 80 | int (*receive_iov_i)( 81 | cf_port_receive_t *port, 82 | cf_iovec_t *iov, 83 | unsigned int iovcnt, 84 | size_t *bytes_received, 85 | cf_request_handle_t *request); 86 | }; 87 | 88 | 89 | /* 90 | * Stream sender port 91 | */ 92 | struct cf_port_send_struct { 93 | cf_port_base_t base; 94 | int (*send_ref_i)( 95 | cf_port_send_t *port, 96 | void *buf, 97 | size_t len, 98 | cf_request_handle_t *request); 99 | int (*send_i)( 100 | cf_port_send_t *port, 101 | void *buf, 102 | size_t len, 103 | cf_request_handle_t *request); 104 | int (*send_iov_i)( 105 | cf_port_send_t *port, 106 | cf_iovec_t *iov, 107 | unsigned int iovcnt, 108 | cf_request_handle_t *request); 109 | }; 110 | 111 | 112 | /* 113 | * Addressable master port 114 | */ 115 | struct cf_port_addressable_struct { 116 | cf_port_base_t base; 117 | int (*getbuf_i)( 118 | cf_port_addressable_t *port, 119 | size_t offset, 120 | void *buf, 121 | size_t len, 122 | cf_request_handle_t *request); 123 | int (*setbuf_i)( 124 | cf_port_addressable_t *port, 125 | size_t offset, 126 | void *buf, 127 | size_t len, 128 | 
cf_request_handle_t *request); 129 | int (*alloc_i)( 130 | cf_port_addressable_t *port, 131 | size_t size, 132 | size_t *offset, 133 | cf_request_handle_t *request); 134 | int (*free_i)( 135 | cf_port_addressable_t *port, 136 | size_t offset, 137 | cf_request_handle_t *request); 138 | void *(*map)( 139 | cf_port_addressable_t *port, 140 | size_t offset, 141 | size_t size, 142 | int flags); 143 | }; 144 | 145 | 146 | /* 147 | * Structure used to describe scatter/gather lists and 148 | * multidimensional arrays for cf_send_iov and cf_receive_iov family of 149 | * functions. 150 | * 151 | * Example scatter/gather list: 152 | * cf_iovec_t iov[3] = { 153 | * { &buf1, 0, 1, sizeof buf1 }, 154 | * { &buf2, 0, 1, sizeof buf2 }, 155 | * { &buf3, 0, 1, sizeof buf3 } 156 | * }; 157 | * 158 | * When <buf> is null and is not the first entry in the array of 159 | * cf_iovec_t structures then the entry is a continuation of the 160 | * preceding entries that together describes all or a part of a 161 | * multidimensional array. The array must be laid out as a 162 | * consecutive byte array. 163 | * 164 | * Example partial two dimensional array: 165 | * #define real_row_size real_num_cols*real_col_size 166 | * cf_iovec_t iov[2] = { 167 | * { &buf, real_row_size*part_start_row, real_row_size, part_num_rows }, 168 | * { NULL, real_col_size*part_start_col, 1, part_num_cols*real_col_size } 169 | * }; 170 | */ 171 | struct cf_iovec_struct { 172 | void *buf; 173 | size_t offset; 174 | size_t stride; 175 | size_t elements; 176 | }; 177 | 178 | 179 | /* 180 | * Get status code. 181 | */ 182 | extern cf_status_t cf_get_status(void); 183 | 184 | 185 | /* 186 | * Set status code. 187 | */ 188 | extern void cf_set_status(cf_status_t status); 189 | 190 | 191 | /* 192 | * Asynchronous port open. 193 | * 194 | * May return before open is complete. Caller must call cf_wait() on 195 | * <request> before using port. 
196 | */ 197 | #define cf_open_i(PORT, REQUEST) \ 198 | ((PORT)->base.open_i(&(PORT)->base, (REQUEST))) 199 | 200 | 201 | /* 202 | * Synchronous port open. 203 | * 204 | * Returns when open is complete. 205 | */ 206 | #define cf_open(PORT) \ 207 | (cf_open_internal(&(PORT)->base)) 208 | extern int cf_open_internal( 209 | cf_port_base_t *port); 210 | 211 | 212 | /* 213 | * Asynchronous port close. 214 | * 215 | * May return before close is complete. Caller must call cf_wait() on 216 | * <request>. 217 | */ 218 | #define cf_close_i(PORT, REQUEST) \ 219 | ((PORT)->base.close_i(&(PORT)->base, (REQUEST))) 220 | 221 | 222 | /* 223 | * Synchronous port close. 224 | * 225 | * Returns when close is complete. 226 | */ 227 | #define cf_close(PORT) \ 228 | (cf_close_internal(&(PORT)->base)) 229 | extern int cf_close_internal( 230 | cf_port_base_t *port); 231 | 232 | 233 | /* 234 | * Asynchronous send buffer reference to stream. 235 | * 236 | * This function transfers the ownership of the buffer to the receiver 237 | * from the point in time of calling cf_send_ref_i() until the point 238 | * in time when the corresponding wait() call returns. 239 | * 240 | * May return before send is complete. Caller must call cf_wait() on 241 | * <request> before reusing the buffer. 242 | */ 243 | extern int cf_send_ref_i( 244 | cf_port_send_t *port, 245 | void *buf, 246 | size_t len, 247 | cf_request_handle_t *request); 248 | 249 | 250 | /* 251 | * Synchronous send buffer reference to stream. 252 | * 253 | * This function transfers the ownership of the buffer to the receiver 254 | * for the duration of the cf_send_ref() call. 255 | * 256 | * Returns when send is complete and buffer can be reused. 257 | */ 258 | extern int cf_send_ref( 259 | cf_port_send_t *port, 260 | void *buf, 261 | size_t len); 262 | 263 | 264 | /* 265 | * Asynchronous send buffer to stream. 266 | * 267 | * May return before send is complete. Caller must call cf_wait() on 268 | * <request> before reusing the buffer. 
269 | */ 270 | extern int cf_send_i( 271 | cf_port_send_t *port, 272 | void *buf, 273 | size_t len, 274 | cf_request_handle_t *request); 275 | 276 | 277 | /* 278 | * Synchronous send buffer to stream. 279 | * 280 | * Returns when send is complete and buffer can be reused. 281 | */ 282 | extern int cf_send( 283 | cf_port_send_t *port, 284 | void *buf, 285 | size_t len); 286 | 287 | 288 | /* 289 | * Asynchronous send of two dimensional array to stream. The buffer 290 | * must be a consecutive array of bytes, i.e. not an array of pointers 291 | * to rows. 292 | * 293 | * May return before send is complete. Caller must call cf_wait() on 294 | * <request> before reusing the buffer. 295 | */ 296 | extern int cf_send_2d_i( 297 | cf_port_send_t *port, 298 | void *buf, 299 | size_t len, 300 | size_t stride, 301 | size_t count, 302 | cf_request_handle_t *request); 303 | 304 | 305 | /* 306 | * Synchronous send of two dimensional array to stream. The buffer 307 | * must be a consecutive array of bytes, i.e. not an array of pointers 308 | * to rows. 309 | * 310 | * Returns when send is complete and buffer can be reused. 311 | */ 312 | extern int cf_send_2d( 313 | cf_port_send_t *port, 314 | void *buf, 315 | size_t len, 316 | size_t stride, 317 | size_t count); 318 | 319 | 320 | /* 321 | * Asynchronous scatter/gather send to stream. See definition of 322 | * cf_iovec_t for details about the buffer. 323 | * 324 | * May return before send is complete. Caller must call cf_wait() on 325 | * <request> before reusing the buffer. 326 | */ 327 | extern int cf_send_iov_i( 328 | cf_port_send_t *port, 329 | cf_iovec_t *iov, 330 | unsigned int iovcnt, 331 | cf_request_handle_t *request); 332 | 333 | 334 | /* 335 | * Synchronous scatter/gather send to stream. See definition of 336 | * cf_iovec_t for details about the buffer. 337 | * 338 | * Returns when send is complete and buffer can be reused. 
339 | */ 340 | extern int cf_send_iov( 341 | cf_port_send_t *port, 342 | cf_iovec_t *iov, 343 | unsigned int iovcnt); 344 | 345 | 346 | /* 347 | * Asynchronous receive data from stream. 348 | * 349 | * May return before data has been received. Caller must call 350 | * cf_wait() on <request> before using buf or len. 351 | */ 352 | extern int cf_receive_i( 353 | cf_port_receive_t *port, 354 | void *buf, 355 | size_t len, 356 | size_t *bytes_received, 357 | cf_request_handle_t *request); 358 | 359 | 360 | /* 361 | * Synchronous receive data from stream. 362 | * 363 | * Returns when data has been received and buf is populated with data. 364 | */ 365 | extern int cf_receive( 366 | cf_port_receive_t *port, 367 | void *buf, 368 | size_t len, 369 | size_t *bytes_received); 370 | 371 | 372 | /* 373 | * Asynchronous receive reference to data from stream. 374 | * 375 | * May return before data has been received. Caller must call 376 | * cf_wait() on <request> before using buf or len. 377 | * 378 | * Once reference to data has been received the caller must call 379 | * cf_release_ref() to indicate that the referenced buffer is no 380 | * longer used. 381 | */ 382 | extern int cf_receive_ref_i( 383 | cf_port_receive_t *port, 384 | void **buf, 385 | size_t *len, 386 | cf_request_handle_t *request); 387 | 388 | 389 | /* 390 | * Synchronous receive reference to data from stream. 391 | * 392 | * Returns when data has been received and *buf and *len are set. 393 | * 394 | * Once reference to data has been received the caller must call 395 | * cf_release_ref() to indicate that the referenced buffer is no 396 | * longer used. 397 | */ 398 | extern int cf_receive_ref( 399 | cf_port_receive_t *port, 400 | void **buf, 401 | size_t *len, 402 | cf_request_handle_t *request); 403 | 404 | 405 | /* 406 | * Asynchronous receive of two dimensional array from stream. The 407 | * buffer must be a consecutive array of bytes, i.e. not an array of 408 | * pointers to rows. 
409 | * 410 | * May return before data has been received. Caller must call 411 | * cf_wait() on <request> before using buf or len. 412 | */ 413 | extern int cf_receive_2d_i( 414 | cf_port_receive_t *port, 415 | void *buf, 416 | size_t len, 417 | size_t stride, 418 | size_t count, 419 | size_t *bytes_received, 420 | cf_request_handle_t *request); 421 | 422 | 423 | /* 424 | * Synchronous receive of two dimensional array from stream. The 425 | * buffer must be a consecutive array of bytes, i.e. not an array of 426 | * pointers to rows. 427 | * 428 | * Returns when data has been received and buf is populated with data. 429 | */ 430 | extern int cf_receive_2d( 431 | cf_port_receive_t *port, 432 | void *buf, 433 | size_t len, 434 | size_t stride, 435 | size_t count, 436 | size_t *bytes_received); 437 | 438 | 439 | /* 440 | * Asynchronous scatter/gather receive from stream. See definition of 441 | * cf_iovec_t for details about the buffer. 442 | * 443 | * May return before data has been received. Caller must call 444 | * cf_wait() on <request> before using buf or len. 445 | */ 446 | extern int cf_receive_iov_i( 447 | cf_port_receive_t *port, 448 | cf_iovec_t *iov, 449 | unsigned int iovcnt, 450 | size_t *bytes_received, 451 | cf_request_handle_t *request); 452 | 453 | 454 | /* 455 | * Synchronous scatter/gather receive from stream. See 456 | * definition of cf_iovec_t for details about the buffer. 457 | * 458 | * Returns when data has been received and buf is populated with data. 459 | */ 460 | extern int cf_receive_iov( 461 | cf_port_receive_t *port, 462 | cf_iovec_t *iov, 463 | unsigned int iovcnt, 464 | size_t *bytes_received); 465 | 466 | 467 | /* 468 | * Asynchronous allocate memory from addressable port. 469 | * 470 | * Allocates <size> number of bytes from addressable memory space 471 | * associated with <port> or returns error. Offset to allocated space 472 | * is stored in *offset at the completion of a successful allocation. 473 | * 474 | * May return before allocation is complete. 
Caller must call 475 | * cf_wait() on <request> before using content of *offset. 476 | */ 477 | extern int cf_addressable_alloc_i( 478 | cf_port_addressable_t *port, 479 | size_t size, 480 | size_t *offset, 481 | cf_request_handle_t *request); 482 | 483 | 484 | /* 485 | * Synchronous allocate memory from addressable port. 486 | * 487 | * Allocates <size> number of bytes from addressable memory space 488 | * associated with <port> or returns error. Offset to allocated space 489 | * is stored in *offset at the completion of a successful allocation. 490 | * 491 | * Returns on error or when allocation is complete and *offset is 492 | * populated with the offset to the start of the allocated memory. 493 | */ 494 | extern int cf_addressable_alloc( 495 | cf_port_addressable_t *port, 496 | size_t size, 497 | size_t *offset); 498 | 499 | 500 | /* 501 | * Asynchronous free memory allocated from addressable port. 502 | * 503 | * Free memory block starting at <offset> from addressable memory 504 | * space associated with <port> or returns error. The memory block 505 | * starting at <offset> must have been allocated using 506 | * cf_addressable_alloc() or related functions. 507 | * 508 | * May return before free is complete. Caller must call 509 | * cf_wait() on <request> to make sure free is complete. 510 | */ 511 | extern int cf_addressable_free_i( 512 | cf_port_addressable_t *port, 513 | size_t offset, 514 | cf_request_handle_t *request); 515 | 516 | 517 | /* 518 | * Synchronous free memory from addressable port. 519 | * 520 | * Free memory block starting at <offset> from addressable memory 521 | * space associated with <port> or returns error. The memory block 522 | * starting at <offset> must have been allocated using 523 | * cf_addressable_alloc() or related functions. 524 | * 525 | * Returns on error or when free is complete. 526 | */ 527 | extern int cf_addressable_free( 528 | cf_port_addressable_t *port, 529 | size_t offset); 530 | 531 | 532 | /* 533 | * Asynchronous get from addressable port. 
534 | * 535 | * Initiates copy of <len> number of bytes from <port> at <offset> 536 | * into <buf> or returns error. 537 | * 538 | * May return before copy is complete. Caller must call cf_wait() on 539 | * <request> before using content of buf. 540 | */ 541 | extern int cf_getbuf_i( 542 | cf_port_addressable_t *port, 543 | size_t offset, 544 | void *buf, 545 | size_t len, 546 | cf_request_handle_t *request); 547 | 548 | 549 | /* 550 | * Synchronous get from addressable port. 551 | * 552 | * Returns on error or when <len> number of bytes from <port> at 553 | * <offset> has been copied into <buf>. 554 | */ 555 | extern int cf_getbuf( 556 | cf_port_addressable_t *port, 557 | size_t offset, 558 | void *buf, 559 | size_t len); 560 | 561 | 562 | /* 563 | * Asynchronous set to addressable port. 564 | * 565 | * Initiates copy of <len> number of bytes from <buf> to <port> at 566 | * <offset> or returns error. 567 | * 568 | * May return before copy is complete. Caller must call cf_wait() on 569 | * <request> before modifying content of buf. 570 | */ 571 | extern int cf_setbuf_i( 572 | cf_port_addressable_t *port, 573 | size_t offset, 574 | void *buf, 575 | size_t len, 576 | cf_request_handle_t *request); 577 | 578 | 579 | /* 580 | * Synchronous set to addressable port. 581 | * 582 | * Returns on error or when <len> number of bytes from <buf> has been 583 | * copied to <port> at <offset>. 584 | */ 585 | extern int cf_setbuf( 586 | cf_port_addressable_t *port, 587 | size_t offset, 588 | void *buf, 589 | size_t len); 590 | 591 | /* 592 | * Synchronous map memory from addressable port into local address space. 593 | * 594 | * Maps memory <size> number of bytes starting at <offset> in 595 | * addressable memory space associated with <port> into local memory. 596 | * <flags> is set to one or more of CF_MAP_* to control how memory is 597 | * mapped. 598 | * 599 | * Returns the local address to the mapped memory, or ((void *)-1) to 600 | * indicate error. 
601 | */ 602 | extern void *cf_addressable_map( 603 | cf_port_addressable_t *port, 604 | size_t offset, 605 | size_t size, 606 | int flags); 607 | 608 | 609 | /* 610 | * Unmap memory mapped using cf_addressable_map() 611 | */ 612 | int cf_addressable_unmap( 613 | void *ptr, 614 | size_t size); 615 | 616 | 617 | /* 618 | * Release reference to buffer associated with request. 619 | * 620 | * This will either complete the send request or release the buffer 621 | * back to the system. 622 | */ 623 | extern void cf_release_ref( 624 | cf_request_handle_t *request); 625 | 626 | /* 627 | * Test if request has completed 628 | * 629 | * Returns true if request completed, otherwise returns false. 630 | * 631 | * Same semantics as cf_wait() except that it does not block. 632 | */ 633 | extern int cf_test( 634 | cf_request_handle_t *request); 635 | 636 | /* 637 | * Wait for request to complete 638 | */ 639 | extern int cf_wait( 640 | cf_request_handle_t request); 641 | 642 | /* 643 | * Wait for any request in list to complete 644 | * 645 | * Returns the index in list for the request that completed. 646 | */ 647 | extern unsigned int cf_wait_any( 648 | cf_request_handle_t *request_list, 649 | unsigned int request_count); 650 | 651 | /* 652 | * Wait for all requests in list to complete 653 | * 654 | * Returns the number of requests that completed without error. 
655 | */ 656 | extern unsigned int cf_wait_all( 657 | cf_request_handle_t *request_list, 658 | unsigned int request_count); 659 | 660 | #ifdef __cplusplus 661 | } 662 | #endif 663 | 664 | #endif /* D_cf_lib */ 665 | -------------------------------------------------------------------------------- /src/include/sds_incl.h: -------------------------------------------------------------------------------- 1 | #ifndef D_apf_incl 2 | #define D_apf_incl 3 | /* To simplify the includes for c-callable IP libraries: 4 | * this include file contains all the definitions from cf_lib.h and 5 | * some selected defintions from sds_lib.h 6 | */ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* definitions from cf_lib.h */ 11 | typedef int cf_status_t; 12 | typedef struct cf_context_struct cf_context_t; 13 | typedef struct cf_request_handle_struct *cf_request_handle_t; 14 | typedef struct cf_request_info_struct cf_request_info_t; 15 | typedef struct cf_port_base_struct cf_port_base_t; 16 | typedef struct cf_port_receive_struct cf_port_receive_t; 17 | typedef struct cf_port_send_struct cf_port_send_t; 18 | typedef struct cf_port_addressable_struct cf_port_addressable_t; 19 | typedef struct cf_alloc_attr_struct cf_alloc_attr_t; 20 | typedef struct cf_iovec_struct cf_iovec_t; 21 | 22 | 23 | /* 24 | * Error codes 25 | */ 26 | enum { 27 | CF_STATUS_OKAY, 28 | CF_STATUS_PENDING, 29 | CF_STATUS_EMPTY, 30 | CF_STATUS_FULL, 31 | CF_STATUS_INVALID, 32 | CF_STATUS_NOMEM, 33 | CF_STATUS_ALREADY_OPEN, 34 | CF_STATUS_NOT_OPEN, 35 | CF_STATUS_PORT_BUSY, 36 | CF_STATUS_EOS 37 | }; 38 | 39 | 40 | /* 41 | * cf_addressable_map flags 42 | */ 43 | enum { 44 | CF_MAP_READ = 1, /* Enable read access */ 45 | CF_MAP_WRITE = 2, /* Enable write access */ 46 | CF_MAP_ATOMIC = 4 /* Enable atomic operations */ 47 | }; 48 | 49 | 50 | /* 51 | * Common port object 52 | */ 53 | struct cf_port_base_struct { 54 | void *channel_info; 55 | int (*open_i)( 56 | cf_port_base_t *port, 57 | cf_request_handle_t 
*request); 58 | int (*close_i)( 59 | cf_port_base_t *port, 60 | cf_request_handle_t *request); 61 | }; 62 | 63 | 64 | /* 65 | * Stream receiver port 66 | */ 67 | struct cf_port_receive_struct { 68 | cf_port_base_t base; 69 | int (*receive_ref_i)( 70 | cf_port_receive_t *port, 71 | void **buf, 72 | size_t *len, 73 | cf_request_handle_t *request); 74 | int (*receive_i)( 75 | cf_port_receive_t *port, 76 | void *buf, 77 | size_t len, 78 | size_t *bytes_received, 79 | cf_request_handle_t *request); 80 | int (*receive_iov_i)( 81 | cf_port_receive_t *port, 82 | cf_iovec_t *iov, 83 | unsigned int iovcnt, 84 | size_t *bytes_received, 85 | cf_request_handle_t *request); 86 | }; 87 | 88 | 89 | /* 90 | * Stream sender port 91 | */ 92 | struct cf_port_send_struct { 93 | cf_port_base_t base; 94 | int (*send_ref_i)( 95 | cf_port_send_t *port, 96 | void *buf, 97 | size_t len, 98 | cf_request_handle_t *request); 99 | int (*send_i)( 100 | cf_port_send_t *port, 101 | void *buf, 102 | size_t len, 103 | cf_request_handle_t *request); 104 | int (*send_iov_i)( 105 | cf_port_send_t *port, 106 | cf_iovec_t *iov, 107 | unsigned int iovcnt, 108 | cf_request_handle_t *request); 109 | }; 110 | 111 | 112 | /* 113 | * Addressable master port 114 | */ 115 | struct cf_port_addressable_struct { 116 | cf_port_base_t base; 117 | int (*getbuf_i)( 118 | cf_port_addressable_t *port, 119 | size_t offset, 120 | void *buf, 121 | size_t len, 122 | cf_request_handle_t *request); 123 | int (*setbuf_i)( 124 | cf_port_addressable_t *port, 125 | size_t offset, 126 | void *buf, 127 | size_t len, 128 | cf_request_handle_t *request); 129 | int (*alloc_i)( 130 | cf_port_addressable_t *port, 131 | size_t size, 132 | size_t *offset, 133 | cf_request_handle_t *request); 134 | int (*free_i)( 135 | cf_port_addressable_t *port, 136 | size_t offset, 137 | cf_request_handle_t *request); 138 | void *(*map)( 139 | cf_port_addressable_t *port, 140 | size_t offset, 141 | size_t size, 142 | int flags); 143 | }; 144 | 145 | 146 | 
/* 147 | * Structure used to describe scatter/gather lists and 148 | * multidimensional arrays for cf_send_iov and cf_receive_iov family of 149 | * functions. 150 | * 151 | * Example scatter/gather list: 152 | * cf_iovec_t iov[3] = { 153 | * { &buf1, 0, 1, sizeof buf1 }, 154 | * { &buf2, 0, 1, sizeof buf2 }, 155 | * { &buf3, 0, 1, sizeof buf3 } 156 | * }; 157 | * 158 | * When <buf> is null and is not the first entry in the array of 159 | * cf_iovec_t structures then the entry is a continuation of the 160 | * preceding entries that together describe all or a part of a 161 | * multidimensional array. The array must be laid out as a 162 | * consecutive byte array. 163 | * 164 | * Example partial two dimensional array: 165 | * #define real_row_size real_num_cols*real_col_size 166 | * cf_iovec_t iov[2] = { 167 | * { &buf, real_row_size*part_start_row, real_row_size, part_num_rows }, 168 | * { NULL, real_col_size*part_start_col, 1, part_num_cols*real_col_size } 169 | * }; 170 | */ 171 | struct cf_iovec_struct { 172 | void *buf; 173 | size_t offset; 174 | size_t stride; 175 | size_t elements; 176 | }; 177 | 178 | 179 | /* 180 | * Get status code. 181 | */ 182 | extern cf_status_t cf_get_status(void); 183 | 184 | 185 | /* 186 | * Set status code. 187 | */ 188 | extern void cf_set_status(cf_status_t status); 189 | 190 | 191 | /* 192 | * Asynchronous port open. 193 | * 194 | * May return before open is complete. Caller must call cf_wait() on 195 | * <request> before using port. 196 | */ 197 | #define cf_open_i(PORT, REQUEST) \ 198 | ((PORT)->base.open_i(&(PORT)->base, (REQUEST))) 199 | 200 | 201 | /* 202 | * Synchronous port open. 203 | * 204 | * Returns when open is complete. 205 | */ 206 | #define cf_open(PORT) \ 207 | (cf_open_internal(&(PORT)->base)) 208 | extern int cf_open_internal( 209 | cf_port_base_t *port); 210 | 211 | 212 | /* 213 | * Asynchronous port close. 214 | * 215 | * May return before close is complete. Caller must call cf_wait() on 216 | * <request>.
217 | */ 218 | #define cf_close_i(PORT, REQUEST) \ 219 | ((PORT)->base.close_i(&(PORT)->base, (REQUEST))) 220 | 221 | 222 | /* 223 | * Synchronous port close. 224 | * 225 | * Returns when close is complete. 226 | */ 227 | #define cf_close(PORT) \ 228 | (cf_close_internal(&(PORT)->base)) 229 | extern int cf_close_internal( 230 | cf_port_base_t *port); 231 | 232 | 233 | /* 234 | * Asynchronous send buffer reference to stream. 235 | * 236 | * This function transfers the ownership of the buffer to the receiver 237 | * from the point in time of calling cf_send_ref_i() until the point 238 | * in time when the corresponding wait() call returns. 239 | * 240 | * May return before send is complete. Caller must call cf_wait() on 241 | * <request> before reusing the buffer. 242 | */ 243 | extern int cf_send_ref_i( 244 | cf_port_send_t *port, 245 | void *buf, 246 | size_t len, 247 | cf_request_handle_t *request); 248 | 249 | 250 | /* 251 | * Synchronous send buffer reference to stream. 252 | * 253 | * This function transfers the ownership of the buffer to the receiver 254 | * for the duration of the cf_send_ref() call. 255 | * 256 | * Returns when send is complete and buffer can be reused. 257 | */ 258 | extern int cf_send_ref( 259 | cf_port_send_t *port, 260 | void *buf, 261 | size_t len); 262 | 263 | 264 | /* 265 | * Asynchronous send buffer to stream. 266 | * 267 | * May return before send is complete. Caller must call cf_wait() on 268 | * <request> before reusing the buffer. 269 | */ 270 | extern int cf_send_i( 271 | cf_port_send_t *port, 272 | void *buf, 273 | size_t len, 274 | cf_request_handle_t *request); 275 | 276 | 277 | /* 278 | * Synchronous send buffer to stream. 279 | * 280 | * Returns when send is complete and buffer can be reused. 281 | */ 282 | extern int cf_send( 283 | cf_port_send_t *port, 284 | void *buf, 285 | size_t len); 286 | 287 | 288 | /* 289 | * Asynchronous send of two dimensional array to stream. The buffer 290 | * must be a consecutive array of bytes, i.e.
not an array of pointers 291 | * to rows. 292 | * 293 | * May return before send is complete. Caller must call cf_wait() on 294 | * <request> before reusing the buffer. 295 | */ 296 | extern int cf_send_2d_i( 297 | cf_port_send_t *port, 298 | void *buf, 299 | size_t len, 300 | size_t stride, 301 | size_t count, 302 | cf_request_handle_t *request); 303 | 304 | 305 | /* 306 | * Synchronous send of two dimensional array to stream. The buffer 307 | * must be a consecutive array of bytes, i.e. not an array of pointers 308 | * to rows. 309 | * 310 | * Returns when send is complete and buffer can be reused. 311 | */ 312 | extern int cf_send_2d( 313 | cf_port_send_t *port, 314 | void *buf, 315 | size_t len, 316 | size_t stride, 317 | size_t count); 318 | 319 | 320 | /* 321 | * Asynchronous scatter/gather send to stream. See definition of 322 | * cf_iovec_t for details about the buffer. 323 | * 324 | * May return before send is complete. Caller must call cf_wait() on 325 | * <request> before reusing the buffer. 326 | */ 327 | extern int cf_send_iov_i( 328 | cf_port_send_t *port, 329 | cf_iovec_t *iov, 330 | unsigned int iovcnt, 331 | cf_request_handle_t *request); 332 | 333 | 334 | /* 335 | * Synchronous scatter/gather send to stream. See definition of 336 | * cf_iovec_t for details about the buffer. 337 | * 338 | * Returns when send is complete and buffer can be reused. 339 | */ 340 | extern int cf_send_iov( 341 | cf_port_send_t *port, 342 | cf_iovec_t *iov, 343 | unsigned int iovcnt); 344 | 345 | 346 | /* 347 | * Asynchronous receive data from stream. 348 | * 349 | * May return before data has been received. Caller must call 350 | * cf_wait() on <request> before using buf or len. 351 | */ 352 | extern int cf_receive_i( 353 | cf_port_receive_t *port, 354 | void *buf, 355 | size_t len, 356 | size_t *bytes_received, 357 | cf_request_handle_t *request); 358 | 359 | 360 | /* 361 | * Synchronous receive data from stream. 362 | * 363 | * Returns when data has been received and buf is populated with data.
364 | */ 365 | extern int cf_receive( 366 | cf_port_receive_t *port, 367 | void *buf, 368 | size_t len, 369 | size_t *bytes_received); 370 | 371 | 372 | /* 373 | * Asynchronous receive reference to data from stream. 374 | * 375 | * May return before data has been received. Caller must call 376 | * cf_wait() on <request> before using buf or len. 377 | * 378 | * Once reference to data has been received the caller must call 379 | * cf_release_ref() to indicate that the referenced buffer is no 380 | * longer used. 381 | */ 382 | extern int cf_receive_ref_i( 383 | cf_port_receive_t *port, 384 | void **buf, 385 | size_t *len, 386 | cf_request_handle_t *request); 387 | 388 | 389 | /* 390 | * Synchronous receive reference to data from stream. 391 | * 392 | * Returns when data has been received and *buf and *len are set. 393 | * 394 | * Once reference to data has been received the caller must call 395 | * cf_release_ref() to indicate that the referenced buffer is no 396 | * longer used. 397 | */ 398 | extern int cf_receive_ref( 399 | cf_port_receive_t *port, 400 | void **buf, 401 | size_t *len, 402 | cf_request_handle_t *request); 403 | 404 | 405 | /* 406 | * Asynchronous receive of two dimensional array from stream. The 407 | * buffer must be a consecutive array of bytes, i.e. not an array of 408 | * pointers to rows. 409 | * 410 | * May return before data has been received. Caller must call 411 | * cf_wait() on <request> before using buf or len. 412 | */ 413 | extern int cf_receive_2d_i( 414 | cf_port_receive_t *port, 415 | void *buf, 416 | size_t len, 417 | size_t stride, 418 | size_t count, 419 | size_t *bytes_received, 420 | cf_request_handle_t *request); 421 | 422 | 423 | /* 424 | * Synchronous receive of two dimensional array from stream. The 425 | * buffer must be a consecutive array of bytes, i.e. not an array of 426 | * pointers to rows. 427 | * 428 | * Returns when data has been received and buf is populated with data.
429 | */ 430 | extern int cf_receive_2d( 431 | cf_port_receive_t *port, 432 | void *buf, 433 | size_t len, 434 | size_t stride, 435 | size_t count, 436 | size_t *bytes_received); 437 | 438 | 439 | /* 440 | * Asynchronous scatter/gather receive from stream. See definition of 441 | * cf_iovec_t for details about the buffer. 442 | * 443 | * May return before data has been received. Caller must call 444 | * cf_wait() on <request> before using buf or len. 445 | */ 446 | extern int cf_receive_iov_i( 447 | cf_port_receive_t *port, 448 | cf_iovec_t *iov, 449 | unsigned int iovcnt, 450 | size_t *bytes_received, 451 | cf_request_handle_t *request); 452 | 453 | 454 | /* 455 | * Synchronous scatter/gather receive from stream. See 456 | * definition of cf_iovec_t for details about the buffer. 457 | * 458 | * Returns when data has been received and buf is populated with data. 459 | */ 460 | extern int cf_receive_iov( 461 | cf_port_receive_t *port, 462 | cf_iovec_t *iov, 463 | unsigned int iovcnt, 464 | size_t *bytes_received); 465 | 466 | 467 | /* 468 | * Asynchronous allocate memory from addressable port. 469 | * 470 | * Allocates <size> number of bytes from addressable memory space 471 | * associated with <port> or returns error. Offset to allocated space 472 | * is stored in *offset at the completion of a successful allocation. 473 | * 474 | * May return before allocation is complete. Caller must call 475 | * cf_wait() on <request> before using content of *offset. 476 | */ 477 | extern int cf_addressable_alloc_i( 478 | cf_port_addressable_t *port, 479 | size_t size, 480 | size_t *offset, 481 | cf_request_handle_t *request); 482 | 483 | 484 | /* 485 | * Synchronous allocate memory from addressable port. 486 | * 487 | * Allocates <size> number of bytes from addressable memory space 488 | * associated with <port> or returns error. Offset to allocated space 489 | * is stored in *offset at the completion of a successful allocation.
490 | * 491 | * Returns on error or when allocation is complete and *offset is 492 | * populated with the offset to the start of the allocated memory. 493 | */ 494 | extern int cf_addressable_alloc( 495 | cf_port_addressable_t *port, 496 | size_t size, 497 | size_t *offset); 498 | 499 | 500 | /* 501 | * Asynchronous free memory allocated from addressable port. 502 | * 503 | * Free memory block starting at <offset> from addressable memory 504 | * space associated with <port> or returns error. The memory block 505 | * starting at <offset> must have been allocated using 506 | * cf_addressable_alloc() or related functions. 507 | * 508 | * May return before free is complete. Caller must call 509 | * cf_wait() on <request> to make sure free is complete. 510 | */ 511 | extern int cf_addressable_free_i( 512 | cf_port_addressable_t *port, 513 | size_t offset, 514 | cf_request_handle_t *request); 515 | 516 | 517 | /* 518 | * Synchronous free memory from addressable port. 519 | * 520 | * Free memory block starting at <offset> from addressable memory 521 | * space associated with <port> or returns error. The memory block 522 | * starting at <offset> must have been allocated using 523 | * cf_addressable_alloc() or related functions. 524 | * 525 | * Returns on error or when free is complete. 526 | */ 527 | extern int cf_addressable_free( 528 | cf_port_addressable_t *port, 529 | size_t offset); 530 | 531 | 532 | /* 533 | * Asynchronous get from addressable port. 534 | * 535 | * Initiates copy of <len> number of bytes from <port> at <offset> 536 | * into <buf> or returns error. 537 | * 538 | * May return before copy is complete. Caller must call cf_wait() on 539 | * <request> before using content of buf. 540 | */ 541 | extern int cf_getbuf_i( 542 | cf_port_addressable_t *port, 543 | size_t offset, 544 | void *buf, 545 | size_t len, 546 | cf_request_handle_t *request); 547 | 548 | 549 | /* 550 | * Synchronous get from addressable port. 551 | * 552 | * Returns on error or when <len> number of bytes from <port> at 553 | * <offset> has been copied into <buf>.
554 | */ 555 | extern int cf_getbuf( 556 | cf_port_addressable_t *port, 557 | size_t offset, 558 | void *buf, 559 | size_t len); 560 | 561 | 562 | /* 563 | * Asynchronous set to addressable port. 564 | * 565 | * Initiates copy of <len> number of bytes from <buf> to <port> at 566 | * <offset> or returns error. 567 | * 568 | * May return before copy is complete. Caller must call cf_wait() on 569 | * <request> before modifying content of buf. 570 | */ 571 | extern int cf_setbuf_i( 572 | cf_port_addressable_t *port, 573 | size_t offset, 574 | void *buf, 575 | size_t len, 576 | cf_request_handle_t *request); 577 | 578 | 579 | /* 580 | * Synchronous set to addressable port. 581 | * 582 | * Returns on error or when <len> number of bytes from <buf> has been 583 | * copied to <port> at <offset>. 584 | */ 585 | extern int cf_setbuf( 586 | cf_port_addressable_t *port, 587 | size_t offset, 588 | void *buf, 589 | size_t len); 590 | 591 | /* 592 | * Synchronous map memory from addressable port into local address space. 593 | * 594 | * Maps <size> number of bytes of memory starting at <offset> in 595 | * addressable memory space associated with <port> into local memory. 596 | * <flags> is set to one or more of CF_MAP_* to control how memory is 597 | * mapped. 598 | * 599 | * Returns the local address to the mapped memory, or ((void *)-1) to 600 | * indicate error. 601 | */ 602 | extern void *cf_addressable_map( 603 | cf_port_addressable_t *port, 604 | size_t offset, 605 | size_t size, 606 | int flags); 607 | 608 | 609 | /* 610 | * Unmap memory mapped using cf_addressable_map() 611 | */ 612 | int cf_addressable_unmap( 613 | void *ptr, 614 | size_t size); 615 | 616 | 617 | /* 618 | * Release reference to buffer associated with request. 619 | * 620 | * This will either complete the send request or release the buffer 621 | * back to the system. 622 | */ 623 | extern void cf_release_ref( 624 | cf_request_handle_t *request); 625 | 626 | /* 627 | * Test if request has completed 628 | * 629 | * Returns true if request completed, otherwise returns false.
630 | * 631 | * Same semantics as cf_wait() except that it does not block. 632 | */ 633 | extern int cf_test( 634 | cf_request_handle_t *request); 635 | 636 | /* 637 | * Wait for request to complete 638 | */ 639 | extern int cf_wait( 640 | cf_request_handle_t request); 641 | 642 | /* 643 | * Wait for any request in list to complete 644 | * 645 | * Returns the index in list for the request that completed. 646 | */ 647 | extern unsigned int cf_wait_any( 648 | cf_request_handle_t *request_list, 649 | unsigned int request_count); 650 | 651 | /* 652 | * Wait for all requests in list to complete 653 | * 654 | * Returns the number of requests that completed without error. 655 | */ 656 | extern unsigned int cf_wait_all( 657 | cf_request_handle_t *request_list, 658 | unsigned int request_count); 659 | 660 | /* trace function */ 661 | extern void sds_trace(unsigned ID, unsigned type); 662 | 663 | /* additional definitions from sds_lib.h */ 664 | extern unsigned long long sds_clock_counter(void); 665 | extern void sds_insert_req( unsigned int id, void *req, int num); 666 | #ifdef __cplusplus 667 | } 668 | #endif 669 | 670 | #endif /* D_apf_incl */ 671 | 672 | 673 | -------------------------------------------------------------------------------- /notebooks/pynqmmult.tcl: -------------------------------------------------------------------------------- 1 | 2 | ################################################################ 3 | # This is a generated script based on design: design_1 4 | # 5 | # Though there are limitations about the generated script, 6 | # the main purpose of this utility is to make learning 7 | # IP Integrator Tcl commands easier. 8 | ################################################################ 9 | 10 | ################################################################ 11 | # Check if script is running in correct Vivado version.
12 | ################################################################ 13 | set scripts_vivado_version 2015.4 14 | set current_vivado_version [version -short] 15 | 16 | if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { 17 | puts "" 18 | puts "ERROR: This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Please run the script in Vivado <$scripts_vivado_version> then open the design in Vivado <$current_vivado_version>. Upgrade the design by running \"Tools => Report => Report IP Status...\", then run write_bd_tcl to create an updated script." 19 | 20 | return 1 21 | } 22 | 23 | ################################################################ 24 | # START 25 | ################################################################ 26 | 27 | # To test this script, run the following commands from Vivado Tcl console: 28 | # source design_1_script.tcl 29 | 30 | # If you do not already have a project created, 31 | # you can create a project using the following command: 32 | # create_project project_1 myproj -part xc7z020clg400-1 33 | 34 | # CHECKING IF PROJECT EXISTS 35 | if { [get_projects -quiet] eq "" } { 36 | puts "ERROR: Please open or create a project!" 37 | return 1 38 | } 39 | 40 | 41 | 42 | # CHANGE DESIGN NAME HERE 43 | set design_name design_1 44 | 45 | # If you do not already have an existing IP Integrator design open, 46 | # you can create a design using the following command: 47 | # create_bd_design $design_name 48 | 49 | # Creating design if needed. errMsg/nRet start out empty/0 and are only filled in by the failing branches below. 50 | set errMsg "" 51 | set nRet 0 52 | 53 | set cur_design [current_bd_design -quiet] 54 | set list_cells [get_bd_cells -quiet] 55 | 56 | if { ${design_name} eq "" } { 57 | # USE CASES: 58 | # 1) Design_name not set 59 | 60 | set errMsg "ERROR: Please set the variable <design_name> to a non-empty value."
61 | set nRet 1 62 | 63 | } elseif { ${cur_design} ne "" && ${list_cells} eq "" } { 64 | # USE CASES: 65 | # 2): Current design opened AND is empty AND names same. 66 | # 3): Current design opened AND is empty AND names diff; design_name NOT in project. 67 | # 4): Current design opened AND is empty AND names diff; design_name exists in project. 68 | 69 | if { $cur_design ne $design_name } { 70 | puts "INFO: Changing value of <design_name> from <$design_name> to <$cur_design> since current design is empty." 71 | set design_name [get_property NAME $cur_design] 72 | } 73 | puts "INFO: Constructing design in IPI design <$cur_design>..." 74 | 75 | } elseif { ${cur_design} ne "" && $list_cells ne "" && $cur_design eq $design_name } { 76 | # USE CASES: 77 | # 5) Current design opened AND has components AND same names. 78 | 79 | set errMsg "ERROR: Design <$design_name> already exists in your project, please set the variable <design_name> to another value." 80 | set nRet 1 81 | } elseif { [get_files -quiet ${design_name}.bd] ne "" } { 82 | # USE CASES: 83 | # 6) Current opened design, has components, but diff names, design_name exists in project. 84 | # 7) No opened design, design_name exists in project. 85 | 86 | set errMsg "ERROR: Design <$design_name> already exists in your project, please set the variable <design_name> to another value." 87 | set nRet 2 88 | 89 | } else { 90 | # USE CASES: 91 | # 8) No opened design, design_name not in project. 92 | # 9) Current opened design, has components, but diff names, design_name not in project. 93 | 94 | puts "INFO: Currently there is no design <$design_name> in project, so creating one..." 95 | 96 | create_bd_design $design_name 97 | 98 | puts "INFO: Making design <$design_name> as current_bd_design." 99 | current_bd_design $design_name 100 | 101 | } 102 | 103 | puts "INFO: Currently the variable <design_name> is equal to \"$design_name\"."
104 | 105 | if { $nRet != 0 } { 106 | puts $errMsg 107 | return $nRet 108 | } 109 | 110 | ################################################################## 111 | # DESIGN PROCs 112 | ################################################################## 113 | 114 | 115 | 116 | # Procedure to create entire design; Provide argument to make 117 | # procedure reusable. If parentCell is "", will use root. 118 | proc create_root_design { parentCell } { 119 | 120 | if { $parentCell eq "" } { 121 | set parentCell [get_bd_cells /] 122 | } 123 | 124 | # Get object for parentCell 125 | set parentObj [get_bd_cells $parentCell] 126 | if { $parentObj == "" } { 127 | puts "ERROR: Unable to find parent cell <$parentCell>!" 128 | return 129 | } 130 | 131 | # Make sure parentObj is hier blk 132 | set parentType [get_property TYPE $parentObj] 133 | if { $parentType ne "hier" } { 134 | puts "ERROR: Parent <$parentObj> has TYPE = <$parentType>. Expected to be ." 135 | return 136 | } 137 | 138 | # Save current instance; Restore later 139 | set oldCurInst [current_bd_instance .] 
140 | 141 | # Set parent object as current 142 | current_bd_instance $parentObj 143 | 144 | 145 | # Create interface ports 146 | set DDR [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddrx_rtl:1.0 DDR ] 147 | set FIXED_IO [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_processing_system7:fixedio_rtl:1.0 FIXED_IO ] 148 | 149 | # Create ports 150 | 151 | # Create instance: acp_axcache_0xF, and set properties 152 | set acp_axcache_0xF [ create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 acp_axcache_0xF ] 153 | set_property -dict [ list \ 154 | CONFIG.CONST_VAL {15} \ 155 | CONFIG.CONST_WIDTH {4} \ 156 | ] $acp_axcache_0xF 157 | 158 | # Create instance: axi_ic_ps7_M_AXI_GP0, and set properties 159 | set axi_ic_ps7_M_AXI_GP0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_ic_ps7_M_AXI_GP0 ] 160 | set_property -dict [ list \ 161 | CONFIG.M00_HAS_REGSLICE {1} \ 162 | CONFIG.M01_HAS_REGSLICE {1} \ 163 | CONFIG.M02_HAS_REGSLICE {1} \ 164 | CONFIG.M03_HAS_REGSLICE {1} \ 165 | CONFIG.NUM_MI {4} \ 166 | CONFIG.NUM_SI {1} \ 167 | CONFIG.S00_HAS_REGSLICE {1} \ 168 | CONFIG.STRATEGY {2} \ 169 | ] $axi_ic_ps7_M_AXI_GP0 170 | 171 | # Create instance: axi_ic_ps7_S_AXI_ACP, and set properties 172 | set axi_ic_ps7_S_AXI_ACP [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_ic_ps7_S_AXI_ACP ] 173 | set_property -dict [ list \ 174 | CONFIG.M00_HAS_DATA_FIFO {2} \ 175 | CONFIG.M00_HAS_REGSLICE {1} \ 176 | CONFIG.NUM_MI {1} \ 177 | CONFIG.NUM_SI {3} \ 178 | CONFIG.S00_HAS_DATA_FIFO {2} \ 179 | CONFIG.S00_HAS_REGSLICE {1} \ 180 | CONFIG.S01_HAS_DATA_FIFO {2} \ 181 | CONFIG.S01_HAS_REGSLICE {1} \ 182 | CONFIG.S02_HAS_DATA_FIFO {2} \ 183 | CONFIG.S02_HAS_REGSLICE {1} \ 184 | CONFIG.STRATEGY {2} \ 185 | ] $axi_ic_ps7_S_AXI_ACP 186 | 187 | # Create instance: dm_0, and set properties 188 | set dm_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 dm_0 ] 189 | set_property -dict [ list \ 190 | 
CONFIG.c_dlytmr_resolution {1250} \ 191 | CONFIG.c_include_mm2s {1} \ 192 | CONFIG.c_include_mm2s_dre {1} \ 193 | CONFIG.c_include_mm2s_sf {1} \ 194 | CONFIG.c_include_s2mm {0} \ 195 | CONFIG.c_include_sg {0} \ 196 | CONFIG.c_m_axi_mm2s_data_width {64} \ 197 | CONFIG.c_m_axis_mm2s_tdata_width {64} \ 198 | CONFIG.c_mm2s_burst_size {64} \ 199 | CONFIG.c_sg_length_width {23} \ 200 | ] $dm_0 201 | 202 | # Create instance: dm_1, and set properties 203 | set dm_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 dm_1 ] 204 | set_property -dict [ list \ 205 | CONFIG.c_dlytmr_resolution {1250} \ 206 | CONFIG.c_include_mm2s {1} \ 207 | CONFIG.c_include_mm2s_dre {1} \ 208 | CONFIG.c_include_mm2s_sf {1} \ 209 | CONFIG.c_include_s2mm {0} \ 210 | CONFIG.c_include_sg {0} \ 211 | CONFIG.c_m_axi_mm2s_data_width {64} \ 212 | CONFIG.c_m_axis_mm2s_tdata_width {64} \ 213 | CONFIG.c_mm2s_burst_size {64} \ 214 | CONFIG.c_sg_length_width {23} \ 215 | ] $dm_1 216 | 217 | # Create instance: dm_2, and set properties 218 | set dm_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 dm_2 ] 219 | set_property -dict [ list \ 220 | CONFIG.c_dlytmr_resolution {1250} \ 221 | CONFIG.c_include_mm2s {0} \ 222 | CONFIG.c_include_s2mm {1} \ 223 | CONFIG.c_include_s2mm_dre {1} \ 224 | CONFIG.c_include_s2mm_sf {1} \ 225 | CONFIG.c_include_sg {0} \ 226 | CONFIG.c_m_axi_s2mm_data_width {64} \ 227 | CONFIG.c_s2mm_burst_size {64} \ 228 | CONFIG.c_s_axis_s2mm_tdata_width {64} \ 229 | CONFIG.c_sg_length_width {23} \ 230 | ] $dm_2 231 | 232 | # Create instance: mmult_accel_0, and set properties 233 | set mmult_accel_0 [ create_bd_cell -type ip -vlnv xilinx.com:hls:mmult_accel:1.0 mmult_accel_0 ] 234 | 235 | # Create instance: mmult_accel_0_if, and set properties 236 | set mmult_accel_0_if [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_accelerator_adapter:2.1 mmult_accel_0_if ] 237 | set_property -dict [ list \ 238 | CONFIG.C_AP_IARG_0_DIM_1 {1024} \ 239 | CONFIG.C_AP_IARG_0_DWIDTH {32} \ 240 
| CONFIG.C_AP_IARG_0_TYPE {1} \ 241 | CONFIG.C_AP_IARG_0_WIDTH {32} \ 242 | CONFIG.C_AP_IARG_1_DIM_1 {1024} \ 243 | CONFIG.C_AP_IARG_1_DWIDTH {32} \ 244 | CONFIG.C_AP_IARG_1_TYPE {1} \ 245 | CONFIG.C_AP_IARG_1_WIDTH {32} \ 246 | CONFIG.C_AP_OARG_0_DIM_1 {1024} \ 247 | CONFIG.C_AP_OARG_0_DWIDTH {32} \ 248 | CONFIG.C_AP_OARG_0_TYPE {1} \ 249 | CONFIG.C_AP_OARG_0_WIDTH {32} \ 250 | CONFIG.C_M_AXIS_HAS_TKEEP {1} \ 251 | CONFIG.C_M_AXIS_HAS_TSTRB {1} \ 252 | CONFIG.C_M_AXIS_TDATA_WIDTH {64} \ 253 | CONFIG.C_N_INPUT_ARGS {2} \ 254 | CONFIG.C_N_OUTPUT_ARGS {1} \ 255 | CONFIG.C_S_AXIS_TDATA_WIDTH {64} \ 256 | ] $mmult_accel_0_if 257 | 258 | # Create instance: proc_sys_reset_0_100M, and set properties 259 | set proc_sys_reset_0_100M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_0_100M ] 260 | 261 | # Create instance: proc_sys_reset_1_142M, and set properties 262 | set proc_sys_reset_1_142M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_1_142M ] 263 | 264 | # Create instance: proc_sys_reset_2_200M, and set properties 265 | set proc_sys_reset_2_200M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_2_200M ] 266 | 267 | # Create instance: proc_sys_reset_3_166M, and set properties 268 | set proc_sys_reset_3_166M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 proc_sys_reset_3_166M ] 269 | 270 | # Create instance: ps7, and set properties 271 | set ps7 [ create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 ps7 ] 272 | set_property -dict [ list \ 273 | CONFIG.PCW_APU_PERIPHERAL_FREQMHZ {650} \ 274 | CONFIG.PCW_ENET0_ENET0_IO {MIO 16 .. 
27} \ 275 | CONFIG.PCW_ENET0_GRP_MDIO_ENABLE {1} \ 276 | CONFIG.PCW_ENET0_PERIPHERAL_ENABLE {1} \ 277 | CONFIG.PCW_EN_CLK1_PORT {1} \ 278 | CONFIG.PCW_EN_CLK2_PORT {1} \ 279 | CONFIG.PCW_EN_CLK3_PORT {1} \ 280 | CONFIG.PCW_EN_CLKTRIG0_PORT {1} \ 281 | CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ {100} \ 282 | CONFIG.PCW_FPGA1_PERIPHERAL_FREQMHZ {142} \ 283 | CONFIG.PCW_FPGA2_PERIPHERAL_FREQMHZ {200} \ 284 | CONFIG.PCW_FPGA3_PERIPHERAL_FREQMHZ {160} \ 285 | CONFIG.PCW_I2C0_PERIPHERAL_ENABLE {1} \ 286 | CONFIG.PCW_IMPORT_BOARD_PRESET {C:/Users/tkato/Desktop/pynq-zynq.tcl} \ 287 | CONFIG.PCW_IRQ_F2P_INTR {1} \ 288 | CONFIG.PCW_PRESET_BANK1_VOLTAGE {LVCMOS 1.8V} \ 289 | CONFIG.PCW_QSPI_GRP_FBCLK_ENABLE {1} \ 290 | CONFIG.PCW_QSPI_PERIPHERAL_ENABLE {1} \ 291 | CONFIG.PCW_SD0_GRP_CD_ENABLE {1} \ 292 | CONFIG.PCW_SD0_PERIPHERAL_ENABLE {1} \ 293 | CONFIG.PCW_SDIO_PERIPHERAL_FREQMHZ {50} \ 294 | CONFIG.PCW_UART0_PERIPHERAL_ENABLE {1} \ 295 | CONFIG.PCW_UART0_UART0_IO {MIO 14 .. 15} \ 296 | CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY0 {0.223} \ 297 | CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY1 {0.212} \ 298 | CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY2 {0.085} \ 299 | CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY3 {0.092} \ 300 | CONFIG.PCW_UIPARAM_DDR_BUS_WIDTH {16 Bit} \ 301 | CONFIG.PCW_UIPARAM_DDR_CWL {6} \ 302 | CONFIG.PCW_UIPARAM_DDR_DEVICE_CAPACITY {4096 MBits} \ 303 | CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_0 {0.040} \ 304 | CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_1 {0.058} \ 305 | CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_2 {-0.009} \ 306 | CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_3 {-0.033} \ 307 | CONFIG.PCW_UIPARAM_DDR_DRAM_WIDTH {16 Bits} \ 308 | CONFIG.PCW_UIPARAM_DDR_FREQ_MHZ {525} \ 309 | CONFIG.PCW_UIPARAM_DDR_PARTNO {Custom} \ 310 | CONFIG.PCW_UIPARAM_DDR_T_FAW {40.0} \ 311 | CONFIG.PCW_UIPARAM_DDR_T_RAS_MIN {35.0} \ 312 | CONFIG.PCW_UIPARAM_DDR_T_RC {50.625} \ 313 | CONFIG.PCW_UIPARAM_DDR_T_RCD {13.125} \ 314 | CONFIG.PCW_UIPARAM_DDR_T_RP {13.125} \ 315 | CONFIG.PCW_USB0_PERIPHERAL_ENABLE {1} \ 316 | 
CONFIG.PCW_USE_DEFAULT_ACP_USER_VAL {1} \ 317 | CONFIG.PCW_USE_FABRIC_INTERRUPT {1} \ 318 | CONFIG.PCW_USE_M_AXI_GP0 {1} \ 319 | CONFIG.PCW_USE_S_AXI_ACP {1} \ 320 | ] $ps7 321 | 322 | # Create instance: xlconcat, and set properties 323 | set xlconcat [ create_bd_cell -type ip -vlnv xilinx.com:ip:xlconcat:2.1 xlconcat ] 324 | set_property -dict [ list \ 325 | CONFIG.NUM_PORTS {3} \ 326 | ] $xlconcat 327 | 328 | # Create interface connections 329 | connect_bd_intf_net -intf_net axi_ic_ps7_M_AXI_GP0_M00_AXI [get_bd_intf_pins axi_ic_ps7_M_AXI_GP0/M00_AXI] [get_bd_intf_pins mmult_accel_0_if/S_AXI] 330 | connect_bd_intf_net -intf_net axi_ic_ps7_M_AXI_GP0_M01_AXI [get_bd_intf_pins axi_ic_ps7_M_AXI_GP0/M01_AXI] [get_bd_intf_pins dm_0/S_AXI_LITE] 331 | connect_bd_intf_net -intf_net axi_ic_ps7_M_AXI_GP0_M02_AXI [get_bd_intf_pins axi_ic_ps7_M_AXI_GP0/M02_AXI] [get_bd_intf_pins dm_1/S_AXI_LITE] 332 | connect_bd_intf_net -intf_net axi_ic_ps7_M_AXI_GP0_M03_AXI [get_bd_intf_pins axi_ic_ps7_M_AXI_GP0/M03_AXI] [get_bd_intf_pins dm_2/S_AXI_LITE] 333 | connect_bd_intf_net -intf_net axi_ic_ps7_S_AXI_ACP_M00_AXI [get_bd_intf_pins axi_ic_ps7_S_AXI_ACP/M00_AXI] [get_bd_intf_pins ps7/S_AXI_ACP] 334 | connect_bd_intf_net -intf_net dm_0_M_AXIS_MM2S [get_bd_intf_pins dm_0/M_AXIS_MM2S] [get_bd_intf_pins mmult_accel_0_if/S_AXIS_1] 335 | connect_bd_intf_net -intf_net dm_0_M_AXI_MM2S [get_bd_intf_pins axi_ic_ps7_S_AXI_ACP/S00_AXI] [get_bd_intf_pins dm_0/M_AXI_MM2S] 336 | connect_bd_intf_net -intf_net dm_1_M_AXIS_MM2S [get_bd_intf_pins dm_1/M_AXIS_MM2S] [get_bd_intf_pins mmult_accel_0_if/S_AXIS_0] 337 | connect_bd_intf_net -intf_net dm_1_M_AXI_MM2S [get_bd_intf_pins axi_ic_ps7_S_AXI_ACP/S01_AXI] [get_bd_intf_pins dm_1/M_AXI_MM2S] 338 | connect_bd_intf_net -intf_net dm_2_M_AXI_S2MM [get_bd_intf_pins axi_ic_ps7_S_AXI_ACP/S02_AXI] [get_bd_intf_pins dm_2/M_AXI_S2MM] 339 | connect_bd_intf_net -intf_net mmult_accel_0_if_AP_CTRL [get_bd_intf_pins mmult_accel_0/ap_ctrl] [get_bd_intf_pins 
mmult_accel_0_if/AP_CTRL] 340 | connect_bd_intf_net -intf_net mmult_accel_0_if_M_AXIS_0 [get_bd_intf_pins dm_2/S_AXIS_S2MM] [get_bd_intf_pins mmult_accel_0_if/M_AXIS_0] 341 | connect_bd_intf_net -intf_net mmult_accel_0_in_A [get_bd_intf_pins mmult_accel_0/in_A] [get_bd_intf_pins mmult_accel_0_if/AP_FIFO_IARG_0] 342 | connect_bd_intf_net -intf_net mmult_accel_0_in_B [get_bd_intf_pins mmult_accel_0/in_B] [get_bd_intf_pins mmult_accel_0_if/AP_FIFO_IARG_1] 343 | connect_bd_intf_net -intf_net mmult_accel_0_out_C [get_bd_intf_pins mmult_accel_0/out_C] [get_bd_intf_pins mmult_accel_0_if/AP_FIFO_OARG_0] 344 | connect_bd_intf_net -intf_net ps7_DDR [get_bd_intf_ports DDR] [get_bd_intf_pins ps7/DDR] 345 | connect_bd_intf_net -intf_net ps7_FIXED_IO [get_bd_intf_ports FIXED_IO] [get_bd_intf_pins ps7/FIXED_IO] 346 | connect_bd_intf_net -intf_net ps7_M_AXI_GP0 [get_bd_intf_pins axi_ic_ps7_M_AXI_GP0/S00_AXI] [get_bd_intf_pins ps7/M_AXI_GP0] 347 | 348 | # Create port connections 349 | connect_bd_net -net acp_axcache_0xF_dout [get_bd_pins acp_axcache_0xF/dout] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S00_AXI_arcache] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S01_AXI_arcache] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S02_AXI_awcache] 350 | connect_bd_net -net dm_0_mm2s_introut [get_bd_pins dm_0/mm2s_introut] [get_bd_pins xlconcat/In0] 351 | connect_bd_net -net dm_1_mm2s_introut [get_bd_pins dm_1/mm2s_introut] [get_bd_pins xlconcat/In1] 352 | connect_bd_net -net dm_2_s2mm_introut [get_bd_pins dm_2/s2mm_introut] [get_bd_pins xlconcat/In2] 353 | connect_bd_net -net mmult_accel_0_if_aresetn [get_bd_pins mmult_accel_0/ap_rst_n] [get_bd_pins mmult_accel_0_if/aresetn] 354 | connect_bd_net -net proc_sys_reset_2_200M_interconnect_aresetn [get_bd_pins axi_ic_ps7_M_AXI_GP0/ARESETN] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M00_ARESETN] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M01_ARESETN] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M02_ARESETN] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M03_ARESETN] [get_bd_pins axi_ic_ps7_M_AXI_GP0/S00_ARESETN] 
[get_bd_pins axi_ic_ps7_S_AXI_ACP/ARESETN] [get_bd_pins axi_ic_ps7_S_AXI_ACP/M00_ARESETN] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S00_ARESETN] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S01_ARESETN] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S02_ARESETN] [get_bd_pins proc_sys_reset_2_200M/interconnect_aresetn] 355 | connect_bd_net -net proc_sys_reset_2_200M_peripheral_aresetn [get_bd_pins dm_0/axi_resetn] [get_bd_pins dm_1/axi_resetn] [get_bd_pins dm_2/axi_resetn] [get_bd_pins mmult_accel_0_if/m_axis_aresetn] [get_bd_pins mmult_accel_0_if/s_axi_aresetn] [get_bd_pins mmult_accel_0_if/s_axis_aresetn] [get_bd_pins proc_sys_reset_2_200M/peripheral_aresetn] 356 | connect_bd_net -net ps7_FCLK_CLK0 [get_bd_pins proc_sys_reset_0_100M/slowest_sync_clk] [get_bd_pins ps7/FCLK_CLK0] 357 | connect_bd_net -net ps7_FCLK_CLK1 [get_bd_pins proc_sys_reset_1_142M/slowest_sync_clk] [get_bd_pins ps7/FCLK_CLK1] 358 | connect_bd_net -net ps7_FCLK_CLK2 [get_bd_pins axi_ic_ps7_M_AXI_GP0/ACLK] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M00_ACLK] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M01_ACLK] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M02_ACLK] [get_bd_pins axi_ic_ps7_M_AXI_GP0/M03_ACLK] [get_bd_pins axi_ic_ps7_M_AXI_GP0/S00_ACLK] [get_bd_pins axi_ic_ps7_S_AXI_ACP/ACLK] [get_bd_pins axi_ic_ps7_S_AXI_ACP/M00_ACLK] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S00_ACLK] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S01_ACLK] [get_bd_pins axi_ic_ps7_S_AXI_ACP/S02_ACLK] [get_bd_pins dm_0/m_axi_mm2s_aclk] [get_bd_pins dm_0/s_axi_lite_aclk] [get_bd_pins dm_1/m_axi_mm2s_aclk] [get_bd_pins dm_1/s_axi_lite_aclk] [get_bd_pins dm_2/m_axi_s2mm_aclk] [get_bd_pins dm_2/s_axi_lite_aclk] [get_bd_pins mmult_accel_0/ap_clk] [get_bd_pins mmult_accel_0_if/aclk] [get_bd_pins mmult_accel_0_if/m_axis_aclk] [get_bd_pins mmult_accel_0_if/s_axi_aclk] [get_bd_pins mmult_accel_0_if/s_axis_aclk] [get_bd_pins proc_sys_reset_2_200M/slowest_sync_clk] [get_bd_pins ps7/FCLK_CLK2] [get_bd_pins ps7/M_AXI_GP0_ACLK] [get_bd_pins ps7/S_AXI_ACP_ACLK] 359 | connect_bd_net -net ps7_FCLK_CLK3 
[get_bd_pins proc_sys_reset_3_166M/slowest_sync_clk] [get_bd_pins ps7/FCLK_CLK3] 360 | connect_bd_net -net ps7_FCLK_RESET0_N [get_bd_pins proc_sys_reset_0_100M/ext_reset_in] [get_bd_pins proc_sys_reset_1_142M/ext_reset_in] [get_bd_pins proc_sys_reset_2_200M/ext_reset_in] [get_bd_pins proc_sys_reset_3_166M/ext_reset_in] [get_bd_pins ps7/FCLK_RESET0_N] 361 | connect_bd_net -net xlconcat_0_dout [get_bd_pins ps7/IRQ_F2P] [get_bd_pins xlconcat/dout] 362 | 363 | # Create address segments 364 | create_bd_addr_seg -range 0x20000000 -offset 0x0 [get_bd_addr_spaces dm_0/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_DDR_LOWOCM] SEG_ps7_ACP_DDR_LOWOCM 365 | create_bd_addr_seg -range 0x400000 -offset 0xE0000000 [get_bd_addr_spaces dm_0/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_IOP] SEG_ps7_ACP_IOP 366 | create_bd_addr_seg -range 0x40000000 -offset 0x40000000 [get_bd_addr_spaces dm_0/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_M_AXI_GP0] SEG_ps7_ACP_M_AXI_GP0 367 | create_bd_addr_seg -range 0x1000000 -offset 0xFC000000 [get_bd_addr_spaces dm_0/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_QSPI_LINEAR] SEG_ps7_ACP_QSPI_LINEAR 368 | create_bd_addr_seg -range 0x20000000 -offset 0x0 [get_bd_addr_spaces dm_1/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_DDR_LOWOCM] SEG_ps7_ACP_DDR_LOWOCM 369 | create_bd_addr_seg -range 0x400000 -offset 0xE0000000 [get_bd_addr_spaces dm_1/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_IOP] SEG_ps7_ACP_IOP 370 | create_bd_addr_seg -range 0x40000000 -offset 0x40000000 [get_bd_addr_spaces dm_1/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_M_AXI_GP0] SEG_ps7_ACP_M_AXI_GP0 371 | create_bd_addr_seg -range 0x1000000 -offset 0xFC000000 [get_bd_addr_spaces dm_1/Data_MM2S] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_QSPI_LINEAR] SEG_ps7_ACP_QSPI_LINEAR 372 | create_bd_addr_seg -range 0x20000000 -offset 0x0 [get_bd_addr_spaces dm_2/Data_S2MM] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_DDR_LOWOCM] SEG_ps7_ACP_DDR_LOWOCM 373 | create_bd_addr_seg -range 0x400000 
-offset 0xE0000000 [get_bd_addr_spaces dm_2/Data_S2MM] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_IOP] SEG_ps7_ACP_IOP 374 | create_bd_addr_seg -range 0x40000000 -offset 0x40000000 [get_bd_addr_spaces dm_2/Data_S2MM] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_M_AXI_GP0] SEG_ps7_ACP_M_AXI_GP0 375 | create_bd_addr_seg -range 0x1000000 -offset 0xFC000000 [get_bd_addr_spaces dm_2/Data_S2MM] [get_bd_addr_segs ps7/S_AXI_ACP/ACP_QSPI_LINEAR] SEG_ps7_ACP_QSPI_LINEAR 376 | create_bd_addr_seg -range 0x10000 -offset 0x40400000 [get_bd_addr_spaces ps7/Data] [get_bd_addr_segs dm_0/S_AXI_LITE/Reg] SEG_dm_0_Reg 377 | create_bd_addr_seg -range 0x10000 -offset 0x40410000 [get_bd_addr_spaces ps7/Data] [get_bd_addr_segs dm_1/S_AXI_LITE/Reg] SEG_dm_1_Reg 378 | create_bd_addr_seg -range 0x10000 -offset 0x40420000 [get_bd_addr_spaces ps7/Data] [get_bd_addr_segs dm_2/S_AXI_LITE/Reg] SEG_dm_2_Reg 379 | create_bd_addr_seg -range 0x10000 -offset 0x43C00000 [get_bd_addr_spaces ps7/Data] [get_bd_addr_segs mmult_accel_0_if/S_AXI/Reg] SEG_mmult_accel_0_if_Reg 380 | 381 | # Perform GUI Layout 382 | regenerate_bd_layout -layout_string { 383 | guistr: "# # String gsaved with Nlview 6.5.5 2015-06-26 bk=1.3371 VDI=38 GEI=35 GUI=JA:1.6 384 | # -string -flagsOSRD 385 | preplace port DDR -pg 1 -y 330 -defaultsOSRD 386 | preplace port FIXED_IO -pg 1 -y 350 -defaultsOSRD 387 | preplace inst proc_sys_reset_2_200M -pg 1 -lvl 3 -y 440 -defaultsOSRD 388 | preplace inst ps7 -pg 1 -lvl 2 -y 420 -defaultsOSRD 389 | preplace inst proc_sys_reset_0_100M -pg 1 -lvl 3 -y 80 -defaultsOSRD 390 | preplace inst xlconcat -pg 1 -lvl 1 -y 410 -defaultsOSRD 391 | preplace inst proc_sys_reset_1_142M -pg 1 -lvl 3 -y 240 -defaultsOSRD 392 | preplace inst proc_sys_reset_3_166M -pg 1 -lvl 3 -y 600 -defaultsOSRD 393 | preplace netloc ps7_FIXED_IO 1 2 2 NJ 350 NJ 394 | preplace netloc ps7_FCLK_CLK0 1 2 1 540 395 | preplace netloc ps7_FCLK_CLK1 1 2 1 550 396 | preplace netloc ps7_DDR 1 2 2 NJ 330 NJ 397 | preplace netloc xlconcat_0_dout 1 1 1 N 
398 | preplace netloc ps7_FCLK_CLK2 1 2 1 570 399 | preplace netloc ps7_FCLK_CLK3 1 2 1 540 400 | preplace netloc ps7_FCLK_RESET0_N 1 2 1 560 401 | levelinfo -pg 1 0 90 360 720 890 -top 0 -bot 690 402 | ", 403 | } 404 | 405 | # Restore current instance 406 | current_bd_instance $oldCurInst 407 | 408 | save_bd_design 409 | } 410 | # End of create_root_design() 411 | 412 | 413 | ################################################################## 414 | # MAIN FLOW 415 | ################################################################## 416 | 417 | create_root_design "" 418 | 419 | 420 | --------------------------------------------------------------------------------