├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── Makefile ├── README.md ├── ThunderGP.mk ├── ThunderGP_camera_ready-pdfa.pdf ├── application ├── ar │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ ├── l2.h │ └── main.cpp ├── bfs │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ ├── l2.h │ └── main.cpp ├── casair │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── customized_apply.cpp │ ├── dataPrepare.cpp │ ├── host_vertex_apply.cpp │ ├── l2.h │ └── main.cpp ├── casir │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── customized_apply.cpp │ ├── dataPrepare.cpp │ ├── host_vertex_apply.cpp │ ├── l2.h │ └── main.cpp ├── cc │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ ├── l2.h │ └── main.cpp ├── common.mk ├── global_config.h ├── para_check.h ├── pr │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ └── l2.h ├── spmv │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ └── l2.h ├── sssp │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ └── l2.h ├── template │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── host_vertex_apply.cpp │ ├── l2.h │ └── vertex_apply.cpp └── wcc │ ├── apply_kernel.mk │ ├── build.mk │ ├── config.mk │ ├── dataPrepare.cpp │ └── l2.h ├── automation ├── auto_gen_code.mk ├── auto_gen_makefile.mk ├── auto_gen_parameters.mk ├── devices │ ├── device_common.h │ ├── xilinx_u200_xdma_201830_2.h │ ├── xilinx_u250_xdma_201830_2.h │ └── xilinx_vcu1525_xdma_201830_1.h ├── makefile_gen.cpp ├── para_gen.cpp ├── parser.cpp ├── parser.h ├── parser │ ├── customize.cpp │ ├── customize.h │ ├── customize_str.h │ ├── kernel_interface.cpp │ ├── kernel_interface.h │ ├── makefile.cpp │ ├── makefile.h │ ├── mem_interface.cpp │ └── mem_interface.h ├── parser_debug.cpp └── parser_debug.h ├── dataset ├── README.md 
├── kronecker_generator.m ├── rmat-14-32.txt └── rmat.m ├── docs ├── algorithm_mapping.md ├── api_details.md ├── compile_arch.md ├── images │ ├── GAS.png │ ├── GASmodel.png │ ├── SPMV.png │ ├── ThunderGP.png │ ├── automation.png │ ├── dataset.png │ ├── l2_dataflow.png │ ├── mem_hir.png │ ├── overview.png │ ├── sche0.png │ ├── sche1.png │ ├── scheduling.png │ ├── scheduling0.png │ ├── scheduling1.png │ ├── scheduling2.png │ ├── scheduling3.png │ ├── scheduling4.png │ └── scheduling5.png ├── memory.md ├── results.md ├── scheduling.md └── verification.md ├── libfpga ├── common_template │ ├── apply_kernel.mk │ ├── apply_top.cpp │ └── scatter_gather_top.cpp ├── customize_template │ ├── customize_apply_cl_kernel.h │ ├── customize_apply_kernel.mk │ ├── customize_apply_top.cpp │ └── customize_mem.h ├── fpga_application.h ├── fpga_apply.h ├── fpga_cache.h ├── fpga_decoder.h ├── fpga_edge_prop.h ├── fpga_filter.h ├── fpga_gather.h ├── fpga_global_mem.h ├── fpga_gs_top.h ├── fpga_process_edge.h ├── fpga_raw_solver.h ├── fpga_slice.h └── graph_fpga.h ├── libgraph ├── common.h ├── default_entry.cpp ├── host_graph_api.h ├── host_graph_data_structure.h ├── host_graph_dataflow.cpp ├── host_graph_partition.cpp ├── host_graph_sw.h ├── kernel │ ├── host_graph_kernel.cpp │ └── host_graph_kernel.h ├── memory │ ├── he_mapping.cpp │ ├── he_mem.cpp │ ├── he_mem.h │ ├── he_mem_attr.h │ ├── he_mem_config.h │ └── he_mem_id.h ├── misc │ ├── data_helper.cpp │ ├── graph.cpp │ ├── graph.h │ ├── host_graph_csv.hpp │ ├── host_graph_mem.cpp │ ├── host_graph_mem.h │ └── host_graph_misc_inner.h ├── scheduler │ ├── host_graph_scheduler.cpp │ ├── host_graph_scheduler.h │ ├── normal │ │ └── scheduler.cpp │ └── secondOrderEstimator │ │ └── scheduler.cpp ├── test │ └── test_col.c └── verification │ ├── host_graph_cmodel.cpp │ ├── host_graph_verification.h │ ├── host_graph_verification_apply.cpp │ ├── host_graph_verification_gs.cpp │ └── host_graph_verification_inner.h └── utils ├── automation.sh ├── 
bitstream.mk ├── clean.mk ├── help.mk ├── hw_emu.sh ├── main.mk ├── opencl.mk ├── report_usage.tcl ├── resetfpga.sh ├── sdaccel.ini ├── tool_compile_check.sh ├── tool_grep.sh ├── tool_profile.sh ├── tool_rebuild.sh ├── tool_release.sh ├── tool_report.sh ├── tool_test.sh ├── tool_test_all.sh ├── tool_test_app.sh ├── tool_timing.sh ├── utils.mk └── xcl ├── xcl.c ├── xcl.h └── xcl.mk /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. 
I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.diff 2 | *.log 3 | host_graph_fpga* 4 | _x 5 | xclbin 6 | .Xil 7 | *.str 8 | sdx_* 9 | *.json 10 | *.jou 11 | release* 12 | test_log* 13 | xclbin* 14 | *.protoinst 15 | *.csv 16 | sdaccel.ini 17 | .run 18 | csr 19 | *.xclbin 20 | *.sh 21 | tmp_fpga_top 22 | tmp_para 23 | code_gen 24 | para_gen 25 | makefile_gen -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at xtra-computing@googlegroups.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include ThunderGP.mk 2 | 3 | .PHONY: all clean exe hwemuprepare 4 | 5 | 6 | include utils/main.mk -------------------------------------------------------------------------------- /ThunderGP.mk: -------------------------------------------------------------------------------- 1 | TARGETS := hw 2 | # emu or acc: 3 | # hw 4 | # hw_emu 5 | 6 | APP := 7 | # pass in by app= 8 | 9 | TARGET_BANDWIDTH := 77 10 | # target memory bandwidth in GB/s 11 | # max: 77GB/s 12 | # this value can be overridden by $(app)/build.mk 13 | 14 | DEVICES := xilinx_vcu1525_xdma_201830_1 15 | # device list: 16 | # 
xilinx_vcu1525_xdma_201830_1 17 | # xilinx_u200_xdma_201830_2 18 | # xilinx_u250_xdma_201830_2 19 | -------------------------------------------------------------------------------- /ThunderGP_camera_ready-pdfa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/ThunderGP_camera_ready-pdfa.pdf -------------------------------------------------------------------------------- /application/ar/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/ar/apply_kernel.mk -------------------------------------------------------------------------------- /application/ar/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=false 4 | HAVE_UNSIGNED_PROP=false 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=true 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=false 16 | -------------------------------------------------------------------------------- /application/ar/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=250 2 | 3 | QUEUE_SIZE_FILTER=16 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=6 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/ar/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | 4 | 5 | static unsigned int avg_outdegree; 6 | 7 | unsigned int dataPrepareGetArg(graphInfo *info) 8 | { 9 | return avg_outdegree; 10 | } 11 | 12 | int 
dataPrepareProperty(graphInfo *info) 13 | { 14 | int *outDeg = (int *)get_host_mem_pointer(MEM_ID_OUT_DEG_ORIGIN); 15 | prop_t *vertexPushinProp = (prop_t*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 16 | 17 | int vertexNum = info->vertexNum; 18 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size/sizeof(int); 19 | 20 | unsigned int total_outdegree = 0; 21 | 22 | for (int i = 0; i < vertexNum; i++) 23 | { 24 | vertexPushinProp[i] = 0; 25 | total_outdegree += outDeg[i]; 26 | } 27 | avg_outdegree = (vertexNum > 0) ? (unsigned int)(((double)total_outdegree) / vertexNum) : 0; /* empty graph: avoid converting a 0/0 NaN to unsigned */ 28 | 29 | for (int i = vertexNum; i < alignedVertexNum; i++) { 30 | vertexPushinProp[i] = 0; 31 | } 32 | return 0; 33 | } -------------------------------------------------------------------------------- /application/ar/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define kDamp (0.85f) 6 | #define kDampFixPoint 108//(0.85 << 7) // * 128 7 | 8 | #define SCALE_DEGREE (16) 9 | #define SCALE_DAMPING (7) 10 | 11 | /* source vertex property process */ 12 | inline prop_t preprocessProperty(prop_t srcProp) 13 | { 14 | return (srcProp); 15 | } 16 | 17 | /* source vertex property & edge property */ 18 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 19 | { 20 | return (srcProp); 21 | } 22 | 23 | /* destination property update dst buffer update */ 24 | inline prop_t gatherFunc(prop_t ori, prop_t update) 25 | { 26 | return ((ori) + (update)); 27 | } 28 | 29 | inline prop_t applyFunc( prop_t tProp, 30 | prop_t source, 31 | prop_t outDeg, 32 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 33 | unsigned int arg 34 | ) 35 | { 36 | 37 | prop_t new_score ; 38 | prop_t old_score ; 39 | unsigned int C_avg = arg; 40 | 41 | prop_t tmp; 42 | prop_t C_Ta = outDeg; 43 | tmp = (1 << SCALE_DEGREE ) / (C_Ta + C_avg); 44 | 45 | old_score = source * tmp; 46 | new_score = kDampFixPoint * tProp + (unsigned int) ((1 << (SCALE_DEGREE + 
SCALE_DAMPING)) * (1.0f - kDamp)); 47 | 48 | prop_t update = (new_score * tmp); 49 | 50 | extra[0] = (new_score - old_score) > 0 ? (new_score - old_score) : (old_score - new_score) ; 51 | 52 | return update; 53 | } 54 | #endif /* __L2_H__ */ 55 | -------------------------------------------------------------------------------- /application/ar/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "host_graph_api.h" 7 | #include "host_graph_verification.h" 8 | 9 | using namespace std; 10 | 11 | graphInfo graphDataInfo; 12 | 13 | int main(int argc, char **argv) { 14 | 15 | char * xcl_file = NULL; 16 | if (argc > 1) 17 | { 18 | xcl_file = argv[1]; 19 | } 20 | 21 | std::string gName; 22 | if (argc > 2) 23 | { 24 | gName = argv[2]; 25 | } 26 | else 27 | { 28 | gName = "wiki-talk"; 29 | } 30 | std::string mode = "normal"; 31 | 32 | 33 | DEBUG_PRINTF("start main\n"); 34 | 35 | acceleratorInit("graph_fpga", xcl_file); 36 | 37 | acceleratorDataLoad(gName, mode, &graphDataInfo); 38 | 39 | acceleratorDataPreprocess(&graphDataInfo); 40 | 41 | for (int runCounter = 0 ; runCounter < 10 ; runCounter ++) 42 | { 43 | double startStamp, endStamp; 44 | startStamp = getCurrentTimestamp(); 45 | 46 | acceleratorSuperStep(runCounter, &graphDataInfo); 47 | 48 | endStamp = getCurrentTimestamp(); 49 | 50 | /* profile */ 51 | acceleratorProfile(runCounter, runCounter, &graphDataInfo, endStamp - startStamp); 52 | } 53 | acceleratorDeinit(); 54 | 55 | return 0; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /application/bfs/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/bfs/apply_kernel.mk -------------------------------------------------------------------------------- 
/application/bfs/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=false 4 | HAVE_UNSIGNED_PROP=true 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=false 16 | 17 | -------------------------------------------------------------------------------- /application/bfs/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=250 2 | 3 | QUEUE_SIZE_FILTER=32 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=7 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/bfs/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | unsigned int dataPrepareGetArg(graphInfo *info) 9 | { 10 | return 0; 11 | } 12 | 13 | int dataPrepareProperty(graphInfo *info) 14 | { 15 | int *vertexPushinProp = (int*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 16 | 17 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size / sizeof(int); 18 | 19 | for (int i = 0; i < alignedVertexNum; i++) 20 | { 21 | vertexPushinProp[i] = MAX_PROP; 22 | } 23 | /* no vertex to start from: report failure instead of dividing by zero */ 24 | if (info->vertexNum <= 0) 25 | { 26 | return -1; 27 | } 28 | /* rand() % n always lands in [0, n); the former rand() / ((RAND_MAX + 1u) / n) form used a truncated bucket width and could yield n itself */ 29 | int select_index = std::rand() % info->vertexNum; 30 | vertexPushinProp[select_index] = 0x80000001; 31 | return 0; 32 | } -------------------------------------------------------------------------------- /application/bfs/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define MAX_PROP (INT_MAX - 1) 6 | 7 | #define VERTEX_ACTIVE_BIT_MASK (0x80000000) 8 | #define IS_ACTIVE_VERTEX(a) ((((((a) & VERTEX_ACTIVE_BIT_MASK) == 
VERTEX_ACTIVE_BIT_MASK))) ? 1 : 0) 9 | 10 | /* source vertex property process */ 11 | inline prop_t preprocessProperty(prop_t srcProp) 12 | { 13 | return ((srcProp) + 1); 14 | } 15 | 16 | /* source vertex property & edge property */ 17 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 18 | { 19 | return (srcProp); 20 | } 21 | 22 | /* Gather: take the update when it is non-zero and smaller than ori (active bit masked off on both), or when ori is zero; otherwise keep ori */ 23 | inline prop_t gatherFunc(prop_t ori, prop_t update) 24 | { 25 | return ( 26 | ( 27 | ( 28 | (((ori) & (~VERTEX_ACTIVE_BIT_MASK)) > ((update) & (~VERTEX_ACTIVE_BIT_MASK))) 29 | && (update != 0) 30 | ) 31 | || (ori == 0x0) 32 | ) ? (update) : (ori) 33 | ); 34 | } 35 | 36 | inline prop_t applyFunc( prop_t tProp, 37 | prop_t source, 38 | prop_t outDeg, 39 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 40 | unsigned int arg 41 | ) 42 | { 43 | prop_t update = 0; 44 | 45 | prop_t uProp = source; 46 | prop_t wProp; 47 | if (((tProp & VERTEX_ACTIVE_BIT_MASK) == VERTEX_ACTIVE_BIT_MASK) && (uProp == MAX_PROP)) 48 | { 49 | extra[0] = 1; 50 | wProp = tProp; // current active vertex, not traversed 51 | } 52 | else 53 | { 54 | extra[0] = 0; 55 | wProp = uProp & 0x7fffffff; // otherwise keep the source value with the active bit cleared 56 | } 57 | update = wProp; 58 | 59 | return update; 60 | } 61 | 62 | #endif /* __L2_H__ */ 63 | -------------------------------------------------------------------------------- /application/bfs/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "host_graph_api.h" 7 | #include "host_graph_verification.h" 8 | 9 | using namespace std; 10 | 11 | graphInfo graphDataInfo; 12 | 13 | int main(int argc, char **argv) { 14 | 15 | char * xcl_file = NULL; 16 | if (argc > 1) 17 | { 18 | xcl_file = argv[1]; 19 | } 20 | 21 | std::string gName; 22 | if (argc > 2) 23 | { 24 | gName = argv[2]; 25 | } 26 | else 27 | { 28 | gName = "wiki-talk"; 29 | } 30 | std::string mode = "normal"; 31 | 32 | 33 | 34 | 
DEBUG_PRINTF("start main\n"); 35 | 36 | acceleratorInit("graph_fpga", xcl_file); 37 | 38 | acceleratorDataLoad(gName, mode, &graphDataInfo); 39 | 40 | acceleratorDataPreprocess(&graphDataInfo); 41 | 42 | int runCounter = 0; 43 | int activeVertices = 1; 44 | while (activeVertices != 0) 45 | { 46 | 47 | double startStamp, endStamp; 48 | startStamp = getCurrentTimestamp(); 49 | 50 | acceleratorSuperStep(runCounter, &graphDataInfo); 51 | 52 | endStamp = getCurrentTimestamp(); 53 | 54 | int *reg = (int *)acceleratorQueryRegister(); 55 | activeVertices = reg[0]; 56 | DEBUG_PRINTF("activeVertice : %d \n", activeVertices); 57 | 58 | 59 | /* profile */ 60 | acceleratorProfile(runCounter, runCounter, &graphDataInfo, endStamp - startStamp); 61 | runCounter++; 62 | } 63 | acceleratorDeinit(); 64 | 65 | return 0; 66 | } 67 | 68 | -------------------------------------------------------------------------------- /application/casair/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/casair/apply_kernel.mk -------------------------------------------------------------------------------- /application/casair/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=true 4 | HAVE_UNSIGNED_PROP=false 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=true 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=false 16 | 17 | #override bandwidth 18 | TARGET_BANDWIDTH = 17 19 | 20 | #override partition_size 21 | TARGET_PARTITION_SIZE = 66536 22 | -------------------------------------------------------------------------------- /application/casair/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=300 2 | 3 | QUEUE_SIZE_FILTER=32 
4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=7 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/casair/customized_apply.cpp: -------------------------------------------------------------------------------- 1 | 2 | // pre-define: 3 | // uProp = theta_i * N 4 | // tProp = sum(sourceVertexProp * w) in scatter-gather stage 5 | // wProp: write back value 6 | 7 | /* Apply: updates all vertices for next iteration */ 8 | int applyFunc( ... ) 9 | { 10 | #pragma THUNDERGP APPLY_BASE_TYPE float 11 | 12 | #pragma THUNDERGP DEF_ARRAY theta_s 13 | float theta_s; 14 | #pragma THUNDERGP DEF_ARRAY theta_a 15 | float theta_a; 16 | #pragma THUNDERGP DEF_ARRAY theta_r 17 | float theta_r; 18 | #pragma THUNDERGP DEF_INPUT_ONLY_ARRAY alpha 19 | float alpha; 20 | #pragma THUNDERGP DEF_INPUT_ONLY_ARRAY pi 21 | float pi; 22 | #pragma THUNDERGP DEF_INPUT_ONLY_ARRAY N 23 | float N; 24 | #pragma THUNDERGP DEF_SCALAR beta 25 | float beta; 26 | #pragma THUNDERGP DEF_SCALAR gamma 27 | float gamma; 28 | 29 | 30 | #pragma THUNDERGP USER_APPLY_CODE_START 31 | //start 32 | 33 | float n = 1 / N; 34 | float theta_i = uProp * n / FIXED_SCALE; 35 | float weightSum = tProp / FIXED_SCALE; 36 | float updated_i = beta * pi * theta_s * (theta_i + weightSum * n); 37 | float theta_i_update = (1 - gamma) * theta_i + updated_i; 38 | 39 | float newtheta_a = theta_a + alpha * theta_s; 40 | float newtheta_s = (1 - alpha) * theta_s - updated_i; 41 | float newtheta_r = theta_r + gamma * theta_i; 42 | 43 | wProp = (theta_i_update * N) * FIXED_SCALE; 44 | 45 | #pragma THUNDERGP USER_APPLY_CODE_END 46 | //end 47 | } 48 | -------------------------------------------------------------------------------- /application/casair/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | #include "customize_mem_1.h" 4 | 5 | 6 | 
const float epsilion = 0.077; 7 | const float nu = 0.023; 8 | 9 | #define DATA_PATH "../../sir/" 10 | #define MEM_ID_LOAD_EDGE_PROP (MEM_ID_USER_DEFINE_BASE) 11 | #define MEM_ID_LOAD_VERTEX_PROP (MEM_ID_USER_DEFINE_BASE + 1) 12 | 13 | int dataPrepareProperty(graphInfo *info) 14 | { 15 | 16 | global.gamma = nu; 17 | global.beta = epsilion; 18 | 19 | load_from_csv(DATA_PATH"input_S0.csv", MEM_ID_THETA_S, MEM_ATTR_THETA_S); 20 | output_init(MEM_ID_NEWTHETA_S, MEM_ATTR_NEWTHETA_S, MEM_ID_THETA_S); 21 | load_from_csv(DATA_PATH"input_A0.csv", MEM_ID_THETA_A, MEM_ATTR_THETA_A); 22 | output_init(MEM_ID_NEWTHETA_A, MEM_ATTR_NEWTHETA_A, MEM_ID_THETA_A); 23 | load_from_csv(DATA_PATH"input_R0.csv", MEM_ID_THETA_R, MEM_ATTR_THETA_R); 24 | output_init(MEM_ID_NEWTHETA_R, MEM_ATTR_NEWTHETA_R, MEM_ID_THETA_R); 25 | 26 | load_from_csv(DATA_PATH"input_alpha.csv", MEM_ID_ALPHA, MEM_ATTR_ALPHA); 27 | load_from_csv(DATA_PATH"input_pi.csv", MEM_ID_PI, MEM_ATTR_PI); 28 | 29 | float * pop = load_from_csv(DATA_PATH"input_pop.csv", MEM_ID_N, MEM_ATTR_N); 30 | float * i0 = load_from_csv(DATA_PATH"input_I0.csv", MEM_ID_LOAD_VERTEX_PROP, ATTR_HOST_ONLY); 31 | 32 | if ((pop == NULL) || (i0 == NULL)) 33 | { 34 | return -1; 35 | } 36 | 37 | prop_t *vertexPushinProp = (prop_t*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 38 | int vertexNum = info->vertexNum; 39 | for (int i = 0; i < info->vertexNum; i++) 40 | { 41 | vertexPushinProp[i] = (int)(i0[i] * pop[i] * FIXED_SCALE); 42 | } 43 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size / sizeof(int); 44 | for (int i = vertexNum; i < alignedVertexNum; i++) { 45 | vertexPushinProp[i] = 0; 46 | } 47 | 48 | int * loadedEdgeProp = load_from_csv(DATA_PATH"edge_prop.txt", MEM_ID_LOAD_EDGE_PROP, ATTR_HOST_ONLY); 49 | prop_t *edgeProp = (prop_t*)get_host_mem_pointer(MEM_ID_EDGE_PROP); 50 | int edgeNum = info->edgeNum; 51 | for (int i = 0; i < edgeNum; i++) 52 | { 53 | edgeProp[i] = loadedEdgeProp[i]; 54 | } 55 | int alignedEdgeNum = 
get_he_mem(MEM_ID_EDGE_PROP)->size / sizeof(int); 56 | for (int i = edgeNum; i < alignedEdgeNum; i++) 57 | { 58 | edgeProp[i] = 0; 59 | } 60 | 61 | 62 | return 0; 63 | } 64 | 65 | unsigned int dataPrepareGetArg(graphInfo *info) 66 | { 67 | return 0; 68 | } -------------------------------------------------------------------------------- /application/casair/host_vertex_apply.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include "fpga_application.h" 4 | 5 | #include "host_graph_verification.h" 6 | 7 | 8 | #include "customize_mem_1.h" 9 | #include "customize_apply_cl_kernel_1.h" 10 | 11 | 12 | 13 | 14 | 15 | void partitionApplyCModel( 16 | cl_context &context, 17 | cl_device_id &device, 18 | int superStep, 19 | int partId, 20 | unsigned int applyArg 21 | ) 22 | { 23 | } 24 | -------------------------------------------------------------------------------- /application/casair/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define FIXED_SCALE (1000000) 6 | 7 | 8 | /* source vertex property process */ 9 | inline prop_t preprocessProperty(prop_t srcProp) 10 | { 11 | return (srcProp); 12 | } 13 | 14 | /* Scatter: return source vertex property "srcProp" as the value of update tuple */ 15 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 16 | { 17 | return ((srcProp) * (edgeProp)); 18 | } 19 | 20 | /* Gather: accumulates the update values from source vertices to original values*/ 21 | inline prop_t gatherFunc(prop_t ori, prop_t update) 22 | { 23 | return ((ori) + (update)); 24 | } 25 | 26 | #endif /* __L2_H__ */ 27 | -------------------------------------------------------------------------------- /application/casair/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | #include 
"customize_mem_1.h" 8 | #include "host_graph_api.h" 9 | 10 | using namespace std; 11 | 12 | graphInfo graphDataInfo; 13 | 14 | int main(int argc, char **argv) { 15 | 16 | char * xcl_file = NULL; 17 | if (argc > 1) 18 | { 19 | xcl_file = argv[1]; 20 | } 21 | 22 | std::string gName; 23 | if (argc > 2) 24 | { 25 | gName = argv[2]; 26 | } 27 | else 28 | { 29 | gName = "wiki-talk"; 30 | } 31 | std::string mode = "normal"; 32 | 33 | DEBUG_PRINTF("start main\n"); 34 | 35 | acceleratorInit("graph_fpga", xcl_file); 36 | acceleratorDataLoad(gName, mode, &graphDataInfo); 37 | acceleratorDataPreprocess(&graphDataInfo); 38 | 39 | { 40 | double startStamp, endStamp; 41 | startStamp = getCurrentTimestamp(); 42 | acceleratorSuperStep(0, &graphDataInfo); 43 | endStamp = getCurrentTimestamp(); 44 | DEBUG_PRINTF("exe time : %lf \n", endStamp - startStamp); 45 | } 46 | 47 | write_back_csv("out_s1.csv", MEM_ID_NEWTHETA_S); 48 | write_back_csv("out_a1.csv", MEM_ID_NEWTHETA_A); 49 | write_back_csv("out_r1.csv", MEM_ID_NEWTHETA_R); 50 | 51 | acceleratorDeinit(); 52 | 53 | return 0; 54 | } 55 | -------------------------------------------------------------------------------- /application/casir/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/casir/apply_kernel.mk -------------------------------------------------------------------------------- /application/casir/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=true 4 | HAVE_UNSIGNED_PROP=false 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=true 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=false 16 | 17 | #override bandwidth 18 | TARGET_BANDWIDTH = 17 19 | 20 | #override partition_size 21 | 
TARGET_PARTITION_SIZE = 66536 22 | -------------------------------------------------------------------------------- /application/casir/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=300 2 | 3 | QUEUE_SIZE_FILTER=32 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=7 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/casir/customized_apply.cpp: -------------------------------------------------------------------------------- 1 | 2 | // pre-define: 3 | // uProp = theta_i * N 4 | // tProp = sum(sourceVertexProp * w) in scatter-gather stage 5 | // wProp: write back value 6 | 7 | /* Apply: updates all vertices for next iteration */ 8 | int applyFunc( ... ) 9 | { 10 | #pragma THUNDERGP APPLY_BASE_TYPE float 11 | 12 | #pragma THUNDERGP DEF_ARRAY theta_s 13 | float theta_s; 14 | #pragma THUNDERGP DEF_ARRAY theta_r 15 | float theta_r; 16 | #pragma THUNDERGP DEF_INPUT_ONLY_ARRAY N 17 | float N; 18 | #pragma THUNDERGP DEF_SCALAR epsilon 19 | float epsilon; 20 | #pragma THUNDERGP DEF_SCALAR nu 21 | float nu; 22 | #pragma THUNDERGP DEF_SCALAR a 23 | float a; 24 | #pragma THUNDERGP DEF_SCALAR eta 25 | float eta; 26 | 27 | 28 | #pragma THUNDERGP USER_APPLY_CODE_START 29 | //start 30 | /* 31 | St = Sp-epsilon*Sp*Ip - Sp*a * epsilon * eta* wetVec 32 | It = (1-nu)*Ip+epsilon*Sp*Ip + Sp *a * epsilon * eta * wetVec 33 | Rt = Rp+nu*Ip 34 | */ 35 | float theta_i = uProp / N / FIXED_SCALE; 36 | float wetVec = tProp / N / FIXED_SCALE; 37 | float newtheta_s = theta_s 38 | - epsilon * theta_s * theta_i 39 | - theta_s * a * eta * wetVec; 40 | float newtheta_r = theta_r + nu * theta_i; 41 | 42 | float newtheta_i = (1 - nu) * theta_i 43 | + epsilon * theta_s * theta_i 44 | + theta_s * a * eta * wetVec; 45 | wProp = newtheta_i * N * FIXED_SCALE; 46 | #pragma THUNDERGP USER_APPLY_CODE_END 47 | //end 48 | } 49 | 
-------------------------------------------------------------------------------- /application/casir/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | #include "customize_mem_1.h" 4 | 5 | 6 | #define DATA_PATH "/graph_data/sir/" 7 | #define MEM_ID_LOAD_EDGE_PROP (MEM_ID_USER_DEFINE_BASE) 8 | #define MEM_ID_LOAD_VERTEX_PROP (MEM_ID_USER_DEFINE_BASE + 1) 9 | 10 | int dataPrepareProperty(graphInfo *info) 11 | { 12 | 13 | global.nu = 0.023; 14 | global.epsilon = 0.077; 15 | global.a = 0.5; 16 | global.eta = 0.01; 17 | 18 | load_from_csv(DATA_PATH"input_S0.csv", MEM_ID_THETA_S, MEM_ATTR_THETA_S); 19 | output_init(MEM_ID_NEWTHETA_S, MEM_ATTR_NEWTHETA_S, MEM_ID_THETA_S); 20 | load_from_csv(DATA_PATH"input_R0.csv", MEM_ID_THETA_R, MEM_ATTR_THETA_R); 21 | output_init(MEM_ID_NEWTHETA_R, MEM_ATTR_NEWTHETA_R, MEM_ID_THETA_R); 22 | 23 | float * pop = load_from_csv(DATA_PATH"input_pop.csv", MEM_ID_N, MEM_ATTR_N); 24 | float * i0 = load_from_csv(DATA_PATH"input_I0.csv", MEM_ID_LOAD_VERTEX_PROP, ATTR_HOST_ONLY); 25 | 26 | if (i0 == NULL) 27 | { 28 | return -1; 29 | } 30 | 31 | prop_t *vertexPushinProp = (prop_t*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 32 | int vertexNum = info->vertexNum; 33 | for (int i = 0; i < info->vertexNum; i++) 34 | { 35 | vertexPushinProp[i] = (int)(i0[i] * pop[i] * FIXED_SCALE); 36 | } 37 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size / sizeof(int); 38 | for (int i = vertexNum; i < alignedVertexNum; i++) { 39 | vertexPushinProp[i] = 0; 40 | } 41 | 42 | int * loadedEdgeProp = load_from_csv(DATA_PATH"edge_prop_sir.txt", MEM_ID_LOAD_EDGE_PROP, ATTR_HOST_ONLY); 43 | prop_t *edgeProp = (prop_t*)get_host_mem_pointer(MEM_ID_EDGE_PROP); 44 | int edgeNum = info->edgeNum; 45 | for (int i = 0; i < edgeNum; i++) 46 | { 47 | edgeProp[i] = loadedEdgeProp[i]; 48 | } 49 | int alignedEdgeNum = get_he_mem(MEM_ID_EDGE_PROP)->size / 
sizeof(int); 50 | for (int i = edgeNum; i < alignedEdgeNum; i++) 51 | { 52 | edgeProp[i] = 0; 53 | } 54 | 55 | prop_t *test = (prop_t*)get_host_mem_pointer(MEM_ID_TEST); 56 | for (int i = 0; i < info->vertexNum; i++) 57 | { 58 | test[i] = vertexPushinProp[i]; 59 | } 60 | 61 | 62 | return 0; 63 | } 64 | 65 | unsigned int dataPrepareGetArg(graphInfo *info) 66 | { 67 | return 0; 68 | } -------------------------------------------------------------------------------- /application/casir/host_vertex_apply.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include "fpga_application.h" 4 | 5 | #include "host_graph_verification.h" 6 | 7 | 8 | #include "customize_mem_1.h" 9 | #include "customize_apply_cl_kernel_1.h" 10 | 11 | 12 | 13 | 14 | 15 | void partitionApplyCModel( 16 | cl_context &context, 17 | cl_device_id &device, 18 | int superStep, 19 | int partId, 20 | unsigned int applyArg 21 | ) 22 | { 23 | } 24 | -------------------------------------------------------------------------------- /application/casir/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define FIXED_SCALE (1000000) 6 | 7 | 8 | /* source vertex property process */ 9 | inline prop_t preprocessProperty(prop_t srcProp) 10 | { 11 | return (srcProp); 12 | } 13 | 14 | /* Scatter: return source vertex property "srcProp" as the value of update tuple */ 15 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 16 | { 17 | return ((srcProp) * (edgeProp)); 18 | } 19 | 20 | /* Gather: accumulates the update values from source vertices to original values*/ 21 | inline prop_t gatherFunc(prop_t ori, prop_t update) 22 | { 23 | return ((ori) + (update)); 24 | } 25 | 26 | #endif /* __L2_H__ */ 27 | -------------------------------------------------------------------------------- /application/casir/main.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "customize_mem_1.h" 7 | #include "host_graph_api.h" 8 | using namespace std; 9 | 10 | graphInfo graphDataInfo; 11 | 12 | int main(int argc, char **argv) { 13 | 14 | char * xcl_file = NULL; 15 | if (argc > 1) 16 | { 17 | xcl_file = argv[1]; 18 | } 19 | 20 | std::string gName; 21 | if (argc > 2) 22 | { 23 | gName = argv[2]; 24 | } 25 | else 26 | { 27 | gName = "wiki-talk"; 28 | } 29 | std::string mode = "normal"; 30 | 31 | DEBUG_PRINTF("start main\n"); 32 | 33 | acceleratorInit("graph_fpga", xcl_file); 34 | acceleratorDataLoad(gName, mode, &graphDataInfo); 35 | acceleratorDataPreprocess(&graphDataInfo); 36 | 37 | { 38 | double startStamp, endStamp; 39 | startStamp = getCurrentTimestamp(); 40 | acceleratorSuperStep(0, &graphDataInfo); 41 | endStamp = getCurrentTimestamp(); 42 | DEBUG_PRINTF("exe time : %lf \n", endStamp - startStamp); 43 | } 44 | 45 | write_back_csv("out_s1.csv", MEM_ID_NEWTHETA_S); 46 | write_back_csv("out_r1.csv", MEM_ID_NEWTHETA_R); 47 | 48 | acceleratorDeinit(); 49 | 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /application/cc/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/cc/apply_kernel.mk -------------------------------------------------------------------------------- /application/cc/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=false 4 | HAVE_UNSIGNED_PROP=true 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=false 16 | 17 | 
-------------------------------------------------------------------------------- /application/cc/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=250 2 | 3 | QUEUE_SIZE_FILTER=16 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=7 7 | 8 | APPLY_REF_ARRAY_SIZE=32 9 | -------------------------------------------------------------------------------- /application/cc/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | unsigned int dataPrepareGetArg(graphInfo *info) 10 | { 11 | return 0; 12 | } 13 | 14 | int dataPrepareProperty(graphInfo *info) 15 | { 16 | std::srand(std::time(nullptr)); 17 | int *vertexPushinProp = (int*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 18 | 19 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size/sizeof(int); 20 | 21 | for (int i = 0; i < alignedVertexNum; i++) 22 | { 23 | vertexPushinProp[i] = 0; 24 | } 25 | for (int i = 0; i < 32; i++) 26 | { 27 | int select_index = ((double)std::rand())/((RAND_MAX + 1u)/info->vertexNum); 28 | vertexPushinProp[select_index] = 1 << i; 29 | } 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /application/cc/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define MAX_PROP (INT_MAX - 1) 6 | 7 | #define VERTEX_ACTIVE_BIT_MASK (0x80000000) 8 | #define IS_ACTIVE_VERTEX(a) (a != 0) 9 | 10 | /* source vertex property process */ 11 | inline prop_t preprocessProperty(prop_t srcProp) 12 | { 13 | return ((srcProp)); 14 | } 15 | 16 | /* source vertex property & edge property */ 17 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 18 | { 19 | return (srcProp); 20 | } 21 | 22 | /* destination property update dst buffer update */ 
23 | inline prop_t gatherFunc(prop_t ori, prop_t update) 24 | { 25 | return (ori | update); 26 | } 27 | 28 | inline prop_t applyFunc( prop_t tProp, 29 | prop_t source, 30 | prop_t outDeg, 31 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 32 | unsigned int arg 33 | ) 34 | { 35 | 36 | for (int i = 0; i < APPLY_REF_ARRAY_SIZE; i++) 37 | { 38 | const prop_t mask = (1 << i); 39 | 40 | if (((source & mask) != mask ) && ((tProp & mask) == mask)) 41 | { 42 | extra[i] = 1; 43 | } 44 | else 45 | { 46 | extra[i] = 0; 47 | } 48 | } 49 | 50 | return tProp | source; 51 | } 52 | 53 | #endif /* __L2_H__ */ 54 | -------------------------------------------------------------------------------- /application/cc/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "host_graph_api.h" 7 | #include "host_graph_verification.h" 8 | 9 | using namespace std; 10 | 11 | graphInfo graphDataInfo; 12 | 13 | int main(int argc, char **argv) { 14 | 15 | char * xcl_file = NULL; 16 | if (argc > 1) 17 | { 18 | xcl_file = argv[1]; 19 | } 20 | 21 | std::string gName; 22 | if (argc > 2) 23 | { 24 | gName = argv[2]; 25 | } 26 | else 27 | { 28 | gName = "wiki-talk"; 29 | } 30 | std::string mode = "normal"; 31 | 32 | DEBUG_PRINTF("start main\n"); 33 | 34 | acceleratorInit("graph_fpga", xcl_file); 35 | 36 | acceleratorDataLoad(gName, mode, &graphDataInfo); 37 | 38 | acceleratorDataPreprocess(&graphDataInfo); 39 | // 10 times for averaging result; 40 | for (int j = 0; j < 10 ; j ++) 41 | { 42 | int runCounter = 0; 43 | int totalActiveVertices = 1; 44 | int closenessCentrality[32]; 45 | 46 | //random set 32 nodes to calculated their closeness centrality 47 | reTransferProp(&graphDataInfo); 48 | for (int i = 0; i < 32; i++) 49 | { 50 | closenessCentrality[i] = 0; 51 | } 52 | while (totalActiveVertices != 0) 53 | { 54 | totalActiveVertices = 0; 55 | double startStamp, endStamp; 56 | startStamp = 
getCurrentTimestamp(); 57 | 58 | acceleratorSuperStep(runCounter, &graphDataInfo); 59 | 60 | endStamp = getCurrentTimestamp(); 61 | 62 | int *reg = (int *)acceleratorQueryRegister(); 63 | for (int i = 0; i < 32; i++) 64 | { 65 | int activeVertices = reg[i]; 66 | totalActiveVertices += activeVertices; 67 | DEBUG_PRINTF("activeVertice@path_%d : %d \n", i, activeVertices); 68 | } 69 | for (int i = 0; i < 32; i++) 70 | { 71 | closenessCentrality[i] += reg[i] * runCounter; 72 | } 73 | /* profile */ 74 | acceleratorProfile(runCounter, runCounter, &graphDataInfo, endStamp - startStamp); 75 | runCounter ++; 76 | } 77 | } 78 | //dumpResult(&graphDataInfo); 79 | acceleratorDeinit(); 80 | 81 | return 0; 82 | } 83 | 84 | -------------------------------------------------------------------------------- /application/global_config.h: -------------------------------------------------------------------------------- 1 | #ifndef __GLOBAL_CONFIG_H__ 2 | #define __GLOBAL_CONFIG_H__ 3 | 4 | #include 5 | #include 6 | 7 | 8 | #if HAVE_UNSIGNED_PROP 9 | typedef unsigned int prop_t; 10 | #else 11 | typedef int prop_t; 12 | #endif 13 | 14 | #ifdef TARGET_PARTITION_SIZE 15 | #undef PARTITION_SIZE 16 | #define PARTITION_SIZE (TARGET_PARTITION_SIZE) 17 | #endif 18 | #define MAX_VERTICES_IN_ONE_PARTITION (PARTITION_SIZE) 19 | 20 | #define ENDFLAG 0xffffffff 21 | 22 | #define LOG_BURSTBUFFERSIZE (7) 23 | #define BURSTBUFFERSIZE (1<<7) 24 | #define ALIGN_SIZE (BURSTBUFFERSIZE * 16) 25 | 26 | //#define SW_DEBUG 27 | 28 | #define CAHCE_FETCH_DEBUG (0) 29 | 30 | 31 | 32 | 33 | 34 | #endif /* __GLOBAL_CONFIG_H__ */ 35 | -------------------------------------------------------------------------------- /application/para_check.h: -------------------------------------------------------------------------------- 1 | #ifndef __PARA_CHECK__ 2 | #define __PARA_CHECK__ 3 | 4 | #ifndef SUB_PARTITION_NUM 5 | #error "SUB_PARTITION_NUM is not define! please check the ThunderGP para_gen." 
6 | #endif 7 | 8 | #ifndef PARTITION_SIZE 9 | #error "PARTITION_SIZE is not define! please check the ThunderGP para_gen." 10 | #endif 11 | 12 | #endif /* __PARA_CHECK__ */ 13 | -------------------------------------------------------------------------------- /application/pr/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/pr/apply_kernel.mk -------------------------------------------------------------------------------- /application/pr/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=false 4 | HAVE_UNSIGNED_PROP=false 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=true 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=true 16 | -------------------------------------------------------------------------------- /application/pr/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=280 2 | 3 | QUEUE_SIZE_FILTER=16 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=6 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/pr/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | 4 | #define INT2FLOAT (pow(2,30)) 5 | 6 | int float2int(float a) { 7 | return (int)(a * INT2FLOAT); 8 | } 9 | 10 | float int2float(int a) { 11 | return ((float)a / INT2FLOAT); 12 | } 13 | 14 | unsigned int dataPrepareGetArg(graphInfo *info) 15 | { 16 | return float2int((1.0f - kDamp) / info->vertexNum); 17 | } 18 | 19 | int dataPrepareProperty(graphInfo *info) 20 | { 21 | int *tempPropValue = 
(int*)get_host_mem_pointer(MEM_ID_PROP_FOR_DATAPREPARE); 22 | int *outDeg = (int*)get_host_mem_pointer(MEM_ID_OUT_DEG_ORIGIN); 23 | prop_t *vertexPushinProp = (prop_t*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 24 | 25 | int vertexNum = info->vertexNum; 26 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size/sizeof(int); 27 | 28 | float init_score_float = 1.0f / vertexNum; 29 | int init_score_int = float2int(init_score_float); 30 | 31 | 32 | for (int i = 0; i < vertexNum; i++) { 33 | tempPropValue[i] = init_score_int; 34 | if (outDeg[i] > 0) 35 | { 36 | vertexPushinProp[i] = tempPropValue[i] / outDeg[i]; 37 | } 38 | else 39 | { 40 | vertexPushinProp[i] = 0; 41 | } 42 | } 43 | 44 | for (int i = vertexNum; i < alignedVertexNum; i++) { 45 | vertexPushinProp[i] = 0; 46 | } 47 | return 0; 48 | } -------------------------------------------------------------------------------- /application/pr/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define kDamp (0.85f) 6 | #define kDampFixPoint 108//(0.85 << 7) // * 128 7 | 8 | /* source vertex property process */ 9 | inline prop_t preprocessProperty(prop_t srcProp) 10 | { 11 | return (srcProp); 12 | } 13 | 14 | /* source vertex property & edge property */ 15 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 16 | { 17 | return (srcProp); 18 | } 19 | 20 | /* destination property update dst buffer update */ 21 | inline prop_t gatherFunc(prop_t ori, prop_t update) 22 | { 23 | return ((ori) + (update)); 24 | } 25 | 26 | inline prop_t applyFunc( prop_t tProp, 27 | prop_t source, 28 | prop_t outDeg, 29 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 30 | unsigned int arg 31 | ) 32 | { 33 | 34 | prop_t old_score = source; 35 | prop_t new_score = arg + ((kDampFixPoint * tProp) >> 7); 36 | prop_t tmp; 37 | if (outDeg != 0) 38 | { 39 | tmp = (1 << 16 ) / outDeg; 40 | } 41 | else 42 | { 43 | tmp = 0; 44 | } 45 | 46 | prop_t update 
= (new_score * tmp) >> 16; 47 | 48 | extra[0] = (new_score - old_score) > 0 ? (new_score - old_score) : (old_score - new_score) ; 49 | 50 | return update; 51 | } 52 | #endif /* __L2_H__ */ 53 | -------------------------------------------------------------------------------- /application/spmv/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/spmv/apply_kernel.mk -------------------------------------------------------------------------------- /application/spmv/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=true 4 | HAVE_UNSIGNED_PROP=false 5 | 6 | #apply kernel 7 | HAVE_APPLY=false 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=true 16 | -------------------------------------------------------------------------------- /application/spmv/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=250 2 | 3 | QUEUE_SIZE_FILTER=32 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=7 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/spmv/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | 3 | 4 | unsigned int dataPrepareGetArg(graphInfo *info) 5 | { 6 | return 0; 7 | } 8 | 9 | int dataPrepareProperty(graphInfo *info) 10 | { 11 | prop_t *vertexPushinProp = (prop_t*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 12 | 13 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size/sizeof(int); 14 | 15 | for (int i = 0; i < alignedVertexNum; i++) { 16 | vertexPushinProp[i] = i; 17 | } 18 | prop_t *edgeProp = 
(prop_t*)get_host_mem_pointer(MEM_ID_EDGE_PROP); 19 | 20 | //int edgeNum = info->edgeNum; 21 | int alignedEdgeNum = get_he_mem(MEM_ID_EDGE_PROP)->size/sizeof(int); 22 | 23 | for (int i = 0; i < alignedEdgeNum; i++) 24 | { 25 | edgeProp[i] = i; 26 | } 27 | 28 | 29 | return 0; 30 | } -------------------------------------------------------------------------------- /application/spmv/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | /* source vertex property process */ 5 | inline prop_t preprocessProperty(prop_t srcProp) 6 | { 7 | return (srcProp); 8 | } 9 | 10 | /* source vertex property & edge property */ 11 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 12 | { 13 | return ((srcProp) * (edgeProp)); 14 | } 15 | 16 | /* destination property update dst buffer update */ 17 | inline prop_t gatherFunc(prop_t ori, prop_t update) 18 | { 19 | return ((ori) + (update)); 20 | } 21 | 22 | inline prop_t applyFunc( prop_t tProp, 23 | prop_t source, 24 | prop_t outDeg, 25 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 26 | unsigned int arg 27 | ) 28 | { 29 | return tProp; 30 | } 31 | 32 | 33 | #endif /* __L2_H__ */ 34 | -------------------------------------------------------------------------------- /application/sssp/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/sssp/apply_kernel.mk -------------------------------------------------------------------------------- /application/sssp/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=true 4 | HAVE_UNSIGNED_PROP=true 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | 
DEFAULT_ENTRY=true 16 | -------------------------------------------------------------------------------- /application/sssp/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=250 2 | 3 | QUEUE_SIZE_FILTER=16 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=6 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/sssp/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | 4 | unsigned int dataPrepareGetArg(graphInfo *info) 5 | { 6 | return 0; 7 | } 8 | 9 | int dataPrepareProperty(graphInfo *info) 10 | { 11 | int *vertexPushinProp = (int*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 12 | 13 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size / sizeof(int); 14 | 15 | for (int i = 0; i < alignedVertexNum; i++) 16 | { 17 | vertexPushinProp[i] = MAX_PROP; 18 | } 19 | prop_t *edgeProp = (prop_t*)get_host_mem_pointer(MEM_ID_EDGE_PROP); 20 | 21 | //int edgeNum = info->edgeNum; 22 | int alignedEdgeNum = get_he_mem(MEM_ID_EDGE_PROP)->size / sizeof(int); 23 | 24 | for (int i = 0; i < alignedEdgeNum; i++) 25 | { 26 | edgeProp[i] = i % 16; 27 | } 28 | int select_index = ((double)std::rand()) / ((RAND_MAX + 1u) / info->vertexNum); 29 | vertexPushinProp[select_index] = 0x80000001; 30 | 31 | return 0; 32 | } -------------------------------------------------------------------------------- /application/sssp/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | #define MAX_PROP (INT_MAX - 1) 6 | 7 | #define VERTEX_ACTIVE_BIT_MASK (0x80000000) 8 | #define IS_ACTIVE_VERTEX(a) ((((((a) & VERTEX_ACTIVE_BIT_MASK) == VERTEX_ACTIVE_BIT_MASK))) ? 
1 : 0) 9 | 10 | /* source vertex property process */ 11 | inline prop_t preprocessProperty(prop_t srcProp) 12 | { 13 | return (srcProp); 14 | } 15 | 16 | /* source vertex property & edge property */ 17 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 18 | { 19 | if((srcProp & VERTEX_ACTIVE_BIT_MASK) == VERTEX_ACTIVE_BIT_MASK) 20 | return ((srcProp) + (edgeProp)); 21 | else 22 | return (srcProp); 23 | } 24 | 25 | /* destination property update dst buffer update */ 26 | inline prop_t gatherFunc(prop_t ori, prop_t update) 27 | { 28 | return ( 29 | ( 30 | ( 31 | (((ori) & (~VERTEX_ACTIVE_BIT_MASK)) > ((update) & (~VERTEX_ACTIVE_BIT_MASK))) 32 | && (update != 0) 33 | ) 34 | || (ori == 0x0) 35 | ) ? (update) : (ori) 36 | ); 37 | } 38 | 39 | inline prop_t applyFunc( prop_t tProp, 40 | prop_t source, 41 | prop_t outDeg, 42 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 43 | unsigned int arg 44 | ) 45 | { 46 | prop_t update = 0; 47 | 48 | prop_t uProp = source; 49 | prop_t wProp; 50 | if (((tProp & VERTEX_ACTIVE_BIT_MASK) == VERTEX_ACTIVE_BIT_MASK) && (uProp == MAX_PROP)) 51 | { 52 | extra[0] = 1; 53 | wProp = tProp; // current active vertex, not travsered 54 | } 55 | else 56 | { 57 | extra[0] = 0; 58 | wProp = uProp & 0x7fffffff; // not travsered 59 | } 60 | update = wProp; 61 | 62 | return update; 63 | } 64 | 65 | 66 | #endif /* __L2_H__ */ 67 | -------------------------------------------------------------------------------- /application/template/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/template/apply_kernel.mk -------------------------------------------------------------------------------- /application/template/build.mk: -------------------------------------------------------------------------------- 1 | # Need modification ! 
2 | 3 | #scatter-gather kernel 4 | HAVE_EDGE_PROP=true/false 5 | HAVE_UNSIGNED_PROP=true/false 6 | 7 | #apply kernel 8 | HAVE_APPLY=true/false 9 | CUSTOMIZE_APPLY=true/false 10 | HAVE_APPLY_OUTDEG=true/false 11 | 12 | #scheduler 13 | SCHEDULER=secondOrderEstimator 14 | 15 | #entry 16 | DEFAULT_ENTRY=true 17 | 18 | -------------------------------------------------------------------------------- /application/template/config.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/template/config.mk -------------------------------------------------------------------------------- /application/template/host_vertex_apply.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include "fpga_application.h" 4 | 5 | #include "host_graph_verification.h" 6 | 7 | 8 | void setApplyKernel(cl_kernel &kernel_apply, int partId, int vertexNum) 9 | { 10 | 11 | } 12 | 13 | 14 | void partitionApplyCModel( 15 | cl_context &context, 16 | cl_device_id &device, 17 | int partId, 18 | unsigned int argReg 19 | ) 20 | { 21 | } 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /application/template/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | /* source vertex property process */ 5 | inline prop_t preprocessProperty(prop_t srcProp) 6 | { 7 | return 0; 8 | } 9 | 10 | /* source vertex property & edge property */ 11 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 12 | { 13 | return 0; 14 | } 15 | 16 | /* destination property update in RAW solver */ 17 | inline prop_t updateMergeInRAWSolver(prop_t ori, prop_t update) 18 | { 19 | return 0; 20 | } 21 | 22 | /* destination property update dst buffer update */ 23 | inline prop_t 
gatherFunc(prop_t ori, prop_t update) 24 | { 25 | return 0; 26 | } 27 | 28 | 29 | #endif /* __L2_H__ */ 30 | -------------------------------------------------------------------------------- /application/wcc/apply_kernel.mk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/application/wcc/apply_kernel.mk -------------------------------------------------------------------------------- /application/wcc/build.mk: -------------------------------------------------------------------------------- 1 | 2 | #scatter-gather kernel 3 | HAVE_EDGE_PROP=false 4 | HAVE_UNSIGNED_PROP=true 5 | 6 | #apply kernel 7 | HAVE_APPLY=true 8 | CUSTOMIZE_APPLY=false 9 | HAVE_APPLY_OUTDEG=false 10 | 11 | #scheduler 12 | SCHEDULER=secondOrderEstimator 13 | 14 | #entry 15 | DEFAULT_ENTRY=true 16 | 17 | -------------------------------------------------------------------------------- /application/wcc/config.mk: -------------------------------------------------------------------------------- 1 | FREQ=250 2 | 3 | QUEUE_SIZE_FILTER=32 4 | QUEUE_SIZE_MEMORY=512 5 | 6 | LOG_SCATTER_CACHE_BURST_SIZE=7 7 | 8 | APPLY_REF_ARRAY_SIZE=1 9 | -------------------------------------------------------------------------------- /application/wcc/dataPrepare.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_api.h" 2 | #include "fpga_application.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | unsigned int dataPrepareGetArg(graphInfo *info) 9 | { 10 | return 0; 11 | } 12 | 13 | int dataPrepareProperty(graphInfo *info) 14 | { 15 | int *vertexPushinProp = (int*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP); 16 | 17 | int alignedVertexNum = get_he_mem(MEM_ID_PUSHIN_PROP)->size/sizeof(int); 18 | 19 | for (int i = 0; i < alignedVertexNum; i++) 20 | { 21 | vertexPushinProp[i] = 0; 22 | } 23 | int select_index = ((double)std::rand()) / 
((RAND_MAX + 1u) / info->vertexNum); 24 | vertexPushinProp[select_index] = 1; 25 | return 0; 26 | } -------------------------------------------------------------------------------- /application/wcc/l2.h: -------------------------------------------------------------------------------- 1 | #ifndef __L2_H__ 2 | #define __L2_H__ 3 | 4 | 5 | /* 6 | Reference the mapping method in TABLE 1 from hitgraph: 7 | Zhou, Shijie, et al. "HitGraph: High-throughput graph processing framework on FPGA." 8 | IEEE Transactions on Parallel and Distributed Systems 30.10 (2019): 2249-2264. 9 | 10 | but we change the min operation to max, for the initialization of BRAM is zero. 11 | Therefore, do not need another memory interface to reload data from DRAM. 12 | Note: this implementation ONLY for performance comparison! 13 | */ 14 | 15 | 16 | /* source vertex property process */ 17 | inline prop_t preprocessProperty(prop_t srcProp) 18 | { 19 | return (srcProp); 20 | } 21 | 22 | /* source vertex property & edge property */ 23 | inline prop_t scatterFunc(prop_t srcProp, prop_t edgeProp) 24 | { 25 | return (srcProp); 26 | } 27 | 28 | /* destination property update dst buffer update */ 29 | inline prop_t gatherFunc(prop_t ori, prop_t update) 30 | { 31 | return (update > ori) ? 
update : ori; 32 | } 33 | 34 | 35 | inline prop_t applyFunc( prop_t tProp, 36 | prop_t source, 37 | prop_t outDeg, 38 | unsigned int (&extra)[APPLY_REF_ARRAY_SIZE], 39 | unsigned int arg 40 | ) 41 | { 42 | return tProp; 43 | } 44 | 45 | #endif /* __L2_H__ */ 46 | -------------------------------------------------------------------------------- /automation/auto_gen_code.mk: -------------------------------------------------------------------------------- 1 | include ThunderGP.mk 2 | 3 | CODE_GEN_PATH =./automation 4 | CODE_GEN_PARSER_PATH = $(CODE_GEN_PATH)/parser 5 | CODE_GEN_FILE = $(CODE_GEN_PATH)/parser.cpp 6 | CODE_GEN_FILE += $(CODE_GEN_PATH)/parser_debug.cpp 7 | CODE_GEN_FILE += $(CODE_GEN_PARSER_PATH)/mem_interface.cpp 8 | CODE_GEN_FILE += $(CODE_GEN_PARSER_PATH)/kernel_interface.cpp 9 | CODE_GEN_FILE += $(CODE_GEN_PARSER_PATH)/customize.cpp 10 | CODE_GEN_FILE += $(CODE_GEN_PARSER_PATH)/makefile.cpp 11 | 12 | 13 | # AUTOGEN_CFLAG in here: 14 | include tmp_para/para.mk 15 | 16 | 17 | INCLUDE_FLAG = -I application 18 | INCLUDE_FLAG += -I libgraph 19 | INCLUDE_FLAG += -I $(CODE_GEN_PATH) 20 | INCLUDE_FLAG += -I $(CODE_GEN_PARSER_PATH) 21 | 22 | 23 | VAR_TRUE=true 24 | APP = $(app) 25 | APPCONFIG = ./application/$(APP) 26 | include $(APPCONFIG)/build.mk 27 | 28 | .PHONY: code_gen 29 | code_gen: 30 | 31 | rm -rf tmp_fpga_top 32 | mkdir -p tmp_fpga_top 33 | g++ -static-libstdc++ $(INCLUDE_FLAG) $(AUTOGEN_CFLAG) $(CODE_GEN_FILE) -o code_gen 34 | ./code_gen libfpga/common_template/apply_top.cpp tmp_fpga_top/apply_top 35 | ./code_gen libfpga/common_template/scatter_gather_top.cpp tmp_fpga_top/scatter_gather_top 36 | ifndef app 37 | $(error app is undefined) 38 | else 39 | ifeq ($(strip $(CUSTOMIZE_APPLY)), $(strip $(VAR_TRUE))) 40 | ./code_gen libfpga/customize_template/customize_apply_top.cpp application/$(app)/customized_apply.cpp tmp_fpga_top/customize_apply_top 41 | ./code_gen libfpga/customize_template/customize_mem.h application/$(app)/customized_apply.cpp 
tmp_fpga_top/customize_mem 42 | ./code_gen libfpga/customize_template/customize_apply_cl_kernel.h application/$(app)/customized_apply.cpp tmp_fpga_top/customize_apply_cl_kernel 43 | endif 44 | endif 45 | -------------------------------------------------------------------------------- /automation/auto_gen_makefile.mk: -------------------------------------------------------------------------------- 1 | include ThunderGP.mk 2 | 3 | CODE_GEN_PATH =./automation 4 | CODE_GEN_FILE = $(CODE_GEN_PATH)/makefile_gen.cpp 5 | CODE_GEN_FILE += -I libgraph/ 6 | # AUTOGEN_CFLAG in here: 7 | include tmp_para/para.mk 8 | 9 | 10 | VAR_TRUE=true 11 | APP = $(app) 12 | APPCONFIG = ./application/$(APP) 13 | include $(APPCONFIG)/build.mk 14 | 15 | .PHONY: makefile_gen 16 | makefile_gen: 17 | rm -f tmp_fpga_top/*.mk 18 | mkdir -p tmp_fpga_top 19 | g++ -static-libstdc++ -I application $(AUTOGEN_CFLAG) $(CODE_GEN_FILE) -o makefile_gen 20 | ./makefile_gen 21 | 22 | ifndef app 23 | $(error app is undefined) 24 | else 25 | ifeq ($(strip $(CUSTOMIZE_APPLY)), $(strip $(VAR_TRUE))) 26 | ./code_gen libfpga/customize_template/customize_apply_kernel.mk application/$(app)/customized_apply.cpp tmp_fpga_top/apply_kernel 27 | else 28 | ./code_gen libfpga/common_template/apply_kernel.mk tmp_fpga_top/apply_kernel 29 | endif 30 | endif 31 | -------------------------------------------------------------------------------- /automation/auto_gen_parameters.mk: -------------------------------------------------------------------------------- 1 | include ThunderGP.mk 2 | 3 | CODE_GEN_PATH =./automation 4 | PARA_GEN_CFLAGS := -I $(CODE_GEN_PATH)/devices 5 | PARA_GEN_CFLAGS += -DDEVICE_HEADER="$(DEVICES).h" 6 | PARA_GEN_CFLAGS += -I libgraph/ 7 | 8 | 9 | 10 | .PHONY: auto_para para_gen tmp_para/para.mk 11 | auto_para: tmp_para/para.mk 12 | 13 | VAR_TRUE=true 14 | APP = $(app) 15 | APPCONFIG = ./application/$(APP) 16 | include $(APPCONFIG)/build.mk 17 | 18 | PARA_GEN_CFLAGS += -DTARGET_BANDWIDTH=$(TARGET_BANDWIDTH) 
19 | 20 | para_gen: 21 | rm -rf para_gen 22 | rm -rf tmp_para 23 | mkdir -p tmp_para 24 | g++ -g -static-libstdc++ $(PARA_GEN_CFLAGS) ./automation/para_gen.cpp -o para_gen 25 | 26 | tmp_para/para.mk: para_gen 27 | ./para_gen 28 | rm -rf code_gen 29 | -------------------------------------------------------------------------------- /automation/devices/device_common.h: -------------------------------------------------------------------------------- 1 | #ifndef __DEVICE_COMMON_H__ 2 | #define __DEVICE_COMMON_H__ 3 | 4 | typedef struct 5 | { 6 | int slr_id; 7 | int luts; 8 | int ffs; 9 | int dsp; 10 | int bram; 11 | int uram; 12 | int mem_chns; 13 | } slr_resource_info_t; 14 | 15 | 16 | #endif /* __DEVICE_COMMON_H__ */ 17 | -------------------------------------------------------------------------------- /automation/devices/xilinx_u200_xdma_201830_2.h: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | 3 | const char * board_name = "u200"; 4 | 5 | const slr_resource_info_t slrs[] = 6 | { 7 | { 8 | .slr_id = 0, 9 | .luts = 354831, 10 | .ffs = 723372, 11 | .dsp = 2265, 12 | .bram = 638, 13 | .uram = 320, 14 | .mem_chns= 1, 15 | }, 16 | { 17 | .slr_id = 1, 18 | .luts = 159854, 19 | .ffs = 331711, 20 | .dsp = 1317, 21 | .bram = 326, 22 | .uram = 160, 23 | .mem_chns= 2, 24 | }, 25 | { 26 | .slr_id = 2, 27 | .luts = 354962, 28 | .ffs = 723353, 29 | .dsp = 2265, 30 | .bram = 638, 31 | .uram = 320, 32 | .mem_chns= 1, 33 | }, 34 | }; 35 | 36 | const int mem_cu_map[] = 37 | { 38 | 0, 1, 1, 2 39 | }; 40 | 41 | const int platform_memory_bandwidth_per_channel = 19; 42 | -------------------------------------------------------------------------------- /automation/devices/xilinx_u250_xdma_201830_2.h: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | 3 | const char * board_name = "u250"; 4 | 5 | const slr_resource_info_t slrs[] = 6 | { 7 | { 8 | .slr_id = 0, 
9 | .luts = 345171, 10 | .ffs = 704801, 11 | .dsp = 2877, 12 | .bram = 500, 13 | .uram = 320, 14 | .mem_chns= 1, 15 | }, 16 | { 17 | .slr_id = 1, 18 | .luts = 344533, 19 | .ffs = 702517, 20 | .dsp = 2877, 21 | .bram = 500, 22 | .uram = 320, 23 | .mem_chns= 1, 24 | }, 25 | { 26 | .slr_id = 2, 27 | .luts = 344878, 28 | .ffs = 703253, 29 | .dsp = 2877, 30 | .bram = 500, 31 | .uram = 320, 32 | .mem_chns= 1, 33 | }, 34 | { 35 | .slr_id = 3, 36 | .luts = 345158, 37 | .ffs = 703761, 38 | .dsp = 2877, 39 | .bram = 500, 40 | .uram = 320, 41 | .mem_chns= 1, 42 | }, 43 | }; 44 | 45 | const int mem_cu_map[] = 46 | { 47 | 0, 1, 2, 3 48 | }; 49 | 50 | const int platform_memory_bandwidth_per_channel = 19; 51 | -------------------------------------------------------------------------------- /automation/devices/xilinx_vcu1525_xdma_201830_1.h: -------------------------------------------------------------------------------- 1 | #include "device_common.h" 2 | 3 | const char * board_name = "vcu1525"; 4 | 5 | const slr_resource_info_t slrs[] = 6 | { 7 | { 8 | .slr_id = 0, 9 | .luts = 354830, 10 | .ffs = 723371, 11 | .dsp = 2265, 12 | .bram = 638, 13 | .uram = 320, 14 | .mem_chns= 1, 15 | }, 16 | { 17 | .slr_id = 1, 18 | .luts = 159088, 19 | .ffs = 329162, 20 | .dsp = 1317, 21 | .bram = 326, 22 | .uram = 160, 23 | .mem_chns= 2, 24 | }, 25 | { 26 | .slr_id = 2, 27 | .luts = 354934, 28 | .ffs = 723328, 29 | .dsp = 2265, 30 | .bram = 638, 31 | .uram = 320, 32 | .mem_chns= 1, 33 | }, 34 | }; 35 | 36 | const int mem_cu_map[] = 37 | { 38 | 0, 1, 1, 2 39 | }; 40 | 41 | const int platform_memory_bandwidth_per_channel = 19; 42 | -------------------------------------------------------------------------------- /automation/makefile_gen.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "common.h" 9 | #include "para_check.h" 10 | 11 | /* TODO: remove hard-code */ 12 | /* 
only generate the makefile of scatter-gather kernels */ 13 | /* the makefile of apply kernel is generated by code_gen in parser/customize.cpp */ 14 | 15 | int main(int argc, char **argv) { 16 | std::ofstream file; 17 | file.open("./tmp_fpga_top/gs_kernel.mk"); 18 | for (int i = 0; i < SUB_PARTITION_NUM; i ++) 19 | { 20 | file << "############# gs " << (i + 1) << " #############" << std::endl; 21 | file << "$(XCLBIN)/readEdgesCU" << (i + 1) 22 | << ".$(TARGET).$(DSA).xo: $(GS_KERNEL_PATH)/scatter_gather_top_" 23 | << (i + 1) << ".cpp" << std::endl; 24 | file << "\tmkdir -p $(XCLBIN)" << std::endl; 25 | file << "\t$(XOCC) $(CLFLAGS) -c -k readEdgesCU" << (i + 1) << " -I'$( %sStream;\n\ 10 | #pragma HLS stream variable=%sStream depth=16\n\ 11 | burstReadLite(addrOffset, vertexNum, %s, %sStream);\n\ 12 | ") 13 | #define STREAM_DUPLEX_OUTPUT_ATTR_STR ("\n\ 14 | #pragma HLS INTERFACE m_axi port=%s offset=slave bundle=gmem%d \n\ 15 | #pragma HLS INTERFACE s_axilite port=%s bundle=control \n\ 16 | hls::stream %sStream;\n\ 17 | #pragma HLS stream variable=%sStream depth=16\n\ 18 | ") 19 | 20 | 21 | #define SCALAR_ATTR_STR ("\n\ 22 | #pragma HLS INTERFACE s_axilite port=%s bundle=control \n\ 23 | ") 24 | 25 | #define WRITE_STR ("\n \ 26 | writeBackLite(vertexNum, %s + (addrOffset >> 4), %sStream);\n\ 27 | ") 28 | #define READ_STR ("\n\ 29 | burst_raw %s_u512;\n\ 30 | read_from_stream(%sStream, %s_u512);\n\ 31 | ") 32 | 33 | #define COV_STR ("type_cov %s_tmp; \n\ 34 | %s_tmp.ui=%s_u512.range((i + 1) * INT_WIDTH - 1, i * INT_WIDTH );\n\ 35 | float %s=%s_tmp.f;") 36 | 37 | 38 | #define COV_STR_WRITE ("type_cov new%s_tmp;\n\ 39 | new%s_tmp.f =new%s;\n\ 40 | new%s_u512.range((i + 1) * INT_WIDTH - 1, i * INT_WIDTH ) = new%s_tmp.ui;") 41 | 42 | #define MAKFILE_STR ("\n\ 43 | BINARY_LINK_OBJS += --sp vertexApply_1.%s:DDR[%d]") 44 | 45 | 46 | #endif /* __CUSTOMIZE_STR_H__ */ 47 | -------------------------------------------------------------------------------- 
/automation/parser/kernel_interface.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "parser.h" 10 | #include "parser_debug.h" 11 | 12 | using namespace std; 13 | 14 | typedef struct 15 | { 16 | std::string name; 17 | arg_instance_t kernel_arg; 18 | } kernel_arg_instance_t; 19 | 20 | 21 | 22 | static std::vector kernel_arg_list; 23 | 24 | static kernel_arg_instance_t * get_kernel_arg(std::string arg) 25 | { 26 | for (int i = 0; i < kernel_arg_list.size(); i ++) 27 | { 28 | if (arg == kernel_arg_list[i].name) 29 | { 30 | return &kernel_arg_list[i]; 31 | } 32 | } 33 | return NULL; 34 | } 35 | 36 | int register_kernel_arg(arg_instance_t item) 37 | { 38 | if (get_kernel_arg(item.name)) 39 | { 40 | DEBUG_PRINTF(" error: %s redefine!\n", item.name.c_str()); 41 | return -1; 42 | } 43 | kernel_arg_instance_t new_arg; 44 | new_arg.name = item.name; 45 | new_arg.kernel_arg = item; 46 | DEBUG_PRINTF(" registered %s \n", item.name.c_str()); 47 | DEBUG_PRINTF(" object: %s \n", item.object.c_str()); 48 | kernel_arg_list.push_back(new_arg); 49 | return 0; 50 | } 51 | static int get_of_number(void) 52 | { 53 | if (kernel_arg_list.size() > 0) 54 | return OUTPUT_ATTR_MULTI; 55 | 56 | else 57 | return OUTPUT_ATTR_SINGLE; 58 | } 59 | static int output_to_file(std::ofstream * of, int ln, int fileid, int gn) 60 | { 61 | for (int i = 0; i < kernel_arg_list.size(); i ++) 62 | { 63 | if (kernel_arg_list[i].kernel_arg.ln == ln ) 64 | { 65 | return 1; 66 | } 67 | else if (kernel_arg_list[i].kernel_arg.ln == ln + 1 ) 68 | { 69 | std::string object = kernel_arg_list[i].kernel_arg.object; 70 | while (true) 71 | { 72 | bool ret = replace(object, "#%d#", to_string(fileid + 1)); 73 | if (ret == false) 74 | { 75 | break; 76 | } 77 | } 78 | ( *of) << object << std::endl; 79 | return 1; 80 | } 81 | } 82 | return 0; 83 | } 84 | 85 | output_method_t kernel_output_method = 86 | 
{ 87 | .of_number = get_of_number, 88 | .output = output_to_file, 89 | }; 90 | 91 | 92 | static parser_item_t local_parser[] = 93 | { 94 | { 95 | .id = PRAGMA_ID_CU_DUPLICATE, 96 | .keyword = "MSLR_FUNCTION", 97 | .func = register_kernel_arg, 98 | }, 99 | }; 100 | 101 | reg_parser_item_t kernel_interface_parser = 102 | { 103 | .pointer = local_parser, 104 | .size = ARRAY_SIZE(local_parser), 105 | }; 106 | -------------------------------------------------------------------------------- /automation/parser/kernel_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef __KERNEL_INTERFACE_H__ 2 | #define __KERNEL_INTERFACE_H__ 3 | 4 | extern output_method_t kernel_output_method; 5 | 6 | extern reg_parser_item_t kernel_interface_parser; 7 | 8 | #endif /* __KERNEL_INTERFACE_H__ */ 9 | -------------------------------------------------------------------------------- /automation/parser/makefile.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "parser.h" 9 | #include "parser_debug.h" 10 | 11 | using namespace std; 12 | 13 | typedef struct 14 | { 15 | std::string name; 16 | arg_instance_t input; 17 | } mk_arg_instance_t; 18 | 19 | static std::vector mk_list; 20 | 21 | int register_makefile_instance(arg_instance_t item) 22 | { 23 | mk_arg_instance_t mk_item; 24 | mk_item.input = item; 25 | mk_list.push_back(mk_item); 26 | return 0; 27 | } 28 | 29 | static int get_of_number(void) 30 | { 31 | return OUTPUT_ATTR_SINGLE; 32 | } 33 | 34 | static int output_to_file(std::ofstream * of, int ln, int fileid, int gn) 35 | { 36 | for (int i = 0; i < mk_list.size(); i ++) 37 | { 38 | /* MSLR_INTERFACE_ARG */ 39 | if (mk_list[i].input.ln == ln ) 40 | { 41 | return 1; 42 | } 43 | else if (mk_list[i].input.ln == ln + 1 ) 44 | { 45 | for (int j = 0; j < gn ; j ++) 46 | { 47 | std::string object = mk_list[i].input.object; 48 
| while (true) 49 | { 50 | bool ret = replace(object, "$(%d)", to_string(j)); 51 | if (ret == false) 52 | { 53 | break; 54 | } 55 | } 56 | ( *of) << object << std::endl; 57 | } 58 | return 1; 59 | } 60 | } 61 | return 0; 62 | } 63 | 64 | output_method_t makefile_output_method = 65 | { 66 | .of_number = get_of_number, 67 | .output = output_to_file, 68 | }; 69 | 70 | 71 | static parser_item_t local_parser[] = 72 | { 73 | { 74 | .id = MAKEFILE_MEM_INSTANCE, 75 | .keyword = "MAKEFILE_MEM_INSTANCE", 76 | .func = register_makefile_instance, 77 | } 78 | }; 79 | 80 | 81 | 82 | reg_parser_item_t makefile_interface_parser = 83 | { 84 | .pointer = local_parser, 85 | .size = ARRAY_SIZE(local_parser), 86 | }; 87 | -------------------------------------------------------------------------------- /automation/parser/makefile.h: -------------------------------------------------------------------------------- 1 | #ifndef __MAKEFILE_INTERFACE_H__ 2 | #define __MAKEFILE_INTERFACE_H__ 3 | 4 | extern output_method_t makefile_output_method; 5 | 6 | extern reg_parser_item_t makefile_interface_parser; 7 | 8 | #endif /* __MAKEFILE_INTERFACE_H__ */ 9 | -------------------------------------------------------------------------------- /automation/parser/mem_interface.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEM_INTERFACE_H__ 2 | #define __MEM_INTERFACE_H__ 3 | 4 | extern output_method_t mem_output_method; 5 | 6 | extern reg_parser_item_t mem_interface_parser; 7 | 8 | #endif /* __MEM_INTERFACE_H__ */ 9 | -------------------------------------------------------------------------------- /automation/parser_debug.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void logger (char *fmt, ...) 
5 | { 6 | va_list argp; 7 | fprintf(stdout, "[PARSER] "); 8 | va_start(argp, fmt); 9 | vfprintf(stdout, fmt, argp); 10 | va_end(argp); 11 | } 12 | 13 | 14 | bool replace(std::string& str, const std::string& from, const std::string& to) { 15 | size_t start_pos = str.find(from); 16 | if (start_pos == std::string::npos) 17 | return false; 18 | str.replace(start_pos, from.length(), to); 19 | return true; 20 | } 21 | -------------------------------------------------------------------------------- /automation/parser_debug.h: -------------------------------------------------------------------------------- 1 | #ifndef __PARSER_DEBUG__ 2 | #define __PARSER_DEBUG__ 3 | 4 | #undef DEBUG_PRINTF 5 | #define DEBUG_PRINTF(fmt,...) logger((char *)fmt,##__VA_ARGS__); 6 | 7 | 8 | extern void logger (char *fmt, ...); 9 | 10 | 11 | #endif /* __PARSER_DEBUG__ */ 12 | -------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- 1 | # Dataset and Data Format 2 | 3 | 4 | | Graphs | *```V```* | *```E```* | *```D_avg```* | Graph type | 5 | |--------|-----------|------------|-------------|------------ | 6 | | rmat-19-32 (R19) | 524.3K | 16.8M | 32 | Synthetic | 7 | | rmat-21-32 (R21) | 2.1M | 67.1M | 32 | Synthetic | 8 | | rmat-24-16 (R24) | 16.8M | 268.4M | 16 | Synthetic | 9 | | graph500-scale23-ef16 (G23) | 4.6M | 258.5M | 56 | Synthetic | 10 | | graph500-scale24-ef16 (G24) | 8.9M | 520.5M | 59 | Synthetic | 11 | | graph500-scale25-ef16 (G25) | 17.0M | 1.0B | 61 | Synthetic | 12 | | wiki-talk (WT) | 2.4M | 5.0M | 2 | Communication | 13 | | web-google (GG) | 916.4K | 5.1M | 6 | Web | 14 | | amazon-2008 (AM) | 735.3K | 5.2M | 7 | Social | 15 | | bio-mouse-gene (MG) | 45.1K | 14.5M | 322 | biological | 16 | | web-hudong (HD) | 2.0M | 14.9M | 7 | Web | 17 | | soc-flickr-und (FU) | 1.7M | 15.6M | 9 | Social | 18 | | web-baidu-baike (BB) | 2.1M | 17.8M | 8 | Web | 19 | | 
wiki-topcats (TC) | 1.8M | 28.5M | 16 | Web | 20 | | pokec-relationships (PK) | 1.6M | 30.6M | 19 | Social | 21 | | wikipedia-20070206 (WP) | 3.6M | 45.0M | 13 | Web | 22 | | ca-hollywood-2009 (HW) | 1.1M | 56.3M | 53 | Social | 23 | | liveJournal1 (LJ) | 4.8M | 69.0M | 14 | Social | 24 | | soc-twitter (TW) | 21.3M | 265.0M | 12 | Social | 25 | 26 | 27 | The recursive graph (rmat) datasets are generated by the script located at ```dataset```, and the format is edge list which is shown below. 28 | 29 | **Edge list file format:** 30 | ``` 31 | 32 | ``` 33 | 34 | The property of vertices and the property of edges are __32-bit__ value (can be integer of float type). During the entire processing flow the id of vertices is unsigned __32-bit__ integer. 35 | -------------------------------------------------------------------------------- /dataset/kronecker_generator.m: -------------------------------------------------------------------------------- 1 | function ijw = kronecker_generator (SCALE, edgefactor) 2 | %% Generate an edgelist according to the Graph500 parameters. In this 3 | %% sample, the edge list is returned in an array with three rows, 4 | %% where StartVertex is first row, EndVertex is the second row, and 5 | %% Weight is the third row. The vertex labels start at zero. 6 | %% 7 | %% Example, creating a sparse matrix for viewing: 8 | %% ijw = kronecker_generator (10, 16); 9 | %% G = sparse (ijw(1,:)+1, ijw(2,:)+1, ones (1, size (ijw, 2))); 10 | %% spy (G); 11 | %% The spy plot should appear fairly dense. Any locality 12 | %% is removed by the final permutations. 13 | 14 | %% Set number of vertices. 15 | N = 2^SCALE; 16 | 17 | %% Set number of edges. 18 | M = edgefactor * N; 19 | 20 | %% Set initiator probabilities. 21 | [A, B, C] = deal (0.57, 0.19, 0.19); 22 | 23 | %% Create index arrays. 24 | ijw = ones (3, M); 25 | %% Loop over each order of bit. 
26 | ab = A + B; 27 | c_norm = C/(1 - (A + B)); 28 | a_norm = A/(A + B); 29 | 30 | for ib = 1:SCALE, 31 | %% Compare with probabilities and set bits of indices. 32 | ii_bit = rand (1, M) > ab; 33 | jj_bit = rand (1, M) > ( c_norm * ii_bit + a_norm * not (ii_bit) ); 34 | ijw(1:2,:) = ijw(1:2,:) + 2^(ib-1) * [ii_bit; jj_bit]; 35 | end 36 | 37 | %% Generate weights 38 | ijw(3,:) = unifrnd(0, 1, [1,M]); 39 | 40 | %% Permute vertex labels 41 | p = randperm (N); 42 | ijw(1:2,:) = p(ijw(1:2,:)); 43 | 44 | %% Permute the edge list 45 | p = randperm (M); 46 | ijw = ijw(:, p); 47 | 48 | %% Adjust to zero-based labels. 49 | ijw(1:2,:) = ijw(1:2,:) - 1; 50 | 51 | endfunction 52 | -------------------------------------------------------------------------------- /dataset/rmat.m: -------------------------------------------------------------------------------- 1 | clear all 2 | scale = 19; 3 | edge_factor = 32; 4 | rand ("seed", 103); 5 | 6 | ijw = kronecker_generator (scale, edge_factor); 7 | 8 | G = sparse(ijw(1,:) + 1, ijw(2,:) + 1, ones (1, size (ijw, 2))); 9 | 10 | [ei,ej] = find(G>0); 11 | 12 | dim = size(ei); 13 | edge_num = dim(1); 14 | 15 | filename = sprintf("rmat-%d-%d.txt",scale,edge_factor); 16 | fp=fopen(filename,'w'); 17 | 18 | for i = 1 : edge_num 19 | fprintf(fp,"%d %d \n",ei(i) , ej(i)); 20 | end 21 | 22 | fclose(fp) 23 | -------------------------------------------------------------------------------- /docs/compile_arch.md: -------------------------------------------------------------------------------- 1 | # Compiling ThunderGP 2 | This page provides details of quickly deploying build-in graph analytic algorithms of ThunderGP. 3 | The compilation of ThunderGP is managed by [GNU make](https://www.gnu.org/software/make/manual/html_node/Introduction.html). 4 | 5 | ## Compilation Arguments 6 | 7 | Currently, ThunderGP supports four graph analytic applications, namely PR, SpMV, BFS and SSSP. 
The wanted application can be built by passing the argument app=[the wanted application] to the make command. The table below lists the supported values of this argument. 8 | 9 | | Argument | Accelerated algorithm | 10 | |--------------|--------------| 11 | | ```app=pr``` | PageRank | 12 | | ```app=spmv``` | Sparse matrix-vector multiplication (SpMV) | 13 | | ```app=bfs``` | Breadth first search | 14 | | ```app=sssp``` | Single Source Shortest Path | 15 | 16 | Other arguments that developers may use: 17 | 18 | | Argument | Description | 19 | |--------------|--------------| 20 | | ```all``` | Compile host + accelerator programs; this is time-consuming (10+ hours) | 21 | | ```exe``` | Compile the host program only; this is very fast | 22 | | ```cleanexe``` | Clean the host program | 23 | | ```clean``` | Clean the accelerator program | 24 | | ```cleanall``` | Clean all of the host + accelerator programs | 25 | 26 | 27 | For example, if you want to build PageRank, the command is: 28 | 29 | ```sh 30 | $ make app=pr all 31 | ``` 32 | 33 | ## Makefile Tree 34 | 35 | The Makefile structure is shown in the following tree: 36 | 37 | ```sh 38 | ├── application 39 | │   ├── common.mk # general configuration for both host and accelerator programs 40 | │   ├── gs_kernel.mk # accelerator configuration for gather-scatter kernel 41 | │   ├── pr # application specific folder 42 | │   │   ├── apply_kernel.mk # accelerator configuration for apply kernel 43 | │   │   ├── build.mk # application specific build configuration 44 | │   │   ├── config.mk # application specific design parameters 45 | │   └── ... 46 | ├── docs 47 | ├── libfpga 48 | ├── libgraph 49 | ├── Makefile # Main entrance for make 50 | ├── README.md 51 | ├── utils 52 | │   ├── help.mk # Help information 53 | │   ├── opencl.mk # OpenCL library 54 | │   └── utils.mk # Misc. 
55 | └── xcl 56 | └── xcl.mk # Xilinx OpenCL library 57 | ``` 58 | 59 | ## Fast Debugging Compilation 60 | 61 | Compiling the entire accelerator can be very time-consuming (*__14+ hours__*), which makes iterative debugging impractical. ThunderGP provides a simplified, fast compilation mode that uses only one SLR; in this mode there is only one __Scatter-Gather CU__, and the compilation time is reduced significantly, to about *__4 hours__*. 62 | 63 | To enable fast compilation for debugging, change ```HAVE_FULL_SLR``` to false in the ```build.mk``` located in the application-specific directory, then start the build. The host program does not need any manual change, as it automatically adapts the partitioning to this configuration. 64 | 65 | 66 | ## Waveform-based Debugging 67 | 68 | 69 | Xilinx provides real-time waveform display in __hw_emu__ mode, but it needs many set-up steps on the command line. ThunderGP simplifies the use of this feature. 70 | 71 | * Change ```TARGETS``` to ```hw_emu``` in the main Makefile, or pass this argument directly on the make command line. 72 | * After the application has been built, run ```make hwemuprepare``` 73 | * Run the environment setup script: ```source ./utils/hw_emu.sh ``` 74 | * Start the program 75 | 76 | __Notes__: 77 | We found that waveform debugging in SDAccel 2019.2 has one problem: none of the stream interfaces are displayed correctly. Therefore we only use SDAccel 2018.3 for waveform-based debugging. 78 | 79 | 80 | 81 | ## Timing Problems 82 | 83 | As ThunderGP has very high resource utilization, timing issues can be very difficult to fix from the high-level-synthesis code. Our framework uses the stream slicing technique and the multi-level data duplication technique to achieve a higher frequency (__250MHz__). The details can be found in our paper. 
However, there are still many randomized factors in the placement and routine stage which can not be controlled by our HLS code. From our experience, we always start __3__ same compilation, which could result with a better frequency finally. 84 | -------------------------------------------------------------------------------- /docs/images/GAS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/GAS.png -------------------------------------------------------------------------------- /docs/images/GASmodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/GASmodel.png -------------------------------------------------------------------------------- /docs/images/SPMV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/SPMV.png -------------------------------------------------------------------------------- /docs/images/ThunderGP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/ThunderGP.png -------------------------------------------------------------------------------- /docs/images/automation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/automation.png -------------------------------------------------------------------------------- /docs/images/dataset.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/dataset.png -------------------------------------------------------------------------------- /docs/images/l2_dataflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/l2_dataflow.png -------------------------------------------------------------------------------- /docs/images/mem_hir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/mem_hir.png -------------------------------------------------------------------------------- /docs/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/overview.png -------------------------------------------------------------------------------- /docs/images/sche0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/sche0.png -------------------------------------------------------------------------------- /docs/images/sche1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/sche1.png -------------------------------------------------------------------------------- /docs/images/scheduling.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling.png -------------------------------------------------------------------------------- /docs/images/scheduling0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling0.png -------------------------------------------------------------------------------- /docs/images/scheduling1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling1.png -------------------------------------------------------------------------------- /docs/images/scheduling2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling2.png -------------------------------------------------------------------------------- /docs/images/scheduling3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling3.png -------------------------------------------------------------------------------- /docs/images/scheduling4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling4.png -------------------------------------------------------------------------------- /docs/images/scheduling5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Xtra-Computing/ThunderGP/c316a34d241f0ea2b619cbf6b52db9e8aed3662b/docs/images/scheduling5.png -------------------------------------------------------------------------------- /docs/memory.md: -------------------------------------------------------------------------------- 1 | # Memory Management 2 | In this section, we demonstrate how to manage the memory in host side with __L3__ level APIs provided by ThunderGP. 3 | 4 | 5 | Currently, Xilinx Multiple-SLRs FPGAs have many independent DRAM banks, and many graph algorithms manipulate at least five types of data. Managing the data and the partitions among the banks using the traditional OpenCL APIs need a lot of redundant code for configuration, which makes the code hard to maintaining. Therefore we developed an unique ID based memory management module(**he_mem**), and basing on this module, a hierarchy of data structures is constructed: 6 | 7 | drawing 8 | 9 | 10 | All the compuations and operations are heirarchically based on these object, which makes it easy to understand the relationship among the objects. There are two classes of objects in ThunderGP, we call them kernel descriptor and partition descriptor. 11 | 12 | ## Partition Descriptor 13 | Partition descriptor maintains all the partitioned data among the available DRAM banks, As illustrated in the above figure: 14 | 15 | 16 | * __partitionDescriptor__: the **purple** blocks are ```partitionDescriptor``` . There are *M* ```partitionDescriptor``` during the processing, where the *M* is determined by the size of graph dataset dynamically. Following figure shows how does ThunderGP perform the partitioning on the graph, each partitions hold edges in a fixed range of destination. 17 | 18 | 19 | drawing 20 | 21 | * __subPartitionDescriptor__: ```partitionDescriptor``` consist of *N* ```subPartitionDescriptor``` (the **grey** blocks), the *N* is determined by how many CUs are available in the FPGA side. 
The ```subPartitionDescriptor``` structure consists of three types of partitioned data of edge: the destination of edges, the source of edges and the property of edges. Each type of data is contained in ```he_mem``` elements (the **yellow** blocks). Following figure shows how does ThunderGP perform the sub-partitioning on the partitions, the edges of each partition is equally divided into *N* parts, here the *N* is 4. 22 | 23 | drawing 24 | 25 | 26 | ## Kernel Descriptor 27 | 28 | There are two types of kernel descriptor: the ```gatherScatterDescriptor``` and ```applyDescriptor```. Showing in the heirarchy figure, the **blue** blocks are the kernel descriptor, both of these two types of descriptors have an instance of OpenCL kernel (the **green** blocks). The property of vertices are also included in ```gatherScatterDescriptor```. 29 | 30 | 31 | ## Appendix 32 | 33 | API list for ```he_mem``` 34 | 35 | | Function | Description | 36 | |-------------|--------------| 37 | | ``` int he_mem_init() ```| initialize the ```he_mem``` structure | 38 | | ``` cl_mem* get_cl_mem_pointer() ```| get the OpenCL ```cl_mem``` instance in the specific ```he_mem``` | 39 | | ``` void* get_host_mem_pointer() ```| get the host memory pointer in the specific ```he_mem```| 40 | | ``` he_mem_t* get_he_mem() ```| get the instance of ```he_mem``` by the unique ID| 41 | | ``` int transfer_data_from_pl() ```| transfer the data from FPGA to CPU | 42 | | ``` int transfer_data_to_pl() ```| transfer the data from CPU to FPGA | 43 | -------------------------------------------------------------------------------- /docs/scheduling.md: -------------------------------------------------------------------------------- 1 | # Scheduling across Multi-SLRs 2 | 3 | While modern multi-SLRs based FPGAs provide high memory bandwidth and large hardware resources, utilizing it is not straightforward since the higher bandwidth comes from multiple independent memory channels, and it can make obtaining an efficient 
mapping of the kernels with high data transmission between the SLRs difficult. 4 | 5 | This poses two challenges for deploying an efficient graph processing framework on modern FPGAs with multiple SLRs: 6 | 7 | * allocating multiple kernels into multiple SLRs to fully utilize the FPGA resources; 8 | * scheduling graph partitions through multiple memory channels to fully utilize the memory bandwidth; 9 | 10 | 11 | For the first problem, you can find more details in our paper (under review); in this section we focus on the second problem (__scheduling__). 12 | 13 | 14 | ## Unbalanced Workloads 15 | ![normal](images/sche0.png) 16 | 17 | This figure shows a processing routine with __naive__ scheduling: the sub-partitions are assigned in sequence (the 1st sub-partition goes to the 1st CU), and the partitions are processed in order. From our observation, this naive arrangement has a significantly negative influence on performance because of __unbalanced__ partitions. The following figure shows an __unbalanced__ case. 18 | 19 | 20 | drawing 21 | 22 | ## Scheduling in ThunderGP 23 | 24 | To solve this imbalance problem, ThunderGP schedules the sub-partitions in the following steps: 25 | 26 | * Calculate the estimated execution time of the already-scheduled sub-partitions on each CU. 27 | * Calculate the estimated execution time of the sub-partitions of the incoming partition. 28 | * Determining the sub-partition order then becomes an optimization problem whose goal is to minimize the difference between the total estimated execution times of the CUs for the given order. 
29 | 30 | To do so, ThunderGP also builds a performance estimator: 31 | 32 | * Assume the execution time is a function of the following form: 33 | * ***T = f(V, E)*** 34 | * ***T***: execution time of one sub-partition 35 | * ***V***: the number of vertices in this sub-partition 36 | * ***E***: the number of edges in this sub-partition 37 | 38 | * Sample some sub-partitions from the dataset, and measure their real execution time on a single compute unit. 39 | * A polynomial model is chosen to represent this function: we start fitting the model parameters from a high-order polynomial, and iteratively decrease the order to find the best fit and estimation. 40 | 41 | The results show that this scheduling scheme brings a significant improvement on real graph datasets, compared with the unbalanced order. For the Twitter-SoC dataset it yields a nearly **55%** throughput improvement, and **21%** for Livejournal1. 42 | 43 | 44 | ## Customization 45 | 46 | As the scheduling problem is well studied in the systems community and there is a huge solution space, ThunderGP also provides an interface for users to customize the scheduling method for further exploration. To write a new scheduler: 47 | 48 | ![normal](images/sche1.png) 49 | 50 | * Firstly, the user needs to create a folder in ```libgraph/scheduler```. The name of the folder is also the name of this customized scheduler (e.g. *__normal__*). 51 | 52 | * Secondly, in this folder, a ```scheduler.cpp``` file is needed to hold the customized code. The two types of arrangement (shown in the above diagram) need to be implemented in a scheduler. 53 | 54 | * **```subPartitionArrangement```**: ① in the figure, orchestrates the sub-partitions in a specific order. 55 | 56 | * **```partitionArrangement```**: ② in the figure, orchestrates the partitions in a user-defined order. 57 | 58 | 59 | * When these two orchestrations are finished, they need to be integrated into ThunderGP. 
Taking the existing scheduler named *__normal__* as an example (```libgraph/scheduler/normal/scheduler.cpp```): 60 | ```c 61 | static graphStaticScheduler dut = { 62 | .init = normalInitHanlder, 63 | .subPartionScheduler = normalSubPartitionArrangementHandler, //subPartitionArrangement 64 | .partitionScheduler = normalSchedulerPartitionArrangement, //partitionArrangement 65 | }; 66 | 67 | int schedulerRegister(void) 68 | { 69 | return registerScheduler(&dut); 70 | } 71 | 72 | 73 | ``` 74 | 75 | A ```graphStaticScheduler``` object needs to be constructed, and then the user needs to provide an entry point for ThunderGP to execute, **```schedulerRegister```**; in this function ```registerScheduler``` needs to be called to register ```dut```, which is the ```graphStaticScheduler``` object constructed earlier. 76 | 77 | * Finally, to run the application with this customized scheduler, the ```SCHEDULER``` variable in the ```build.mk``` located at the application-specific path needs to be set to the name of the folder created in the first step. 78 | 79 | ```shell 80 | HAVE_FULL_SLR=true 81 | 82 | #scatter-gather kernel 83 | HAVE_VERTEX_ACTIVE_BIT=true 84 | HAVE_EDGE_PROP=false 85 | HAVE_UNSIGNED_PROP=true 86 | 87 | #apply kernel 88 | HAVE_APPLY=true 89 | CUSTOMIZE_APPLY=false 90 | HAVE_APPLY_OUTDEG=false 91 | 92 | #scheduler 93 | SCHEDULER=normal # We modified this parameter from secondOrderEstimator to normal 94 | 95 | ``` 96 | -------------------------------------------------------------------------------- /docs/verification.md: -------------------------------------------------------------------------------- 1 | # Verification 2 | 3 | As we have a cache in ThunderGP, the static verification method (emu-sw) cannot be used for the verification and validation of our framework. 
ThunderGP provides an automatic verification scheme: by calling the function ```acceleratorProfile``` after the calculation, the verification is performed automatically, and it outputs the difference between the result from the hardware unit and the golden reference. Currently it only supports verification of the first super step. -------------------------------------------------------------------------------- /libfpga/common_template/apply_kernel.mk: -------------------------------------------------------------------------------- 1 | ifeq ($(strip $(HAVE_APPLY)), $(strip $(VAR_TRUE))) 2 | $(XCLBIN)/vertexApply.$(TARGET).$(DSA).xo: $(APPLY_KERNEL_PATH)/apply_top_1.cpp 3 | mkdir -p $(XCLBIN) 4 | $(XOCC) $(CLFLAGS) -c -k vertexApply -I'$( 2 | #include "graph_fpga.h" 3 | 4 | 5 | #include "fpga_global_mem.h" 6 | #include "fpga_apply.h" 7 | 8 | 9 | 10 | extern "C" { 11 | void vertexApply( 12 | uint16 *vertexProp, 13 | #pragma THUNDERGP MSLR_INTERFACE_ARG tmpVertexProp 14 | uint16 *tmpVertexProp#%d#, 15 | #pragma THUNDERGP MSLR_INTERFACE_ARG newVertexProp 16 | uint16 *newVertexProp#%d#, 17 | #if HAVE_APPLY_OUTDEG 18 | uint16 *outDegree, 19 | #endif 20 | int *outReg, 21 | unsigned int vertexNum, 22 | unsigned int addrOffset, 23 | unsigned int argReg 24 | ) 25 | { 26 | 27 | 28 | #pragma THUNDERGP MSLR_INTERFACE_ATTR tmpVertexProp 29 | 30 | #pragma THUNDERGP MSLR_INTERFACE_ATTR newVertexProp 31 | 32 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE tmpVertexProp 33 | #pragma HLS INTERFACE m_axi port=tmpVertexProp#%d# offset=slave bundle=gmem#%d# max_read_burst_length=64 num_write_outstanding=4 34 | 35 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE tmpVertexProp 36 | #pragma HLS INTERFACE s_axilite port=tmpVertexProp#%d# bundle=control 37 | 38 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE newVertexProp 39 | #pragma HLS INTERFACE m_axi port=newVertexProp#%d# offset=slave bundle=gmem#%d# max_read_burst_length=64 num_write_outstanding=4 40 | 41 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE 
newVertexProp 42 | #pragma HLS INTERFACE s_axilite port=newVertexProp#%d# bundle=control 43 | 44 | 45 | #pragma HLS INTERFACE m_axi port=outReg offset=slave bundle=gmem5 46 | #pragma HLS INTERFACE s_axilite port=outReg bundle=control 47 | 48 | 49 | #pragma HLS INTERFACE m_axi port=vertexProp offset=slave bundle=gmem6 max_read_burst_length=64 50 | #pragma HLS INTERFACE s_axilite port=vertexProp bundle=control 51 | 52 | #if HAVE_APPLY_OUTDEG 53 | 54 | #pragma HLS INTERFACE m_axi port=outDegree offset=slave bundle=gmem7 max_read_burst_length=64 55 | #pragma HLS INTERFACE s_axilite port=outDegree bundle=control 56 | 57 | hls::stream outDegreeStream; 58 | #pragma HLS stream variable=outDegreeStream depth=256 59 | burstReadLite(addrOffset, vertexNum, outDegree, outDegreeStream); 60 | 61 | #endif 62 | 63 | #pragma HLS INTERFACE s_axilite port=vertexNum bundle=control 64 | #pragma HLS INTERFACE s_axilite port=argReg bundle=control 65 | #pragma HLS INTERFACE s_axilite port=addrOffset bundle=control 66 | #pragma HLS INTERFACE s_axilite port=return bundle=control 67 | 68 | #pragma HLS DATAFLOW 69 | 70 | hls::stream vertexPropStream; 71 | #pragma HLS stream variable=vertexPropStream depth=128 72 | 73 | hls::stream tmpVertexPropArray[SUB_PARTITION_NUM]; 74 | #pragma HLS stream variable=tmpVertexPropArray depth=2 75 | 76 | 77 | hls::stream tmpVertexPropStream[SUB_PARTITION_NUM + 1]; 78 | #pragma HLS stream variable=tmpVertexPropStream depth=2 79 | 80 | 81 | 82 | hls::stream newVertexPropStream; 83 | #pragma HLS stream variable=newVertexPropStream depth=16 84 | 85 | hls::stream newVertexPropArray[SUB_PARTITION_NUM]; 86 | #pragma HLS stream variable=newVertexPropArray depth=2 87 | 88 | int loopNum = (vertexNum >> 4) ; 89 | 90 | 91 | 92 | burstReadLite(addrOffset, vertexNum, vertexProp, vertexPropStream); 93 | 94 | 95 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE tmpVertexProp 96 | burstReadLite(0, vertexNum, tmpVertexProp#%d#, tmpVertexPropArray[#%d#]); 97 | 98 | for (int i = 0; i < 
SUB_PARTITION_NUM; i++) 99 | { 100 | #pragma HLS UNROLL 101 | cuMerge(loopNum, tmpVertexPropArray[i], tmpVertexPropStream[i], tmpVertexPropStream[i + 1]); 102 | } 103 | 104 | applyFunction( 105 | loopNum, 106 | #if HAVE_APPLY_OUTDEG 107 | outDegreeStream, 108 | #endif 109 | vertexPropStream, 110 | tmpVertexPropStream[SUB_PARTITION_NUM], 111 | argReg, 112 | newVertexPropStream, 113 | outReg 114 | ); 115 | 116 | cuDuplicate(loopNum , newVertexPropStream, 117 | newVertexPropArray); 118 | 119 | 120 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE newVertexProp 121 | writeBackLite(vertexNum, newVertexProp#%d# + (addrOffset >> 4), newVertexPropArray[#%d#]); 122 | 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /libfpga/common_template/scatter_gather_top.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "graph_fpga.h" 4 | 5 | #include "fpga_global_mem.h" 6 | #include "fpga_slice.h" 7 | #include "fpga_gather.h" 8 | #include "fpga_filter.h" 9 | #include "fpga_process_edge.h" 10 | #include "fpga_cache.h" 11 | #include "fpga_edge_prop.h" 12 | 13 | 14 | 15 | 16 | extern "C" { 17 | #pragma THUNDERGP MSLR_FUNCTION 18 | void readEdgesCU#%d#( 19 | uint16 *edgesHeadArray, 20 | uint16 *vertexPushinProp, 21 | uint16 *edgesTailArray, 22 | uint16 *tmpVertexProp, 23 | #if HAVE_EDGE_PROP 24 | uint16 *edgeProp, 25 | #endif 26 | int edge_end, 27 | int sink_offset, 28 | int sink_end 29 | ) 30 | { 31 | #include "fpga_gs_top.h" 32 | } 33 | 34 | } // extern C 35 | 36 | -------------------------------------------------------------------------------- /libfpga/customize_template/customize_apply_cl_kernel.h: -------------------------------------------------------------------------------- 1 | 2 | scalar_t global; 3 | 4 | void setApplyKernel(int partId, int superStep, graphInfo *info) 5 | { 6 | int currentPropId = superStep % 2; 7 | int updatePropId = (superStep + 1) % 
2; 8 | 9 | applyDescriptor * applyHandler = getApply(); 10 | int argvi = 0; 11 | subPartitionDescriptor *p_partition = getSubPartition(partId * SUB_PARTITION_NUM); 12 | 13 | volatile unsigned int partitionVertexNum = ((p_partition->dstVertexEnd - p_partition->dstVertexStart) 14 | / (ALIGN_SIZE ) + 1) * (ALIGN_SIZE ); 15 | int sink_end = partitionVertexNum; 16 | int offset = p_partition->dstVertexStart; 17 | 18 | 19 | clSetKernelArg(applyHandler->kernel, argvi++, sizeof(cl_mem), 20 | get_cl_mem_pointer(getGatherScatter(getCuIDbyInterface(DEFAULT_KERNEL_ID))->prop[currentPropId].id)); 21 | 22 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 23 | { 24 | clSetKernelArg(applyHandler->kernel, argvi++, sizeof(cl_mem), 25 | get_cl_mem_pointer(getSubPartition(partId * SUB_PARTITION_NUM + (i))->tmpProp.id) 26 | ); 27 | } 28 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 29 | { 30 | clSetKernelArg(applyHandler->kernel, argvi++, sizeof(cl_mem), 31 | get_cl_mem_pointer(getGatherScatter((i))->prop[updatePropId].id) 32 | ); 33 | he_set_dirty(getGatherScatter((i))->prop[updatePropId].id); 34 | } 35 | 36 | #pragma THUNDERGP USER_APPLY_CL_KERNEL 37 | // mark for auto generation 38 | 39 | clSetKernelArg(applyHandler->kernel, argvi++, sizeof(int), &sink_end); 40 | clSetKernelArg(applyHandler->kernel, argvi++, sizeof(int), &offset); 41 | 42 | } -------------------------------------------------------------------------------- /libfpga/customize_template/customize_apply_kernel.mk: -------------------------------------------------------------------------------- 1 | ifeq ($(strip $(HAVE_APPLY)), $(strip $(VAR_TRUE))) 2 | $(XCLBIN)/vertexApply.$(TARGET).$(DSA).xo: $(APPLY_KERNEL_PATH)/customize_apply_top_1.cpp 3 | mkdir -p $(XCLBIN) 4 | $(XOCC) $(CLFLAGS) -c -k vertexApply -I'$( 2 | #include "graph_fpga.h" 3 | 4 | 5 | #include "fpga_global_mem.h" 6 | #include "fpga_apply.h" 7 | 8 | union type_cov { 9 | float f; 10 | prop_t i; 11 | unsigned int ui; 12 | }; 13 | 14 | extern "C" { 15 | void 
vertexApply( 16 | uint16 *vertexProp, 17 | #pragma THUNDERGP MSLR_INTERFACE_ARG tmpVertexProp 18 | uint16 *tmpVertexProp#%d#, 19 | #pragma THUNDERGP MSLR_INTERFACE_ARG newVertexProp 20 | uint16 *newVertexProp#%d#, 21 | #pragma THUNDERGP USER_APPLY_ARG 22 | //mark for auto generation 23 | unsigned int vertexNum, 24 | unsigned int addrOffset 25 | ) 26 | { 27 | #pragma HLS DATAFLOW 28 | 29 | #pragma THUNDERGP MSLR_INTERFACE_ATTR tmpVertexProp 30 | 31 | #pragma THUNDERGP MSLR_INTERFACE_ATTR newVertexProp 32 | 33 | 34 | #pragma HLS INTERFACE m_axi port=vertexProp offset=slave bundle=gmem5 max_read_burst_length=64 35 | #pragma HLS INTERFACE s_axilite port=vertexProp bundle=control 36 | 37 | hls::stream vertexPropStream; 38 | #pragma HLS stream variable=vertexPropStream depth=128 39 | burstReadLite(addrOffset, vertexNum, vertexProp, vertexPropStream); 40 | 41 | 42 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE tmpVertexProp 43 | #pragma HLS INTERFACE m_axi port=tmpVertexProp#%d# offset=slave bundle=gmem#%d# max_read_burst_length=64 num_write_outstanding=4 44 | 45 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE tmpVertexProp 46 | #pragma HLS INTERFACE s_axilite port=tmpVertexProp#%d# bundle=control 47 | 48 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE newVertexProp 49 | #pragma HLS INTERFACE m_axi port=newVertexProp#%d# offset=slave bundle=gmem#%d# max_read_burst_length=64 num_write_outstanding=4 50 | 51 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE newVertexProp 52 | #pragma HLS INTERFACE s_axilite port=newVertexProp#%d# bundle=control 53 | 54 | #pragma THUNDERGP USER_APPLY_STREAM_ATTR 55 | //mark for auto generation 56 | 57 | #pragma THUNDERGP USER_APPLY_SCALAR_ATTR 58 | //mark for auto generation 59 | 60 | #pragma HLS INTERFACE s_axilite port=vertexNum bundle=control 61 | #pragma HLS INTERFACE s_axilite port=addrOffset bundle=control 62 | #pragma HLS INTERFACE s_axilite port=return bundle=control 63 | 64 | /* tmpVertexProp start*/ 65 | hls::stream 
tmpVertexPropArray[SUB_PARTITION_NUM]; 66 | #pragma HLS stream variable=tmpVertexPropArray depth=2 67 | 68 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE tmpVertexProp 69 | burstReadLite(0, vertexNum, tmpVertexProp#%d#, tmpVertexPropArray[#%d#]); 70 | 71 | hls::stream tmpVertexPropStream[SUB_PARTITION_NUM + 1]; 72 | #pragma HLS stream variable=tmpVertexPropStream depth=2 73 | 74 | int loopNum = (vertexNum >> 4) ; 75 | 76 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 77 | { 78 | #pragma HLS UNROLL 79 | cuMerge(loopNum, tmpVertexPropArray[i], tmpVertexPropStream[i], tmpVertexPropStream[i + 1]); 80 | } 81 | /* tmpVertexProp end */ 82 | 83 | hls::stream newVertexPropStream; 84 | #pragma HLS stream variable=newVertexPropStream depth=16 85 | 86 | 87 | 88 | 89 | for (int loopCount = 0; loopCount < loopNum; loopCount ++) 90 | { 91 | 92 | #pragma HLS PIPELINE II=1 93 | /* input */ 94 | burst_raw vertexProp; 95 | burst_raw tmpVertexProp; 96 | 97 | read_from_stream(vertexPropStream, vertexProp); 98 | read_from_stream(tmpVertexPropStream[SUB_PARTITION_NUM], tmpVertexProp); 99 | 100 | #pragma THUNDERGP USER_APPLY_READ_FROM_STREAM 101 | //mark for auto generation 102 | 103 | /* output */ 104 | burst_raw newVertexProp; 105 | 106 | core_loop: for (int i = 0; i < BURST_ALL_BITS / INT_WIDTH; i++) 107 | { 108 | #pragma HLS UNROLL 109 | prop_t wProp; 110 | prop_t tProp = tmpVertexProp.range((i + 1) * INT_WIDTH - 1, i * INT_WIDTH ); 111 | prop_t uProp = vertexProp.range( (i + 1) * INT_WIDTH - 1, i * INT_WIDTH ); 112 | 113 | #pragma THUNDERGP USER_APPLY_COV_FOR_CAL 114 | //mark for auto generation 115 | 116 | #pragma THUNDERGP USER_APPLY_CAL 117 | //mark for auto generation 118 | 119 | /* 120 | prop_t wProp = applyFunc( tProp, uProp, out_deg, tmpInfoArray[i], argReg); 121 | */ 122 | 123 | #pragma THUNDERGP USER_APPLY_COV_FOR_WRITE 124 | //mark for auto generation 125 | newVertexProp.range((i + 1) * INT_WIDTH - 1, i * INT_WIDTH ) = wProp; 126 | 127 | } 128 | #pragma THUNDERGP 
USER_APPLY_WRITE_TO_STREAM 129 | //mark for auto generation 130 | write_to_stream(newVertexPropStream, newVertexProp); 131 | } 132 | 133 | 134 | hls::stream newVertexPropArray[SUB_PARTITION_NUM]; 135 | #pragma HLS stream variable=newVertexPropArray depth=2 136 | 137 | 138 | cuDuplicate(loopNum , newVertexPropStream, 139 | newVertexPropArray); 140 | 141 | #pragma THUNDERGP MSLR_INTERFACE_INSTANCE newVertexProp 142 | writeBackLite(vertexNum, newVertexProp#%d# + (addrOffset >> 4), newVertexPropArray[#%d#]); 143 | 144 | #pragma THUNDERGP USER_APPLY_WRITE 145 | //mark for auto generation 146 | 147 | } 148 | 149 | } 150 | -------------------------------------------------------------------------------- /libfpga/customize_template/customize_mem.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUSTOMIZE_MEM_H__ 2 | #define __CUSTOMIZE_MEM_H__ 3 | 4 | 5 | typedef struct 6 | { 7 | #pragma THUNDERGP DUMP_MEM_SCALAR 8 | // mark for auto generation 9 | } scalar_t; 10 | 11 | extern scalar_t global; 12 | 13 | #pragma THUNDERGP DUMP_MEM_ATTR 14 | // mark for auto generation 15 | 16 | #endif /* __CUSTOMIZE_MEM_H__ */ -------------------------------------------------------------------------------- /libfpga/fpga_application.h: -------------------------------------------------------------------------------- 1 | #ifndef __FPGA_APPLICATION_H__ 2 | #define __FPGA_APPLICATION_H__ 3 | 4 | 5 | 6 | #include "l2.h" 7 | 8 | 9 | #ifndef IS_ACTIVE_VERTEX 10 | #define IS_ACTIVE_VERTEX(a) (1) 11 | #endif 12 | 13 | /* source vertex property process */ 14 | #define PROP_COMPUTE_STAGE0(srcProp) preprocessProperty(srcProp) 15 | 16 | /* source vertex property & edge property */ 17 | #define PROP_COMPUTE_STAGE1(srcProp, edgeProp) scatterFunc(srcProp,edgeProp) 18 | 19 | /* destination property update in RAW solver */ 20 | #define PROP_COMPUTE_STAGE2(ori, update) gatherFunc(ori ,update) 21 | 22 | /* destination property update dst buffer update */ 23 | 
#define PROP_COMPUTE_STAGE3(ori,update) gatherFunc(ori, update) 24 | 25 | /* destination property merge */ 26 | #define PROP_COMPUTE_STAGE4(ori,update) gatherFunc(ori, update) 27 | 28 | 29 | #endif /* __FPGA_APPLICATION_H__ */ 30 | -------------------------------------------------------------------------------- /libfpga/fpga_apply.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "graph_fpga.h" 4 | 5 | 6 | #include "fpga_global_mem.h" 7 | 8 | 9 | template 10 | void cuDuplicate ( int loopNum, 11 | hls::stream &input, 12 | hls::stream (&output)[SUB_PARTITION_NUM]) 13 | { 14 | #pragma HLS function_instantiate variable=input 15 | for (int i = 0; i < loopNum ; i++) 16 | { 17 | #pragma HLS PIPELINE II=1 18 | T unit; 19 | read_from_stream(input, unit); 20 | for (int j = 0; j < SUB_PARTITION_NUM; j ++) 21 | { 22 | #pragma HLS UNROLL 23 | write_to_stream(output[j], unit); 24 | } 25 | } 26 | } 27 | 28 | 29 | 30 | template 31 | void cuMerge ( int loopNum, 32 | hls::stream &input_a, 33 | hls::stream &input_b, 34 | hls::stream &output) 35 | { 36 | #pragma HLS function_instantiate variable=input_a 37 | for (int i = 0; i < loopNum ; i++) 38 | { 39 | #pragma HLS PIPELINE II=1 40 | T unit[2]; 41 | #pragma HLS ARRAY_PARTITION variable=unit dim=0 complete 42 | 43 | 44 | read_from_stream(input_a, unit[0]); 45 | read_from_stream(input_b, unit[1]); 46 | 47 | T res; 48 | for (int inner = 0; inner < 16 ; inner ++) 49 | { 50 | #pragma HLS UNROLL 51 | uint_raw tmp = PROP_COMPUTE_STAGE4( 52 | unit[0].range(31 + inner * 32, 0 + inner * 32), 53 | unit[1].range(31 + inner * 32, 0 + inner * 32) 54 | ); 55 | res.range(31 + inner * 32, 0 + inner * 32) = tmp; 56 | } 57 | 58 | write_to_stream(output, res); 59 | } 60 | } 61 | 62 | #if (CUSTOMIZE_APPLY==0) 63 | 64 | void applyFunction( 65 | int loopNum, 66 | #if HAVE_APPLY_OUTDEG 67 | hls::stream &outDegreeStream, 68 | #endif 69 | hls::stream &vertexPropStream, 70 | hls::stream 
&tmpVertexPropStream, 71 | unsigned int argReg, 72 | hls::stream &newVertexPropStream, 73 | int *outReg 74 | ) 75 | { 76 | unsigned int infoArray[BURST_ALL_BITS / INT_WIDTH][APPLY_REF_ARRAY_SIZE]; 77 | #pragma HLS ARRAY_PARTITION variable=infoArray dim=0 complete 78 | for (int i = 0; i < BURST_ALL_BITS / INT_WIDTH; i++) 79 | { 80 | for (int j = 0; j < APPLY_REF_ARRAY_SIZE; j++) 81 | { 82 | infoArray[i][j] = 0; 83 | } 84 | } 85 | for (int loopCount = 0; loopCount < loopNum; loopCount ++) 86 | { 87 | 88 | #pragma HLS PIPELINE II=1 89 | burst_raw vertexProp; 90 | burst_raw tmpVertexProp; 91 | 92 | read_from_stream(vertexPropStream, vertexProp); 93 | read_from_stream(tmpVertexPropStream, tmpVertexProp); 94 | 95 | #if HAVE_APPLY_OUTDEG 96 | burst_raw outDeg; 97 | read_from_stream(outDegreeStream, outDeg); 98 | #endif 99 | 100 | burst_raw newVertexProp; 101 | 102 | for (int i = 0; i < BURST_ALL_BITS / INT_WIDTH; i++) 103 | { 104 | #pragma HLS UNROLL 105 | prop_t tProp = tmpVertexProp.range((i + 1) * INT_WIDTH - 1, i * INT_WIDTH ); 106 | prop_t uProp = vertexProp.range( (i + 1) * INT_WIDTH - 1, i * INT_WIDTH ); 107 | #if HAVE_APPLY_OUTDEG 108 | prop_t out_deg = outDeg.range( (i + 1) * INT_WIDTH - 1, i * INT_WIDTH ); 109 | #else 110 | prop_t out_deg = 0; 111 | #endif 112 | unsigned int tmpInfoArray[BURST_ALL_BITS / INT_WIDTH][APPLY_REF_ARRAY_SIZE]; 113 | #pragma HLS ARRAY_PARTITION variable=tmpInfoArray dim=0 complete 114 | //#pragma HLS DEPENDENCE variable=tmpInfoArray inter false 115 | 116 | prop_t wProp = applyFunc( tProp, uProp, out_deg, tmpInfoArray[i], argReg); 117 | for (int j = 0; j < APPLY_REF_ARRAY_SIZE; j++) 118 | { 119 | infoArray[i][j] += tmpInfoArray[i][j]; 120 | } 121 | newVertexProp.range((i + 1) * INT_WIDTH - 1, i * INT_WIDTH ) = wProp; 122 | 123 | } 124 | write_to_stream(newVertexPropStream, newVertexProp); 125 | } 126 | 127 | for (int j = 0; j < APPLY_REF_ARRAY_SIZE; j++) 128 | { 129 | int infoAggregate = 0; 130 | 131 | for (int i = 0; i < BURST_ALL_BITS 
/ INT_WIDTH; i ++) 132 | { 133 | /* debug trace: print the j-th counter of lane i, i.e. the element summed below, not the whole row */ DEBUG_PRINTF("infoArray %d %d \n", i, infoArray[i][j]); 134 | infoAggregate += infoArray[i][j]; 135 | } 136 | outReg[j] = infoAggregate; 137 | } 138 | } 139 | #endif 140 | -------------------------------------------------------------------------------- /libfpga/fpga_edge_prop.h: -------------------------------------------------------------------------------- 1 | #ifndef __FPGA_EDGE_PROP_H__ 2 | #define __FPGA_EDGE_PROP_H__ 3 | 4 | #include "graph_fpga.h" 5 | 6 | #include "fpga_application.h" 7 | 8 | void halfEdgeProp( 9 | hls::stream &input, 10 | hls::stream &output 11 | ) 12 | { 13 | while (true) 14 | { 15 | #pragma HLS PIPELINE II=2 16 | burst_token tmp; 17 | read_from_stream(input, tmp); 18 | burst_half out1 = tmp.data.range(255, 0); 19 | burst_half out2 = tmp.data.range(511, 256); 20 | write_to_stream(output, out1); 21 | write_to_stream(output, out2); 22 | if (tmp.flag == FLAG_SET) 23 | { 24 | break; 25 | } 26 | } 27 | } 28 | 29 | 30 | void edgePropCouple ( 31 | hls::stream &input, 32 | hls::stream &edgeProp, 33 | hls::stream &output) 34 | { 35 | 36 | while (true) 37 | { 38 | #pragma HLS PIPELINE II=1 39 | edge_tuples_t tuples; 40 | edge_tuples_t out; 41 | read_from_stream(input, tuples); 42 | burst_half prop; 43 | read_from_stream(edgeProp, prop); 44 | 45 | for (int i = 0; i < EDGE_NUM; i++) 46 | { 47 | #pragma HLS UNROLL 48 | out.data[i].x = tuples.data[i].x; 49 | out.data[i].y = PROP_COMPUTE_STAGE1(tuples.data[i].y, prop.range(31 + 32 * i, 0 + 32 * i)); 50 | out.flag = tuples.flag; 51 | } 52 | write_to_stream(output, out); 53 | if (tuples.flag == FLAG_SET) 54 | { 55 | break; 56 | } 57 | } 58 | clear_stream(input); 59 | clear_stream(edgeProp); 60 | } 61 | 62 | void propProcess( hls::stream &propInput, 63 | hls::stream &tupleInput, 64 | hls::stream &tupleOuput 65 | ) 66 | { 67 | #pragma HLS DATAFLOW 68 | 69 | hls::stream halfPropStream; 70 | #pragma HLS stream variable=halfPropStream depth=2 71 | 72 | halfEdgeProp(propInput, 
halfPropStream); 73 | edgePropCouple(tupleInput, halfPropStream, tupleOuput); 74 | 75 | } 76 | 77 | 78 | void propProcessSelf( hls::stream &tupleInput, 79 | hls::stream &tupleOuput 80 | ) 81 | 82 | { 83 | while (true) 84 | { 85 | #pragma HLS PIPELINE II=1 86 | edge_tuples_t in; 87 | edge_tuples_t out; 88 | read_from_stream(tupleInput, in); 89 | 90 | for (int i = 0; i < EDGE_NUM; i++) 91 | { 92 | #pragma HLS UNROLL 93 | out.data[i].x = in.data[i].x; 94 | out.data[i].y = PROP_COMPUTE_STAGE0(in.data[i].y); 95 | out.flag = in.flag; 96 | } 97 | write_to_stream(tupleOuput, out); 98 | if (in.flag == FLAG_SET) 99 | { 100 | break; 101 | } 102 | } 103 | clear_stream(tupleInput); 104 | } 105 | 106 | #endif /* __FPGA_EDGE_PROP_H__ */ 107 | -------------------------------------------------------------------------------- /libfpga/fpga_filter.h: -------------------------------------------------------------------------------- 1 | #ifndef __FPGA_FILTER_H__ 2 | #define __FPGA_FILTER_H__ 3 | 4 | #include "graph_fpga.h" 5 | 6 | 7 | void tupleFilter( 8 | filter_type &filter, 9 | uint_raw &filter_num, 10 | hls::stream &toFilterItem, 11 | hls::stream &buildArray 12 | ) 13 | { 14 | #pragma HLS function_instantiate variable=filter_num 15 | 16 | uint_raw filter_end; 17 | while (true) { 18 | #pragma HLS PIPELINE II=1 19 | #pragma HLS dependence variable=filter inter false 20 | #pragma HLS dependence variable=buildArray inter false 21 | #pragma HLS dependence variable=filter_end inter false 22 | 23 | read_from_stream(toFilterItem, filter); 24 | filter_end = filter.end; 25 | filter_num = filter.num; 26 | 27 | 28 | for (int j = 0; j < filter_num; j ++) { 29 | #pragma HLS PIPELINE II=1 rewind 30 | /* 31 | if (j >= filter_num) 32 | { 33 | continue; 34 | } 35 | else 36 | */ 37 | { 38 | int2_token token; 39 | token.data = filter.data[j]; 40 | token.flag = (j == (filter_num - 1 ))? 
filter.end: 0; 41 | write_to_stream(buildArray, token); 42 | } 43 | } 44 | if (filter_end) 45 | { 46 | break; 47 | } 48 | 49 | } 50 | //clear_stream(toFilterItem); 51 | return; 52 | } 53 | 54 | 55 | #endif /* __FPGA_FILTER_H__ */ 56 | 57 | -------------------------------------------------------------------------------- /libfpga/fpga_gather.h: -------------------------------------------------------------------------------- 1 | #ifndef __FPGA_GATHER_H__ 2 | #define __FPGA_GATHER_H__ 3 | 4 | #include "graph_fpga.h" 5 | 6 | #include "fpga_decoder.h" 7 | 8 | void shuffleDispatcher( 9 | int i, 10 | uchar &opcode, 11 | filter_type &filter, 12 | hls::stream &toFilterArray 13 | 14 | ) 15 | { 16 | if (opcode | (filter.end == 1)) { 17 | write_to_stream(toFilterArray, filter); 18 | } 19 | } 20 | 21 | 22 | void shuffleEntry ( 23 | int i, 24 | hls::stream &edgeTuplesArray, 25 | hls::stream &toFilterArray, 26 | filter_type &filter, 27 | edge_tuples_t &tuples, 28 | uchar &opcode, 29 | shuffled_type &shuff_ifo) 30 | { 31 | #pragma HLS function_instantiate variable=i 32 | 33 | 34 | 35 | unsigned char unitFinishFlag[EDGE_NUM]; 36 | #pragma HLS ARRAY_PARTITION variable=unitFinishFlag dim=0 complete 37 | 38 | for (int j = 0; j < EDGE_NUM; j++) { 39 | #pragma HLS UNROLL 40 | unitFinishFlag[j] = 0; 41 | } 42 | unsigned char end_flag_shift; 43 | 44 | 45 | while (true) 46 | { 47 | #pragma HLS PIPELINE II=1 48 | read_from_stream(edgeTuplesArray, tuples); 49 | 50 | 51 | uchar valid_r[8]; 52 | #pragma HLS ARRAY_PARTITION variable=valid_r dim=0 complete 53 | 54 | uchar idx[8]; 55 | #pragma HLS ARRAY_PARTITION variable=idx dim=0 complete 56 | // each collect engine do their work 57 | //int16 data_r; 58 | for (int j = 0; j < 8; j ++) 59 | { 60 | #pragma HLS UNROLL 61 | valid_r[j] = 0; 62 | idx[j] = 0; 63 | } 64 | end_flag_shift = tuples.flag; 65 | 66 | //data_r = tuples.data; 67 | 68 | valid_r[0] = ((tuples.data[0].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[0].y); 69 | valid_r[1] = 
((tuples.data[1].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[1].y); 70 | valid_r[2] = ((tuples.data[2].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[2].y); 71 | valid_r[3] = ((tuples.data[3].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[3].y); 72 | valid_r[4] = ((tuples.data[4].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[4].y); 73 | valid_r[5] = ((tuples.data[5].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[5].y); 74 | valid_r[6] = ((tuples.data[6].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[6].y); 75 | valid_r[7] = ((tuples.data[7].x & HASH_MASK) == i) && IS_ACTIVE_VERTEX(tuples.data[7].y); 76 | 77 | 78 | opcode = valid_r[0] 79 | + (valid_r[1] << 1) 80 | + (valid_r[2] << 2) 81 | + (valid_r[3] << 3) 82 | + (valid_r[4] << 4) 83 | + (valid_r[5] << 5) 84 | + (valid_r[6] << 6) 85 | + (valid_r[7] << 7); 86 | 87 | shuff_ifo = shuffleDecoder(opcode); 88 | 89 | filter.num = shuff_ifo.num; 90 | idx[0] = shuff_ifo.idx & 0x7; 91 | idx[1] = (shuff_ifo.idx >> 3) & 0x7; 92 | idx[2] = (shuff_ifo.idx >> 6) & 0x7; 93 | idx[3] = (shuff_ifo.idx >> 9) & 0x7; 94 | idx[4] = (shuff_ifo.idx >> 12) & 0x7; 95 | idx[5] = (shuff_ifo.idx >> 15) & 0x7; 96 | idx[6] = (shuff_ifo.idx >> 18) & 0x7; 97 | idx[7] = (shuff_ifo.idx >> 21) & 0x7; 98 | 99 | filter.end = 0; 100 | 101 | 102 | if ((end_flag_shift) != 0) 103 | { 104 | filter.end = 1; 105 | filter.num = 1; 106 | } 107 | else 108 | { 109 | filter.end = 0; 110 | } 111 | 112 | for (int j = 0; j < EDGE_NUM; j ++) { 113 | #pragma HLS UNROLL 114 | filter.data[j] = tuples.data[idx[j]]; //data_r_uint2[idx_t]; 115 | } 116 | shuffleDispatcher(i, opcode, filter, toFilterArray); 117 | if (filter.end != 0) 118 | { 119 | break; 120 | } 121 | } 122 | return; 123 | } 124 | 125 | 126 | #endif /* __FPGA_GATHER_H__ */ 127 | -------------------------------------------------------------------------------- /libfpga/fpga_slice.h: -------------------------------------------------------------------------------- 1 | 
#ifndef __FPGA_SLICE_H__ 2 | #define __FPGA_SLICE_H__ 3 | 4 | #include "graph_fpga.h" 5 | 6 | template 7 | void sliceStream(hls::stream &input, 8 | hls::stream &output) 9 | { 10 | #pragma HLS function_instantiate variable=input 11 | while (true) 12 | { 13 | #pragma HLS PIPELINE II=1 14 | T unit; 15 | read_from_stream(input, unit); 16 | write_to_stream(output, unit); 17 | if (unit.flag == FLAG_SET) 18 | { 19 | break; 20 | } 21 | } 22 | } 23 | 24 | 25 | 26 | template 27 | void duplicateStream2(hls::stream &input, 28 | hls::stream &output1, 29 | hls::stream &output2) 30 | { 31 | #pragma HLS function_instantiate variable=input 32 | while (true) 33 | { 34 | #pragma HLS PIPELINE II=1 35 | T unit; 36 | read_from_stream(input, unit); 37 | write_to_stream(output1, unit); 38 | write_to_stream(output2, unit); 39 | if (unit.range(31, 0) == ENDFLAG) 40 | { 41 | break; 42 | } 43 | } 44 | } 45 | 46 | 47 | 48 | template 49 | void duplicateStream4(hls::stream &input, 50 | hls::stream &output1, 51 | hls::stream &output2, 52 | hls::stream &output3, 53 | hls::stream &output4) 54 | { 55 | #pragma HLS function_instantiate variable=input 56 | while (true) 57 | { 58 | #pragma HLS PIPELINE II=1 59 | T unit; 60 | read_from_stream(input, unit); 61 | write_to_stream(output1, unit); 62 | write_to_stream(output2, unit); 63 | write_to_stream(output3, unit); 64 | write_to_stream(output4, unit); 65 | if (unit.flag == FLAG_SET) 66 | { 67 | break; 68 | } 69 | } 70 | } 71 | 72 | template 73 | void duplicateStream4WithClear(hls::stream &input, 74 | hls::stream &output1, 75 | hls::stream &output2, 76 | hls::stream &output3, 77 | hls::stream &output4) 78 | { 79 | #pragma HLS function_instantiate variable=input 80 | while (true) 81 | { 82 | #pragma HLS PIPELINE II=1 83 | T unit; 84 | read_from_stream(input, unit); 85 | write_to_stream(output1, unit); 86 | write_to_stream(output2, unit); 87 | write_to_stream(output3, unit); 88 | write_to_stream(output4, unit); 89 | if (unit.flag == FLAG_SET) 90 | { 91 | 
break; 92 | } 93 | } 94 | } 95 | 96 | 97 | 98 | void processEdgesBuildSlice(hls::stream &in , hls::stream &out) 99 | { 100 | #pragma HLS function_instantiate variable=in 101 | while (true) 102 | { 103 | int2_token tmp_data; 104 | read_from_stream(in, tmp_data); 105 | write_to_stream(out, tmp_data); 106 | if (tmp_data.flag == FLAG_SET) 107 | { 108 | break; 109 | } 110 | } 111 | } 112 | 113 | 114 | 115 | void processEdgesSlice(hls::stream &input, hls::stream &output) 116 | { 117 | #pragma HLS function_instantiate variable=input 118 | for (int i = 0; i < ((MAX_VERTICES_IN_ONE_PARTITION ) >> (LOG2_PE_NUM + 1)); i++) 119 | { 120 | #pragma HLS PIPELINE II=1 121 | uint_uram tmp; 122 | read_from_stream(input, tmp); 123 | write_to_stream(output, tmp); 124 | } 125 | } 126 | 127 | 128 | void filterSlice( 129 | hls::stream &input, 130 | hls::stream &output 131 | ) 132 | { 133 | #pragma HLS function_instantiate variable=input 134 | 135 | while (true) { 136 | filter_type tmp; 137 | 138 | read_from_stream(input, tmp); 139 | 140 | write_to_stream(output, tmp); 141 | if (tmp.end) 142 | { 143 | break; 144 | } 145 | } 146 | } 147 | 148 | 149 | 150 | #endif /* __FPGA_SLICE_H__ */ 151 | -------------------------------------------------------------------------------- /libfpga/graph_fpga.h: -------------------------------------------------------------------------------- 1 | #ifndef __GRAPH_FPGA_H__ 2 | #define __GRAPH_FPGA_H__ 3 | 4 | #define AP_INT_MAX_W 4096 5 | #include 6 | #include "para_check.h" 7 | #include "global_config.h" 8 | 9 | #define PE_NUM 16 10 | #define EDGE_NUM 8 11 | #define LOG2_PE_NUM 4 12 | #define LOG2_EDGE_NUM (LOG2_PE_NUM - 1) 13 | 14 | #define HASH_MASK (PE_NUM - 1) 15 | 16 | 17 | #define DATA_WIDTH (512) 18 | #define INT_WIDTH (32) 19 | #define INT_WIDTH_SHIFT (5) 20 | 21 | #define SIZE_BY_INT (DATA_WIDTH/INT_WIDTH) 22 | #define LOG2_SIZE_BY_INT (4) //change manual 23 | #define SIZE_BY_INT_MASK (SIZE_BY_INT - 1) 24 | 25 | #define BURST_READ_SIZE (4) 26 | #define 
LOG2_BURST_READ_SIZE (2) //change manual 27 | 28 | 29 | #define BURST_BUFFER_SIZE (SIZE_BY_INT) 30 | #define BURST_BUFFER_MASK (BURST_BUFFER_SIZE - 1) 31 | 32 | #define BURST_ALL_BITS (DATA_WIDTH) 33 | 34 | 35 | typedef ap_uint<8> ushort_raw; 36 | 37 | 38 | typedef ap_uint<32> uint_raw; 39 | 40 | typedef ap_uint uint16; 41 | 42 | typedef ap_uint<128> uint4_raw; 43 | 44 | 45 | typedef ap_uint burst_raw; 46 | 47 | 48 | typedef struct { 49 | burst_raw data; 50 | ap_uint<1> flag; 51 | } burst_token; 52 | 53 | typedef ap_uint burst_half; 54 | 55 | typedef ap_uint<64> uint_uram; 56 | 57 | #define BITMAP_SLICE_SIZE (16) 58 | #define BITMAP_SLICE_SHIFT (4) 59 | 60 | 61 | #define EDGE_MAX (2*1024*1024)//5610680////163840 // (1024*1024) 62 | #define BRAM_BANK 16 63 | #define LOG2_BRAM_BANK 4 64 | #define PAD_TYPE int16 65 | #define PAD_WITH 16 66 | 67 | 68 | 69 | #ifndef FLAG_SET 70 | #define FLAG_SET (1u) 71 | #endif 72 | 73 | #ifndef FLAG_RESET 74 | #define FLAG_RESET (0u) 75 | #endif 76 | 77 | 78 | #define uchar unsigned char 79 | 80 | 81 | typedef struct __int2__ 82 | { 83 | int x; 84 | #if HAVE_UNSIGNED_PROP 85 | uint_raw y; 86 | #else 87 | int y; 88 | #endif 89 | } int2; 90 | 91 | 92 | typedef struct 93 | { 94 | int2 data; 95 | ap_uint<1> flag; 96 | } int2_token; 97 | 98 | typedef struct EdgeInfo { 99 | int2 data[EDGE_NUM]; 100 | ap_uint<1> flag; 101 | } edge_tuples_t; 102 | 103 | typedef struct shuffledData { 104 | uint_raw num; 105 | uint_raw idx; 106 | } shuffled_type; 107 | 108 | typedef struct filterData { 109 | bool end; 110 | uchar num; 111 | int2 data[EDGE_NUM]; 112 | } filter_type; 113 | 114 | typedef struct processinfo { 115 | uint_raw outDeg; 116 | uint_raw data; 117 | } process_type; 118 | 119 | 120 | 121 | //#define SW_DEBUG 122 | 123 | #ifdef SW_DEBUG 124 | 125 | #include "stdio.h" 126 | 127 | #define DEBUG_PRINTF(fmt,...) printf(fmt,##__VA_ARGS__); fflush(stdout); 128 | 129 | #else 130 | 131 | #define DEBUG_PRINTF(fmt,...) 
; 132 | 133 | #endif 134 | 135 | 136 | 137 | 138 | #ifdef CACHE_DEBUG 139 | 140 | #include "stdio.h" 141 | 142 | #define C_PRINTF(fmt,...) printf(fmt,##__VA_ARGS__); fflush(stdout); 143 | 144 | #else 145 | 146 | #define C_PRINTF(fmt,...) ; 147 | 148 | #endif 149 | 150 | 151 | 152 | #define CLEAR_CYCLE (256) 153 | 154 | 155 | template 156 | inline int clear_stream (hls::stream &stream) 157 | { 158 | #pragma HLS INLINE 159 | int end_counter = 0; 160 | clear_stream: while (true) 161 | { 162 | T clear_data; 163 | 164 | if ( read_from_stream_nb(stream, clear_data) == 0) 165 | { 166 | end_counter ++; 167 | } 168 | if (end_counter > CLEAR_CYCLE) 169 | { 170 | break; 171 | } 172 | } 173 | return 0; 174 | } 175 | 176 | 177 | 178 | template 179 | inline int empty_stream (hls::stream &stream) 180 | { 181 | #pragma HLS INLINE 182 | int end_counter = 0; 183 | empty_stream: while (true) 184 | { 185 | T clear_data; 186 | 187 | if ( read_from_stream_nb(stream, clear_data) == 0) 188 | { 189 | end_counter ++; 190 | } 191 | else 192 | { 193 | end_counter = 0; 194 | } 195 | if (end_counter > 4096) 196 | { 197 | break; 198 | } 199 | } 200 | return 0; 201 | } 202 | 203 | 204 | 205 | template 206 | inline int write_to_stream (hls::stream &stream, T const& value) 207 | { 208 | #pragma HLS INLINE 209 | int count = 0; 210 | stream << value; 211 | return 0; 212 | } 213 | 214 | 215 | template 216 | inline int read_from_stream (hls::stream &stream, T & value) 217 | { 218 | #pragma HLS INLINE 219 | value = stream.read(); 220 | return 0; 221 | #if 0 222 | if (stream.read_nb(value)) 223 | { 224 | return 0; 225 | } 226 | else 227 | { 228 | return -1; 229 | } 230 | #endif 231 | } 232 | 233 | 234 | template 235 | inline int read_from_stream_nb (hls::stream &stream, T & value) 236 | { 237 | #pragma HLS INLINE 238 | if (stream.empty()) 239 | { 240 | return 0; 241 | } 242 | else 243 | { 244 | value = stream.read(); 245 | return 1; 246 | } 247 | } 248 | 249 | 250 | 251 | 252 | #endif /* __GRAPH_FPGA_H__ 
*/ 253 | -------------------------------------------------------------------------------- /libgraph/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __COMMON_H__ 2 | #define __COMMON_H__ 3 | 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | #define EXPAND(x) x 12 | 13 | 14 | #define STRINGIFY_MACRO(x) STR(x) 15 | #define STR(x) #x 16 | 17 | #define CONCAT4(n1, n2, n3, n4) STRINGIFY_MACRO(EXPAND(n1)EXPAND(n2)EXPAND(n3)EXPAND(n4)) 18 | 19 | #define CAT_SECOND_LEVLE(x, y) x ## y 20 | #define VAR_CONCAT2(x, y) CAT_SECOND_LEVLE(x, y) 21 | 22 | 23 | #ifndef FLAG_SET 24 | #define FLAG_SET (1u) 25 | #endif 26 | 27 | #ifndef FLAG_RESET 28 | #define FLAG_RESET (0u) 29 | #endif 30 | 31 | 32 | #if 1 33 | 34 | #define DEBUG_PRINTF(fmt,...) printf(fmt,##__VA_ARGS__); fflush(stdout); 35 | 36 | #else 37 | 38 | #define DEBUG_PRINTF(fmt,...) ; 39 | 40 | #endif 41 | 42 | 43 | #ifndef ARRAY_SIZE 44 | #define ARRAY_SIZE(arr) sizeof(arr)/sizeof((arr)[0]) 45 | #endif 46 | 47 | 48 | inline unsigned int get_aligned_size(unsigned int in, unsigned int align) 49 | { 50 | if (in == 0) 51 | { 52 | return align; 53 | } 54 | else 55 | { 56 | return (((((in - 1) / align) + 1) * align)); 57 | } 58 | } 59 | 60 | #define SIZE_ALIGNMENT(in,align) get_aligned_size((unsigned int)in,(unsigned int)align) 61 | 62 | 63 | #endif /* __COMMON_H__ */ 64 | -------------------------------------------------------------------------------- /libgraph/default_entry.cpp: -------------------------------------------------------------------------------- 1 | /* DEFAULT_ENTRY in build.mk */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "host_graph_api.h" 9 | #include "host_graph_verification.h" 10 | 11 | 12 | using namespace std; 13 | 14 | graphInfo graphDataInfo; 15 | 16 | int main(int argc, char **argv) { 17 | 18 | char * xcl_file = NULL; 19 | if (argc > 1) 20 | { 21 | xcl_file = argv[1]; 22 | } 23 | 24 | std::string 
gName; 25 | if (argc > 2) 26 | { 27 | gName = argv[2]; 28 | } 29 | else 30 | { 31 | gName = "wiki-talk"; 32 | } 33 | std::string mode = "normal"; 34 | 35 | DEBUG_PRINTF("start main\n"); 36 | 37 | acceleratorInit("graph_fpga", xcl_file); 38 | 39 | acceleratorDataLoad(gName, mode, &graphDataInfo); 40 | 41 | acceleratorDataPreprocess(&graphDataInfo); 42 | /* for verification */ 43 | acceleratorCModelDataPreprocess(&graphDataInfo); 44 | 45 | for (int runCounter = 0 ; runCounter < 10 ; runCounter ++) 46 | { 47 | double startStamp, endStamp; 48 | startStamp = getCurrentTimestamp(); 49 | 50 | acceleratorSuperStep(runCounter, &graphDataInfo); 51 | 52 | endStamp = getCurrentTimestamp(); 53 | /* for verification */ 54 | acceleratorCModelSuperStep(runCounter, &graphDataInfo); 55 | 56 | /* for profile */ 57 | acceleratorProfile(runCounter, runCounter, &graphDataInfo, endStamp - startStamp); 58 | } 59 | acceleratorDeinit(); 60 | 61 | return 0; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /libgraph/host_graph_api.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_API_H__ 2 | #define __HOST_GRAPH_API_H__ 3 | 4 | #include 5 | 6 | 7 | #include "common.h" 8 | #include "global_config.h" 9 | 10 | typedef struct 11 | { 12 | int vertexNum; 13 | int compressedVertexNum; 14 | int edgeNum; 15 | int blkNum; 16 | } graphInfo; 17 | 18 | /* misc */ 19 | unsigned int dataPrepareGetArg(graphInfo *info); 20 | int dataPrepareProperty(graphInfo *info); 21 | double getCurrentTimestamp(void); 22 | void reTransferProp(graphInfo *info); 23 | 24 | /* host api -- dataflow */ 25 | int acceleratorInit(const char * name, char *file_name); 26 | int acceleratorDataLoad(const std::string &gName, const std::string &mode, graphInfo *info); 27 | int acceleratorDataPreprocess(graphInfo *info); 28 | int acceleratorSuperStep(int superStep, graphInfo *info); 29 | int acceleratorDeinit(void); 30 | 31 | /* 
host api -- query */ 32 | void* acceleratorQueryRegister(void); 33 | prop_t* acceleratorQueryProperty(int step); 34 | 35 | #include "host_graph_csv.hpp" 36 | 37 | #endif /* __HOST_GRAPH_API_H__ */ 38 | -------------------------------------------------------------------------------- /libgraph/host_graph_data_structure.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_DATA_STRUCTURE_H__ 2 | #define __HOST_GRAPH_DATA_STRUCTURE_H__ 3 | 4 | #include "graph.h" 5 | 6 | #define MAX_PARTITIONS_NUM (128) 7 | 8 | 9 | typedef struct 10 | { 11 | double fpgaExeTime; 12 | double effic; 13 | double compress; 14 | double degree; 15 | } profileLog; 16 | 17 | typedef struct 18 | { 19 | unsigned int listStart; 20 | unsigned int listEnd; 21 | unsigned int dstVertexStart; 22 | unsigned int dstVertexEnd; 23 | unsigned int srcVertexStart; 24 | unsigned int srcVertexEnd; 25 | float scatterCacheRatio; 26 | float compressRatio; 27 | he_mem_t edgeTail; 28 | he_mem_t edgeHead; 29 | he_mem_t edgeProp; 30 | he_mem_t tmpProp; 31 | unsigned int mapedTotalIndex; 32 | unsigned int cuIndex; 33 | profileLog log; 34 | } subPartitionDescriptor; 35 | 36 | typedef struct 37 | { 38 | subPartitionDescriptor *sub[SUB_PARTITION_NUM]; 39 | int finalOrder[SUB_PARTITION_NUM]; 40 | unsigned int totalEdge; 41 | unsigned int subPartitionSize; 42 | cl_event syncEvent[SUB_PARTITION_NUM]; 43 | cl_event applyEvent; 44 | double applyExeTime; 45 | } partitionDescriptor; 46 | 47 | typedef struct 48 | { 49 | const char* name; 50 | int partition_mem_attr; 51 | int prop_id; 52 | int output_id; 53 | cl_kernel kernel; 54 | he_mem_t prop[2]; 55 | he_mem_t tmpProp; 56 | } gatherScatterDescriptor; 57 | 58 | typedef struct 59 | { 60 | const char* name; 61 | cl_kernel kernel; 62 | } applyDescriptor; 63 | 64 | typedef struct 65 | { 66 | CSR* csr; 67 | 68 | subPartitionDescriptor subPartitions[MAX_PARTITIONS_NUM * SUB_PARTITION_NUM]; 69 | 70 | partitionDescriptor 
partitions[MAX_PARTITIONS_NUM]; 71 | 72 | gatherScatterDescriptor * gsKernel[SUB_PARTITION_NUM]; 73 | 74 | applyDescriptor * applyKernel; 75 | 76 | cl_command_queue gsOps[SUB_PARTITION_NUM]; 77 | 78 | cl_command_queue applyOps; 79 | 80 | cl_program program; 81 | 82 | cl_platform_id platform; 83 | 84 | cl_device_id device; 85 | 86 | cl_context context; 87 | 88 | } graphAccelerator; 89 | 90 | 91 | subPartitionDescriptor * getSubPartition(int partID); 92 | partitionDescriptor * getPartition(int partID); 93 | 94 | gatherScatterDescriptor * getGatherScatter(int kernelID); 95 | applyDescriptor * getApply(void); 96 | 97 | graphAccelerator * getAccelerator(void); 98 | 99 | 100 | inline int getCuIDbyInterface(int order) 101 | { 102 | return he_get_interface_id(order); 103 | } 104 | 105 | 106 | #endif /* __HOST_GRAPH_DATA_STRUCTURE_H__ */ 107 | -------------------------------------------------------------------------------- /libgraph/host_graph_dataflow.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "host_graph_sw.h" 3 | #include "host_graph_scheduler.h" 4 | 5 | 6 | graphAccelerator thunderGraph; 7 | 8 | graphAccelerator * getAccelerator(void) 9 | { 10 | return &thunderGraph; 11 | } 12 | 13 | subPartitionDescriptor * getSubPartition(int partID) 14 | { 15 | return &thunderGraph.subPartitions[partID]; 16 | } 17 | 18 | partitionDescriptor * getPartition(int partID) 19 | { 20 | return &thunderGraph.partitions[partID]; 21 | } 22 | 23 | 24 | int acceleratorInit(const char * name, char *file_name) 25 | { 26 | cl_int status; 27 | cl_uint numPlatforms; 28 | cl_uint numDevices; 29 | graphAccelerator * acc = getAccelerator(); 30 | 31 | status = clGetPlatformIDs(1, &(acc->platform), &numPlatforms); 32 | checkStatus("Failed clGetPlatformIDs."); 33 | DEBUG_PRINTF("Found %d platforms!\n", numPlatforms); 34 | 35 | status = clGetDeviceIDs(acc->platform, CL_DEVICE_TYPE_ALL, 1, &(acc->device), &numDevices); 36 | checkStatus("Failed 
clGetDeviceIDs."); 37 | DEBUG_PRINTF("Found %d devices!\n", numDevices); 38 | 39 | acc->context = clCreateContext(0, 1, &(acc->device), NULL, NULL, &status); 40 | checkStatus("Failed clCreateContext."); 41 | 42 | xcl_world world = xcl_world_single(); 43 | acc->program = xcl_import_binary(world, name, file_name); 44 | 45 | kernelInit(acc); 46 | 47 | return 0; 48 | } 49 | 50 | int acceleratorSuperStep(int superStep, graphInfo *info) 51 | { 52 | graphAccelerator * acc = getAccelerator(); 53 | int blkNum = info->blkNum; 54 | for (int i = 0; i < blkNum + 1; i ++) 55 | { 56 | if (i < blkNum) 57 | { 58 | partitionDescriptor * partition = getPartition(i); 59 | setGsKernel(getArrangedPartitionID(i), superStep, info); 60 | for (int j = 0; j < SUB_PARTITION_NUM; j ++) 61 | { 62 | clEnqueueTask(acc->gsOps[j], getGatherScatter(j)->kernel, 0, NULL, 63 | &partition->syncEvent[j]); 64 | } 65 | } 66 | #if HAVE_APPLY 67 | if (i > 0) 68 | { 69 | partitionDescriptor * lastPartition; 70 | lastPartition = getPartition(i - 1); 71 | 72 | setApplyKernel(getArrangedPartitionID(i - 1), superStep, info); 73 | clEnqueueTask(acc->applyOps, getApply()->kernel, SUB_PARTITION_NUM, 74 | lastPartition->syncEvent, 75 | &lastPartition->applyEvent); 76 | } 77 | #endif 78 | } 79 | 80 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 81 | { 82 | clFinish(acc->gsOps[i]); 83 | } 84 | clFinish(acc->applyOps); 85 | return 0; 86 | } 87 | 88 | 89 | 90 | 91 | int acceleratorDeinit(void) 92 | { 93 | graphAccelerator * acc = getAccelerator(); 94 | /* TODO free other resource */ 95 | 96 | if (acc->context) clReleaseContext(acc->context); 97 | 98 | return 0; 99 | } 100 | 101 | 102 | void* acceleratorQueryRegister(void) 103 | { 104 | graphAccelerator * acc = getAccelerator(); 105 | transfer_data_from_pl(acc->context, acc->device,MEM_ID_RESULT_REG); 106 | return get_host_mem_pointer(MEM_ID_RESULT_REG); 107 | } 108 | 109 | prop_t* acceleratorQueryProperty(int step) 110 | { 111 | graphAccelerator * acc = getAccelerator(); 
112 | transfer_data_from_pl(acc->context, acc->device, getGatherScatter(0)->prop[step].id); 113 | prop_t * propValue = (prop_t *)get_host_mem_pointer(getGatherScatter(0)->prop[step].id); 114 | 115 | return propValue; 116 | } 117 | 118 | -------------------------------------------------------------------------------- /libgraph/host_graph_sw.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_SW_H__ 2 | #define __HOST_GRAPH_SW_H__ 3 | 4 | #include "para_check.h" 5 | #include 6 | 7 | #include "common.h" 8 | #include "global_config.h" 9 | 10 | #include "graph.h" 11 | 12 | #include "he_mem.h" 13 | #include "he_mem_id.h" 14 | 15 | #include "host_graph_api.h" 16 | #include "host_graph_misc_inner.h" 17 | #include "host_graph_data_structure.h" 18 | #include "host_graph_kernel.h" 19 | #include "host_graph_verification.h" 20 | #include "host_graph_csv.hpp" 21 | 22 | #endif /* __HOST_GRAPH_SW_H__ */ 23 | -------------------------------------------------------------------------------- /libgraph/kernel/host_graph_kernel.cpp: --------------------------------------------------------------------------------

#include "host_graph_sw.h"


/* When non-zero, every gather-scatter kernel is given a fixed edge count of
 * HW_EMU_DEBUG_SIZE instead of the partition's real listEnd. */
#define HW_EMU_DEBUG        (0)
#define HW_EMU_DEBUG_SIZE   (16384 * 4)

/* One descriptor per gather-scatter compute unit; `name` is the kernel name
 * looked up in the program by kernelInit(). */
gatherScatterDescriptor localGsKernel[] = {
    {
        .name = "readEdgesCU1",
    },
    {
        .name = "readEdgesCU2",
    },
    {
        .name = "readEdgesCU3",
    },
    {
        .name = "readEdgesCU4",
    },
};

applyDescriptor localApplyKernel[] =
{
    {
        .name = "vertexApply",
    },
};

/* Returns gather-scatter descriptor `kernelID`.  The index is not
 * range-checked; localGsKernel holds four entries. */
gatherScatterDescriptor * getGatherScatter(int kernelID)
{
    return &localGsKernel[kernelID];
}

/* Returns the apply descriptor at DEFAULT_KERNEL_ID. */
applyDescriptor * getApply(void)
{
    return &localApplyKernel[DEFAULT_KERNEL_ID];
}

/*
 * Set kernel argument `argIndex` and advance the index.
 * A failing clSetKernelArg is reported instead of being silently dropped;
 * the index advances either way so later arguments keep their positions.
 */
static inline void setKernelArgChecked(cl_kernel kernel, int &argIndex, size_t size,
                                       const void *value, const char *kernelName)
{
    cl_int status = clSetKernelArg(kernel, argIndex, size, value);
    if (status != CL_SUCCESS)
    {
        DEBUG_PRINTF("Failed clSetKernelArg: kernel %s, arg %d, error %d\n",
                     kernelName, argIndex, (int)status);
    }
    argIndex++;
}

/*
 * Create the gather-scatter kernels (and, with HAVE_APPLY, the apply kernel)
 * from acc->program, record them in `acc`, and create one profiling-enabled
 * command queue per sub-partition plus one for the apply kernel.
 */
void kernelInit(graphAccelerator * acc)
{
    cl_int status;
    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        getGatherScatter(i)->kernel = clCreateKernel(acc->program, getGatherScatter(i)->name, &status);
        checkStatus("Failed clCreateKernel gs.");
        acc->gsKernel[i] = getGatherScatter(i);
    }

#if HAVE_APPLY

    getApply()->kernel = clCreateKernel(acc->program, getApply()->name, &status);
    checkStatus("Failed clCreateKernel apply.");
    acc->applyKernel = getApply();

#endif

    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        acc->gsOps[i] = clCreateCommandQueue(acc->context, acc->device, CL_QUEUE_PROFILING_ENABLE, &status);
        checkStatus("Failed clCreateCommandQueue of gsOps.");
    }
    acc->applyOps = clCreateCommandQueue(acc->context, acc->device, CL_QUEUE_PROFILING_ENABLE, &status);
    checkStatus("Failed clCreateCommandQueue of applyOps.");
}

/*
 * Bind the arguments of every gather-scatter kernel for partition `partId`.
 *
 * Argument order per kernel:
 *   edge heads, current property buffer (prop[superStep % 2]), edge tails,
 *   temporary property output (marked dirty), [edge properties when
 *   HAVE_EDGE_PROP], edge count, sink start (0),
 *   sink end (MAX_VERTICES_IN_ONE_PARTITION).
 *
 * `info` is not used here.
 */
void setGsKernel(int partId, int superStep, graphInfo *info)
{
    int currentPropId = superStep % 2;

    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        gatherScatterDescriptor * gsHandler = getGatherScatter(i);
        subPartitionDescriptor * partition = getSubPartition(partId * SUB_PARTITION_NUM + i);
        int argvi = 0;
        int edgeEnd     = partition->listEnd;
        int sinkStart   = 0;
        int sinkEnd     = MAX_VERTICES_IN_ONE_PARTITION;

#if HW_EMU_DEBUG
        edgeEnd = HW_EMU_DEBUG_SIZE;
#endif

        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(partition->edgeHead.id), gsHandler->name);
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(gsHandler->prop[currentPropId].id), gsHandler->name);
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(partition->edgeTail.id), gsHandler->name);

        /* the kernel writes tmpProp, so the host copy becomes stale */
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(partition->tmpProp.id), gsHandler->name);
        he_set_dirty(partition->tmpProp.id);

#if HAVE_EDGE_PROP
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(partition->edgeProp.id), gsHandler->name);
#endif
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(int), &edgeEnd,   gsHandler->name);
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(int), &sinkStart, gsHandler->name);
        setKernelArgChecked(gsHandler->kernel, argvi, sizeof(int), &sinkEnd,   gsHandler->name);
    }
}

#if  CUSTOMIZE_APPLY == 0


/*
 * Bind the arguments of the apply kernel for partition `partId`
 * (no-op unless HAVE_APPLY is set).
 *
 * Argument order:
 *   current property buffer of the CU mapped to DEFAULT_KERNEL_ID,
 *   SUB_PARTITION_NUM temporary property buffers of this partition,
 *   SUB_PARTITION_NUM updated property buffers (prop[(superStep + 1) % 2],
 *   each marked dirty), [out-degree buffer when HAVE_APPLY_OUTDEG],
 *   result register buffer (marked dirty), sink end, vertex offset, and the
 *   user argument returned by dataPrepareGetArg().
 *
 * The sink end is the destination-vertex span of the partition's first
 * sub-partition, rounded to ALIGN_SIZE as (span / ALIGN_SIZE + 1) * ALIGN_SIZE.
 */
void setApplyKernel(int partId, int superStep, graphInfo *info)
{
#if HAVE_APPLY
    int currentPropId = superStep % 2;
    int updatePropId  = (superStep + 1) % 2;

    applyDescriptor * applyHandler = getApply();
    int argvi = 0;
    unsigned int argReg = dataPrepareGetArg(info);
    subPartitionDescriptor *p_partition = getSubPartition(partId * SUB_PARTITION_NUM);

    unsigned int partitionVertexNum = ((p_partition->dstVertexEnd - p_partition->dstVertexStart)
                                       / (ALIGN_SIZE ) + 1) * (ALIGN_SIZE );
    int sink_end = partitionVertexNum;
    int offset = p_partition->dstVertexStart;


    setKernelArgChecked(applyHandler->kernel, argvi, sizeof(cl_mem),
                        get_cl_mem_pointer(getGatherScatter(getCuIDbyInterface(DEFAULT_KERNEL_ID))->prop[currentPropId].id),
                        applyHandler->name);

    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        setKernelArgChecked(applyHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(getSubPartition(partId * SUB_PARTITION_NUM + i)->tmpProp.id),
                            applyHandler->name);
    }
    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        setKernelArgChecked(applyHandler->kernel, argvi, sizeof(cl_mem),
                            get_cl_mem_pointer(getGatherScatter(i)->prop[updatePropId].id),
                            applyHandler->name);
        he_set_dirty(getGatherScatter(i)->prop[updatePropId].id);
    }

#if HAVE_APPLY_OUTDEG
    setKernelArgChecked(applyHandler->kernel, argvi, sizeof(cl_mem),
                        get_cl_mem_pointer(MEM_ID_OUT_DEG), applyHandler->name);
#endif
    setKernelArgChecked(applyHandler->kernel, argvi, sizeof(cl_mem),
                        get_cl_mem_pointer(MEM_ID_RESULT_REG), applyHandler->name);
    he_set_dirty(MEM_ID_RESULT_REG);

    setKernelArgChecked(applyHandler->kernel, argvi, sizeof(int), &sink_end, applyHandler->name);
    setKernelArgChecked(applyHandler->kernel, argvi, sizeof(int), &offset,   applyHandler->name);
    setKernelArgChecked(applyHandler->kernel, argvi, sizeof(int), &argReg,   applyHandler->name);
#endif
}

#endif
-------------------------------------------------------------------------------- /libgraph/kernel/host_graph_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_SW_KERNEL_H__ 2 | #define __HOST_GRAPH_SW_KERNEL_H__ 3 | 4 | void kernelInit(graphAccelerator * acc); 5 | 6 | void setGsKernel(int partId, int superStep, graphInfo *info); 7 | 8 | void setApplyKernel(int partId, int superStep, graphInfo *info); 9 | 10 | 11 | #endif /* __HOST_GRAPH_SW_KERNEL_H__ */ 12 | -------------------------------------------------------------------------------- /libgraph/memory/he_mapping.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "he_mem.h" 3 | 4 | typedef struct 5 | { 6 | int cu_id; 7 | int mem_id; 8 | int he_attr_id; 9 | int interface_id; 10 | } cumem_lut_t; 11 | 12 | 13 | typedef struct 14 | { 15 | int interface_id; 16 | int mem_id; 17 | int he_attr_id; 18 | } attr_lut_t; 19 | 20 | const attr_lut_t attr_mapping[] = 21 | { 22 | { 23 | .interface_id = 0, 24 | .mem_id = XCL_MEM_DDR_BANK0, 25 | .he_attr_id = ATTR_PL_DDR0, 26 | }, 27 | { 28 | .interface_id = 1, 29 | .mem_id = XCL_MEM_DDR_BANK1, 30 | .he_attr_id = ATTR_PL_DDR1, 31 | }, 32 | { 33 | .interface_id = 2, 34 | .mem_id = XCL_MEM_DDR_BANK2, 35 | .he_attr_id = ATTR_PL_DDR2, 36 | }, 37 | 38 | { 39 | .interface_id = 3, 40 | .mem_id = XCL_MEM_DDR_BANK3, 41 | .he_attr_id = ATTR_PL_DDR3, 42 | }, 43 | }; 44 | 45 | 46 | #include "mapping.h" 47 | 48 | int he_get_mem_attr(int attr_id) 49 | { 50 | #if HAVE_APPLY 51 | int total_mapping_items = ARRAY_SIZE(attr_mapping); 52 | #else 53 | int total_mapping_items = SUB_PARTITION_NUM; 54 | #endif 55 | for (int i = 0; i < 
total_mapping_items; i++) 56 | { 57 | if (attr_mapping[i].he_attr_id == attr_id) 58 | { 59 | return attr_mapping[i].mem_id; 60 | } 61 | } 62 | return (attr_mapping[SUB_PARTITION_NUM - 1].mem_id); 63 | }


/*
 * Look up the interface id of compute unit `cu_id` in mapping_item[].
 * Only the first SUB_PARTITION_NUM entries are searched; when none matches,
 * the interface id of the last searched entry is returned.
 */
int he_get_interface_id(int cu_id)
{
    int slot = SUB_PARTITION_NUM - 1;   /* fallback entry */
    for (int k = 0; k < SUB_PARTITION_NUM; ++k)
    {
        if (mapping_item[k].cu_id == cu_id)
        {
            slot = k;                   /* first match wins */
            break;
        }
    }
    return mapping_item[slot].interface_id;
}


/*
 * Look up the memory attribute id of compute unit `cu_id` in mapping_item[].
 * Only the first SUB_PARTITION_NUM entries are searched; when none matches,
 * the attribute id of the last searched entry is returned.
 */
int he_get_attr_by_cu(int cu_id)
{
    int slot = SUB_PARTITION_NUM - 1;   /* fallback entry */
    for (int k = 0; k < SUB_PARTITION_NUM; ++k)
    {
        if (mapping_item[k].cu_id == cu_id)
        {
            slot = k;                   /* first match wins */
            break;
        }
    }
    return mapping_item[slot].he_attr_id;
}
-------------------------------------------------------------------------------- /libgraph/memory/he_mem.h: -------------------------------------------------------------------------------- 1 | #ifndef __HE_MEM_H__ 2 | #define __HE_MEM_H__ 3 | 4 | 5 | #include "xcl.h" 6 | 7 | #define ATTR_HOST_ONLY (0) 8 | #define ATTR_PL_DEFAULT (1) 9 | #define ATTR_PL_DDR0 (2) 10 | #define ATTR_PL_DDR1 (3) 11 | #define ATTR_PL_DDR2 (4) 12 | #define ATTR_PL_DDR3 (5) 13 | #define ATTR_ERROR (6) 14 | 15 | typedef struct 16 | { 17 | const unsigned int size_attr; 18 | unsigned int scale; 19 | 20 | } size_attr_ctrl_t; 21 | 22 | #define clSVMAlloc(context,flag,size,alignment) memalign(alignment,size) 23 | 24 | typedef struct 25 | { 26 | unsigned int id; 27 | const char *name; 28 | unsigned int attr; 29 | unsigned int unit_size; 30 | unsigned int size_attr; 31 | 32 | unsigned int size; 33 | void *data; 34 | cl_mem device; 35 | cl_mem_ext_ptr_t ext_attr; 36 | 37 | unsigned int dirty_flags; 38 | } he_mem_t; 39 | 40 | typedef struct 41 | { 42 | unsigned int id; 43 | he_mem_t *p_mem; 44 | } he_mem_lookup_t; 45 | 46 | 47 | int register_size_attribute(unsigned int attr_id,int value); 48 | 49 | unsigned int 
get_size_attribute(unsigned int attr_id); 50 | 51 | int he_mem_init(cl_context &dev_context, he_mem_t * item); 52 | 53 | cl_mem* get_cl_mem_pointer(int id); 54 | 55 | void* get_host_mem_pointer(int id); 56 | 57 | he_mem_t* get_he_mem(unsigned int id); 58 | 59 | void clear_host_mem(int id); 60 | 61 | int transfer_data_from_pl(cl_context &dev_context, cl_device_id device_id, int mem_id); 62 | 63 | int transfer_data_to_pl(cl_context &dev_context, cl_device_id device_id, int* id_array, int size); 64 | 65 | 66 | /************* host & device memory coherent *******************/ 67 | int he_set_dirty(int id); 68 | 69 | int he_set_clean(int id); 70 | 71 | int he_get_dirty_flag(int id); 72 | 73 | /************* memory channel, interface mapping ***************/ 74 | int he_get_interface_id(int cu_id); 75 | 76 | int he_get_mem_attr(int attr_id); 77 | 78 | int he_get_attr_by_cu(int cu_id); 79 | 80 | #endif /* __HE_MEM_H__ */ 81 | -------------------------------------------------------------------------------- /libgraph/memory/he_mem_attr.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEM_ATTR_H__ 2 | #define __MEM_ATTR_H__ 3 | 4 | 5 | size_attr_ctrl_t local_size_ctrl[] = 6 | { 7 | { 8 | .size_attr = SIZE_IN_EDGE, 9 | }, 10 | { 11 | .size_attr = SIZE_IN_VERTEX, 12 | }, 13 | { 14 | .size_attr = SIZE_USER_DEFINE, 15 | } 16 | }; 17 | 18 | 19 | #endif /* __MEM_ATTR_H__ */ 20 | -------------------------------------------------------------------------------- /libgraph/memory/he_mem_config.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEM_CONFIG_H__ 2 | #define __MEM_CONFIG_H__ 3 | 4 | 5 | 6 | he_mem_t local_mem[] = 7 | { 8 | { 9 | MEM_ID_PROP_FOR_DATAPREPARE, 10 | "data prepare", 11 | ATTR_HOST_ONLY, 12 | sizeof(prop_t), 13 | SIZE_IN_VERTEX, 14 | }, 15 | { 16 | MEM_ID_TEST, 17 | "test", 18 | ATTR_PL_DDR1, 19 | sizeof(prop_t), 20 | SIZE_IN_VERTEX, 21 | }, 22 | 23 | // tmp property 
24 | { 25 | MEM_ID_TMP_VERTEX_PROP, 26 | "tmpVertexProp", 27 | ATTR_HOST_ONLY, 28 | sizeof(prop_t), 29 | SIZE_IN_VERTEX, 30 | }, 31 | { 32 | MEM_ID_VERTEX_PROP_VERIFY, 33 | "vertexProp for verification", 34 | ATTR_HOST_ONLY, 35 | sizeof(prop_t), 36 | SIZE_IN_VERTEX, 37 | }, 38 | 39 | { 40 | MEM_ID_EDGE_TAIL, 41 | "edgesTailArray", 42 | ATTR_HOST_ONLY, 43 | sizeof(prop_t), 44 | SIZE_IN_EDGE, 45 | }, 46 | { 47 | MEM_ID_EDGE_HEAD, 48 | "edgesHeadArray", 49 | ATTR_HOST_ONLY, 50 | sizeof(int), 51 | SIZE_IN_EDGE, 52 | }, 53 | { 54 | MEM_ID_PUSHIN_PROP, 55 | "vertexPushinProp", 56 | ATTR_HOST_ONLY, 57 | sizeof(int), 58 | SIZE_IN_VERTEX, 59 | }, 60 | { 61 | MEM_ID_PUSHIN_PROP_MAPPED, 62 | "vertexPushinPropMapped", 63 | ATTR_HOST_ONLY, 64 | sizeof(int), 65 | SIZE_IN_VERTEX, 66 | }, 67 | { 68 | MEM_ID_HOST_PROP_PING, 69 | "hostVerificationPropPing", 70 | ATTR_HOST_ONLY, 71 | sizeof(prop_t), 72 | SIZE_IN_VERTEX, 73 | }, 74 | { 75 | MEM_ID_HOST_PROP_PONG, 76 | "hostVerificationPropPong", 77 | ATTR_HOST_ONLY, 78 | sizeof(prop_t), 79 | SIZE_IN_VERTEX, 80 | }, 81 | { 82 | MEM_ID_TMP_VERTEX_VERIFY, 83 | "tmpVertexPropVerify", 84 | ATTR_HOST_ONLY, 85 | sizeof(prop_t), 86 | SIZE_IN_VERTEX, 87 | }, 88 | { 89 | MEM_ID_OUT_DEG, 90 | "outDeg", 91 | ATTR_PL_DDR2, 92 | sizeof(int), 93 | SIZE_IN_VERTEX, 94 | }, 95 | { 96 | MEM_ID_OUT_DEG_ORIGIN, 97 | "outDeg origin", 98 | ATTR_HOST_ONLY, 99 | sizeof(int), 100 | SIZE_IN_VERTEX, 101 | }, 102 | { 103 | MEM_ID_RESULT_REG, 104 | "error", 105 | ATTR_PL_DDR2, 106 | sizeof(int) * 64, 107 | SIZE_USER_DEFINE, 108 | }, 109 | { 110 | MEM_ID_RPA, 111 | "rpa", 112 | ATTR_HOST_ONLY, 113 | sizeof(int) * 2, 114 | SIZE_IN_VERTEX, 115 | }, 116 | { 117 | MEM_ID_CIA, 118 | "cia", 119 | ATTR_HOST_ONLY, 120 | sizeof(int) * 2, 121 | SIZE_IN_EDGE, 122 | }, 123 | { 124 | MEM_ID_EDGE_PROP, 125 | "edgeProp", 126 | ATTR_HOST_ONLY, 127 | sizeof(int), 128 | SIZE_IN_EDGE, 129 | }, 130 | { 131 | MEM_ID_PARTITON_EDGE_PROP, 132 | "edgePartitionProp", 133 | ATTR_HOST_ONLY, 
134 | sizeof(int), 135 | SIZE_IN_EDGE, 136 | }, 137 | { 138 | MEM_ID_VERTEX_INDEX_MAP, 139 | "indexMap", 140 | ATTR_HOST_ONLY, 141 | sizeof(int), 142 | SIZE_IN_VERTEX, 143 | }, 144 | { 145 | MEM_ID_VERTEX_INDEX_REMAP, 146 | "indexRemap", 147 | ATTR_HOST_ONLY, 148 | sizeof(int), 149 | SIZE_IN_VERTEX, 150 | }, 151 | { 152 | MEM_ID_VERTEX_INDEX_BIT_ORI, 153 | "sourceBitmapOri", 154 | ATTR_HOST_ONLY, 155 | sizeof(int), 156 | SIZE_IN_EDGE, 157 | }, 158 | { 159 | MEM_ID_VERTEX_INDEX_BIT, 160 | "sourceBitmap", 161 | ATTR_HOST_ONLY, 162 | sizeof(int), 163 | SIZE_IN_EDGE, 164 | }, 165 | { 166 | MEM_ID_ACTIVE_VERTEX_NUM, 167 | "activeVertexNum", 168 | ATTR_HOST_ONLY, 169 | sizeof(prop_t), 170 | SIZE_IN_VERTEX, 171 | }, 172 | }; 173 | 174 | 175 | #endif /* __MEM_CONFIG_H__ */ 176 | 177 | -------------------------------------------------------------------------------- /libgraph/memory/he_mem_id.h: -------------------------------------------------------------------------------- 1 | #ifndef __HE_MEM_ID_H__ 2 | #define __HE_MEM_ID_H__ 3 | 4 | 5 | 6 | #define SIZE_IN_EDGE (0) 7 | #define SIZE_IN_VERTEX (1) 8 | #define SIZE_USER_DEFINE (2) 9 | 10 | 11 | 12 | 13 | 14 | #define MEM_ID_EDGE_TAIL (1) 15 | #define MEM_ID_EDGE_HEAD (2) 16 | #define MEM_ID_PUSHIN_PROP (3) 17 | #define MEM_ID_PROP_FOR_DATAPREPARE (4) 18 | #define MEM_ID_TMP_VERTEX_PROP (5) 19 | #define MEM_ID_TMP_VERTEX_VERIFY (6) 20 | #define MEM_ID_OUT_DEG (7) 21 | #define MEM_ID_RESULT_REG (8) 22 | #define MEM_ID_RPA (10) 23 | #define MEM_ID_CIA (11) 24 | #define MEM_ID_EDGE_PROP (12) 25 | #define MEM_ID_VERTEX_PROP_VERIFY (14) 26 | #define MEM_ID_PUSHIN_PROP_MAPPED (15) 27 | #define MEM_ID_VERTEX_INDEX_MAP (16) 28 | #define MEM_ID_VERTEX_INDEX_REMAP (17) 29 | #define MEM_ID_VERTEX_INDEX_BIT_ORI (18) 30 | #define MEM_ID_VERTEX_INDEX_BIT (19) 31 | 32 | #define MEM_ID_ACTIVE_VERTEX (20) 33 | #define MEM_ID_ACTIVE_VERTEX_NUM (21) 34 | 35 | #define MEM_ID_OUT_DEG_ORIGIN (22) 36 | #define MEM_ID_HOST_PROP_PING (23) 37 | 
#define MEM_ID_HOST_PROP_PONG (24) 38 | #define MEM_ID_PARTITON_EDGE_PROP (25) 39 | 40 | #define MEM_ID_TEST (26) 41 | 42 | #define MEM_ID_PARTITION_BASE (100) 43 | 44 | #define MEM_ID_PARTITION_OFFSET (128) 45 | 46 | 47 | #define MEM_ID_GS_BASE (8192) 48 | #define MEM_ID_GS_OFFSET (128) 49 | 50 | #define MEM_ID_CUSTOM_BASE (16384) 51 | 52 | 53 | 54 | #define MEM_ID_USER_DEFINE_BASE (16384 * 2) 55 | 56 | 57 | #endif /* __HE_MEM_ID_H__ */ 58 | -------------------------------------------------------------------------------- /libgraph/misc/data_helper.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "graph.h" 4 | 5 | Graph* createGraph(const std::string &gName, const std::string &mode) { 6 | Graph* gptr; 7 | std::string dir; 8 | if (mode == "normal") dir = "/graph_data/"; 9 | 10 | else { 11 | std::cout << "unknown execution environment." << std::endl; 12 | exit(0); 13 | } 14 | 15 | if (gName == "dblp") { 16 | gptr = new Graph(dir + "dblp.ungraph.txt"); 17 | } 18 | else if (gName == "youtube") { 19 | gptr = new Graph(dir + "youtube.ungraph.txt"); 20 | } 21 | else if (gName == "lj") { 22 | gptr = new Graph(dir + "lj.ungraph.txt"); 23 | } 24 | else if (gName == "pokec") { 25 | gptr = new Graph(dir + "pokec-relationships.txt"); 26 | } 27 | else if (gName == "wiki-talk") { 28 | gptr = new Graph(dir + "wiki-Talk.txt"); 29 | } 30 | else if (gName == "lj1") { 31 | gptr = new Graph(dir + "LiveJournal1.txt"); 32 | } 33 | else if (gName == "rmat-12-8") { 34 | gptr = new Graph(dir + "rmat-12-8.txt"); 35 | } 36 | else if (gName == "rmat-21-32") { 37 | gptr = new Graph(dir + "rmat-21-32.txt"); 38 | } 39 | else if (gName == "rmat-19-32") { 40 | gptr = new Graph(dir + "rmat-19-32.txt"); 41 | } 42 | else if (gName == "rmat-21-128") { 43 | gptr = new Graph(dir + "rmat-21-128.txt"); 44 | } 45 | else if (gName == "twitter") { 46 | gptr = new Graph(dir + "twitter_rv.txt"); 47 | } 48 | else if (gName == "friendster") { 49 | 
gptr = new Graph(dir + "friendster.ungraph.txt"); 50 | } 51 | else if (gName == "example") { 52 | gptr = new Graph(dir + "rmat-1k-10k.txt"); 53 | } 54 | else if (gName == "rmat-12-4") { 55 | gptr = new Graph(dir + "rmat-12-4.txt"); 56 | } 57 | else if (gName == "rmat-23-4") { 58 | gptr = new Graph(dir + "rmat-23-4.txt"); 59 | } 60 | else if (gName == "rmat-23-16") { 61 | gptr = new Graph(dir + "rmat-23-16.txt"); 62 | } 63 | else if (gName == "wiki-Talk") { 64 | gptr = new Graph(dir + "soc-wiki-Talk-dir.mtx"); 65 | } 66 | else if (gName == "orkut") { 67 | gptr = new Graph(dir + "soc-orkut-dir.edges"); 68 | } 69 | else if (gName == "twitter-higgs") { 70 | gptr = new Graph(dir + "soc-twitter-higgs.edges"); 71 | } 72 | else if (gName == "twitter-2010") { 73 | gptr = new Graph(dir + "soc-twitter-2010.mtx"); 74 | } 75 | else if (gName == "google") { 76 | gptr = new Graph(dir + "web-Google.mtx"); 77 | } 78 | else if (gName == "mouse-gene") { 79 | gptr = new Graph(dir + "bio-mouse-gene.edges"); 80 | } 81 | else if (gName == "flixster") { 82 | gptr = new Graph(dir + "soc-flixster.mtx"); 83 | } 84 | else if (gName == "rmat-24-16") { 85 | gptr = new Graph(dir + "rmat-24-16.txt"); 86 | } 87 | else { 88 | gptr = new Graph(gName); 89 | } 90 | return gptr; 91 | } 92 | 93 | 94 | double getCurrentTimestamp(void) { 95 | timespec a; 96 | clock_gettime(CLOCK_MONOTONIC, &a); 97 | return (double(a.tv_nsec) * 1.0e-9) + double(a.tv_sec); 98 | } 99 | 100 | -------------------------------------------------------------------------------- /libgraph/misc/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef __GRAPH_H__ 2 | #define __GRAPH_H__ 3 | 4 | #include "global_config.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define HERE do {std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;} while(0) 16 | 17 | class Vertex { 18 | public: 19 | 
int idx; 20 | int inDeg; 21 | int outDeg; 22 | 23 | std::vector inVid; 24 | std::vector outVid; 25 | 26 | explicit Vertex(int _idx) { 27 | idx = _idx; 28 | } 29 | 30 | ~Vertex(){ 31 | // Nothing is done here. 32 | } 33 | 34 | }; 35 | 36 | class Graph{ 37 | public: 38 | int vertexNum; 39 | int edgeNum; 40 | std::vector vertices; 41 | 42 | Graph(const std::string &fName); 43 | ~Graph(){ 44 | for(int i = 0; i < vertexNum; i++){ 45 | delete vertices[i]; 46 | } 47 | }; 48 | void getRandomStartIndices(std::vector &startIndices); 49 | void getStat(); 50 | 51 | private: 52 | bool isUgraph; 53 | int getMaxIdx(const std::vector> &data); 54 | int getMinIdx(const std::vector> &data); 55 | void loadFile( 56 | const std::string& fName, 57 | std::vector> &data 58 | ); 59 | 60 | }; 61 | 62 | class CSR{ 63 | public: 64 | const int vertexNum; 65 | const int edgeNum; 66 | std::vector rpao; 67 | std::vector ciao; 68 | std::vector rpai; 69 | std::vector ciai; 70 | std::vector eProps; 71 | 72 | // The CSR is constructed based on the simple graph 73 | explicit CSR(const Graph &g); 74 | int save2File(const std::string & fName); 75 | ~CSR(); 76 | }; 77 | 78 | class CSR_BLOCK{ 79 | public: 80 | const int cordx; 81 | const int cordy; 82 | int vertexNum; 83 | int edgeNum; 84 | int srcStart; 85 | int srcEnd; 86 | int sinkStart; 87 | int sinkEnd; 88 | std::vector rpa; 89 | std::vector cia; 90 | std::vector eProps; 91 | explicit CSR_BLOCK(const int _cordx, const int _cordy, CSR* csr); 92 | ~CSR_BLOCK(){}; 93 | }; 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /libgraph/misc/host_graph_csv.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_CSV_HPP__ 2 | #define __HOST_GRAPH_CSV_HPP__ 3 | 4 | #include "he_mem.h" 5 | #include "he_mem_id.h" 6 | 7 | #include "host_graph_data_structure.h" 8 | 9 | extern std::vector allocate_he_mem; 10 | 11 | template 12 | T * load_from_csv(std::string 
file_name, int he_id, int mem_id) 13 | { 14 | T data; 15 | //DEBUG_PRINTF("id: %d\n", he_id); 16 | std::vector load_buffer; 17 | std::ifstream fhandle(file_name.c_str()); 18 | if (!fhandle.is_open()) { 19 | DEBUG_PRINTF("error: can not open %s \n", file_name.c_str()); 20 | return NULL; 21 | } 22 | int tmp_cnt = 0; 23 | while (fhandle.peek() != EOF ) 24 | { 25 | tmp_cnt ++; 26 | fhandle >> data; 27 | load_buffer.push_back(data); 28 | } 29 | fhandle.close(); 30 | 31 | he_mem_t *p_mem = get_he_mem(he_id); 32 | // create new, if not existing 33 | if (p_mem == NULL) 34 | { 35 | he_mem_t *mem = (he_mem_t *)memalign( 4, sizeof(he_mem_t)); 36 | mem->id = he_id; 37 | mem->name = "load"; 38 | mem->attr = mem_id; 39 | mem->unit_size = sizeof(T) * load_buffer.size(); 40 | mem->size_attr = SIZE_USER_DEFINE; 41 | 42 | he_mem_init(getAccelerator()->context, mem); 43 | allocate_he_mem.push_back(mem); 44 | } 45 | 46 | p_mem = get_he_mem(he_id); 47 | if (p_mem->size < sizeof(T) * load_buffer.size()) 48 | { 49 | DEBUG_PRINTF(" warning mem %d is too small\n", he_id); 50 | } 51 | int load_size = (p_mem->size < sizeof(T) * load_buffer.size()) ? 
52 | (p_mem->size / sizeof(T)) : load_buffer.size(); 53 | for (int i = 0; i < load_size; i++) 54 | { 55 | (((T*)p_mem->data)[i]) = load_buffer[i]; 56 | } 57 | int id = he_id; 58 | //DEBUG_PRINTF("size %d \n", p_mem->size); 59 | transfer_data_to_pl(getAccelerator()->context, getAccelerator()->device, &id, 1); 60 | return (T*)p_mem->data; 61 | } 62 | 63 | 64 | 65 | template 66 | int output_init(int he_id, int mem_id, int ref_he_id) 67 | { 68 | he_mem_t *p_ref = get_he_mem(ref_he_id); 69 | if (p_ref == NULL) 70 | { 71 | return -1; 72 | } 73 | he_mem_t *p_mem = get_he_mem(he_id); 74 | // create new, if not existing 75 | if (p_mem == NULL) 76 | { 77 | he_mem_t *mem = (he_mem_t *)memalign( 4, sizeof(he_mem_t)); 78 | mem->id = he_id; 79 | mem->name = "output"; 80 | mem->attr = mem_id; 81 | mem->unit_size = p_ref->size; 82 | mem->size_attr = SIZE_USER_DEFINE; 83 | he_mem_init(getAccelerator()->context, mem); 84 | allocate_he_mem.push_back(mem); 85 | } 86 | 87 | p_mem = get_he_mem(he_id); 88 | int load_size = (p_mem->size) / sizeof(T); 89 | for (int i = 0; i < load_size; i++) 90 | { 91 | (((T*)p_mem->data)[i]) = 0; 92 | } 93 | return 0; 94 | } 95 | 96 | 97 | template 98 | int write_back_csv(std::string file_name, int he_id) 99 | { 100 | std::vector load_buffer; 101 | std::ofstream fhandle(file_name.c_str()); 102 | if (!fhandle.is_open()) { 103 | DEBUG_PRINTF("error: can not open %s \n", file_name.c_str()); 104 | exit(EXIT_FAILURE); 105 | } 106 | 107 | transfer_data_from_pl(getAccelerator()->context, getAccelerator()->device, he_id); 108 | const he_mem_t *p_mem = get_he_mem(he_id); 109 | const T * p_data = (T *)(p_mem->data); 110 | 111 | for (unsigned int i = 0; i < p_mem->size / sizeof(T); i++) 112 | { 113 | fhandle << p_data[i] << std::endl; 114 | } 115 | fhandle.flush(); 116 | fhandle.close(); 117 | 118 | return 0; 119 | } 120 | 121 | #endif /* __HOST_GRAPH_CSV_HPP__ */ 122 | -------------------------------------------------------------------------------- 
/libgraph/misc/host_graph_mem.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include "he_mem_config.h" 4 | 5 | 6 | #define PARTITION_DDR (he_get_attr_by_cu(cuIndex)) 7 | #define CU_DDR (he_get_attr_by_cu(cuIndex)) 8 | 9 | void base_mem_init(cl_context &context) 10 | { 11 | for (unsigned int i = 0; i < ARRAY_SIZE(local_mem); i++) 12 | { 13 | he_mem_init(context, &local_mem[i]); 14 | } 15 | } 16 | 17 | static void gs_mem_init(cl_context &context, gatherScatterDescriptor *gsItem, int cuIndex, void *data) 18 | { 19 | gsItem->prop[0].id = MEM_ID_GS_BASE + cuIndex * MEM_ID_GS_OFFSET; 20 | gsItem->prop[0].name = "cu prop ping"; 21 | gsItem->prop[0].attr = CU_DDR; 22 | gsItem->prop[0].unit_size = sizeof(int); 23 | gsItem->prop[0].size_attr = SIZE_IN_VERTEX; 24 | he_mem_init(context, &gsItem->prop[0]); 25 | memcpy(gsItem->prop[0].data, data, gsItem->prop[0].size); 26 | 27 | gsItem->prop[1].id = MEM_ID_GS_BASE + cuIndex * MEM_ID_GS_OFFSET + 2; 28 | gsItem->prop[1].name = "cu prop pong"; 29 | gsItem->prop[1].attr = CU_DDR; 30 | gsItem->prop[1].unit_size = sizeof(int); 31 | gsItem->prop[1].size_attr = SIZE_IN_VERTEX; 32 | he_mem_init(context, &gsItem->prop[1]); 33 | memcpy(gsItem->prop[1].data, data, gsItem->prop[1].size); 34 | 35 | gsItem->tmpProp.id = MEM_ID_GS_BASE + cuIndex * MEM_ID_GS_OFFSET + 1; 36 | gsItem->tmpProp.name = "cu output tmpProp"; 37 | gsItem->tmpProp.attr = CU_DDR; 38 | gsItem->tmpProp.unit_size = sizeof(int); 39 | gsItem->tmpProp.size_attr = SIZE_IN_VERTEX; 40 | he_mem_init(context, &gsItem->tmpProp); 41 | } 42 | 43 | 44 | void process_mem_init(cl_context &context) 45 | { 46 | int *vertexPushinPropMapped = (int*)get_host_mem_pointer(MEM_ID_PUSHIN_PROP_MAPPED); 47 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 48 | { 49 | gs_mem_init(context, getGatherScatter(i), i, vertexPushinPropMapped); 50 | } 51 | } 52 | 53 | void partition_mem_init(cl_context &context, int blkIndex, int 
size, int cuIndex) 54 | { 55 | int i = blkIndex; 56 | subPartitionDescriptor *partitionItem = getSubPartition(i); 57 | { 58 | partitionItem->cuIndex = cuIndex; 59 | partitionItem->edgeTail.id = MEM_ID_PARTITION_BASE + i * MEM_ID_PARTITION_OFFSET; 60 | partitionItem->edgeTail.name = "partition edgeTail"; 61 | partitionItem->edgeTail.attr = PARTITION_DDR; 62 | partitionItem->edgeTail.unit_size = size * sizeof(int); 63 | partitionItem->edgeTail.size_attr = SIZE_USER_DEFINE; 64 | he_mem_init(context, &partitionItem->edgeTail); 65 | 66 | partitionItem->edgeHead.id = MEM_ID_PARTITION_BASE + i * MEM_ID_PARTITION_OFFSET + 1; 67 | partitionItem->edgeHead.name = "partition edgeHead"; 68 | partitionItem->edgeHead.attr = PARTITION_DDR; 69 | partitionItem->edgeHead.unit_size = size * sizeof(int); 70 | partitionItem->edgeHead.size_attr = SIZE_USER_DEFINE; 71 | he_mem_init(context, &partitionItem->edgeHead); 72 | 73 | partitionItem->edgeProp.id = MEM_ID_PARTITION_BASE + i * MEM_ID_PARTITION_OFFSET + 2; 74 | partitionItem->edgeProp.name = "partition edgeProp"; 75 | #if 1 76 | partitionItem->edgeProp.attr = PARTITION_DDR; 77 | #else 78 | partitionItem->edgeProp.attr = ATTR_HOST_ONLY; 79 | #endif 80 | partitionItem->edgeProp.unit_size = size * sizeof(int); 81 | partitionItem->edgeProp.size_attr = SIZE_USER_DEFINE; 82 | he_mem_init(context, &partitionItem->edgeProp); 83 | 84 | partitionItem->tmpProp.id = MEM_ID_PARTITION_BASE + i * MEM_ID_PARTITION_OFFSET + 3; 85 | partitionItem->tmpProp.name = "partition tmpProp"; 86 | partitionItem->tmpProp.attr = PARTITION_DDR; 87 | partitionItem->tmpProp.unit_size = MAX_VERTICES_IN_ONE_PARTITION * sizeof(int); 88 | partitionItem->tmpProp.size_attr = SIZE_USER_DEFINE; 89 | he_mem_init(context, &partitionItem->tmpProp); 90 | } 91 | } 92 | 93 | -------------------------------------------------------------------------------- /libgraph/misc/host_graph_mem.h: -------------------------------------------------------------------------------- 1 | #ifndef 
__HOST_GRAPH_MEM_H__ 2 | #define __HOST_GRAPH_MEM_H__ 3 | 4 | extern void base_mem_init(cl_context &context); 5 | extern void process_mem_init(cl_context &context); 6 | extern void partition_mem_init(cl_context &context, int blkIndex, int size, int cuIndex); 7 | 8 | #endif /* __HOST_GRAPH_MEM_H__ */ 9 | -------------------------------------------------------------------------------- /libgraph/misc/host_graph_misc_inner.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_MISC_INNER_H__ 2 | #define __HOST_GRAPH_MISC_INNER_H__ 3 | 4 | #define DEFAULT_KERNEL_ID (0) 5 | 6 | #define checkStatus(str) { \ 7 | if (status != 0 || status != CL_SUCCESS) { \ 8 | DEBUG_PRINTF("Error code: %d\n", status); \ 9 | DEBUG_PRINTF("Error: %s\n", str); \ 10 | acceleratorDeinit(); \ 11 | exit(-1); \ 12 | } \ 13 | } 14 | 15 | Graph* createGraph(const std::string &gName, const std::string &mode); 16 | 17 | #endif /* __HOST_GRAPH_MISC_H__ */ 18 | -------------------------------------------------------------------------------- /libgraph/scheduler/host_graph_scheduler.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include "host_graph_scheduler.h" 4 | 5 | static graphStaticScheduler scheduler; 6 | 7 | static int partIdTable[MAX_PARTITIONS_NUM]; 8 | 9 | int registerScheduler(graphStaticScheduler * pItem) 10 | { 11 | if (pItem == NULL) 12 | { 13 | return -1; 14 | } 15 | scheduler.init = pItem->init; 16 | scheduler.subPartionScheduler = pItem->subPartionScheduler; 17 | scheduler.partitionScheduler = pItem->partitionScheduler; 18 | return 0; 19 | } 20 | 21 | int getArrangedPartitionID(int step) 22 | { 23 | return partIdTable[step]; 24 | } 25 | 26 | int schedulerInit(void *arg) 27 | { 28 | memset(partIdTable, 0, sizeof(int) * MAX_PARTITIONS_NUM); 29 | if (scheduler.init == NULL) 30 | return 0; 31 | return scheduler.init(arg); 32 | } 33 | 34 | int 
schedulerSubPartitionArrangement(int partIndex) 35 | { 36 | partitionDescriptor * partition = getPartition(partIndex); 37 | if (scheduler.subPartionScheduler == NULL) 38 | { 39 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 40 | { 41 | partition->finalOrder[i] = i; 42 | } 43 | return 0; 44 | } 45 | return scheduler.subPartionScheduler(partIndex); 46 | } 47 | 48 | 49 | int schedulerPartitionArrangement(int size) 50 | { 51 | if (scheduler.partitionScheduler == NULL) 52 | { 53 | for (int i = 0; i < size; i++) 54 | { 55 | partIdTable[i] = i; 56 | } 57 | return 0; 58 | } 59 | return scheduler.partitionScheduler(partIdTable, size); 60 | } -------------------------------------------------------------------------------- /libgraph/scheduler/host_graph_scheduler.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_SCHEDULER__ 2 | #define __HOST_GRAPH_SCHEDULER__ 3 | 4 | 5 | typedef int (* schedulerInitHanlder)(void *arg); 6 | 7 | typedef int (* subPartitionArrangementHandler)(int partIndex); 8 | 9 | typedef int (* partitionArrangementHandler)(int * table, int size); 10 | 11 | 12 | 13 | typedef struct{ 14 | schedulerInitHanlder init; 15 | subPartitionArrangementHandler subPartionScheduler; 16 | partitionArrangementHandler partitionScheduler; 17 | } graphStaticScheduler; 18 | 19 | int schedulerRegister(void); 20 | 21 | int registerScheduler(graphStaticScheduler * pItem); 22 | 23 | 24 | /* internal phase */ 25 | 26 | int schedulerInit(void *arg); 27 | 28 | int schedulerSubPartitionArrangement(int partIndex); 29 | 30 | int schedulerPartitionArrangement(int size); 31 | 32 | int getArrangedPartitionID(int step); 33 | 34 | 35 | #endif /* __HOST_GRAPH_SCHEDULER__ */ 36 | -------------------------------------------------------------------------------- /libgraph/scheduler/normal/scheduler.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include 
"host_graph_scheduler.h" 4 | 5 | 6 | static int stepCounter = 0; 7 | 8 | int normalInitHanlder(void *arg) 9 | { 10 | stepCounter = 0; 11 | return 0; 12 | } 13 | 14 | int soeSubPartitionArrangementHandler(int partIndex) 15 | { 16 | partitionDescriptor * partition =getPartition(partIndex); 17 | if (stepCounter % 2 == 0) 18 | { 19 | for (int k = 0; k < SUB_PARTITION_NUM; k++) 20 | { 21 | partition->finalOrder[k] = k; 22 | } 23 | } 24 | else 25 | { 26 | for (int k = 0; k < SUB_PARTITION_NUM; k++) 27 | { 28 | partition->finalOrder[k] = SUB_PARTITION_NUM - k - 1; 29 | } 30 | } 31 | 32 | return 0; 33 | } 34 | 35 | int normalSchedulerPartitionArrangement(int * table, int size) 36 | { 37 | for (int i = 0; i < size; i++) 38 | { 39 | table[i] = i; 40 | } 41 | 42 | return 0; 43 | } 44 | 45 | static graphStaticScheduler dut = { 46 | .init = normalInitHanlder, 47 | .subPartionScheduler = normalSubPartitionArrangementHandler, 48 | .partitionScheduler = normalSchedulerPartitionArrangement, 49 | }; 50 | 51 | 52 | 53 | int schedulerRegister(void) 54 | { 55 | return registerScheduler(&dut); 56 | } 57 | -------------------------------------------------------------------------------- /libgraph/scheduler/secondOrderEstimator/scheduler.cpp: -------------------------------------------------------------------------------- 1 | #include "host_graph_sw.h" 2 | 3 | #include "host_graph_scheduler.h" 4 | 5 | 6 | 7 | static double cuPerformance[SUB_PARTITION_NUM]; 8 | 9 | 10 | 11 | double performanceEstimator(double vertex, double edge) 12 | { 13 | const double p00 = 3.096e+05; 14 | const double p10 = 0.6971; 15 | const double p01 = 0.2648; 16 | const double p11 = -1.076e-08; 17 | const double p02 = 4.187e-08; 18 | double x = edge; 19 | double y = vertex; 20 | double time = p00 + p10 * x + p01 * y + p11 * x * y + p02 * y * y; 21 | return time; 22 | } 23 | 24 | 25 | int soeInitHanlder(void *arg) 26 | { 27 | for (int i = 0; i < SUB_PARTITION_NUM; i++) 28 | { 29 | cuPerformance[i] = 0; 30 | } 31 | 
/*
 * Order the sub-partitions of partition `partIndex` so that the compute unit
 * with the largest accumulated estimated time receives the sub-partition with
 * the smallest estimated time, the next most loaded unit the next smallest,
 * and so on. Results are written to partition->finalOrder (indexed by compute
 * unit) and the per-unit totals in cuPerformance are updated.
 * Always returns 0.
 */
int soeSubPartitionArrangementHandler(int partIndex)
{
    partitionDescriptor * partition =getPartition(partIndex);
    double currentEst[SUB_PARTITION_NUM];        /* estimates, sorted in place below */
    double currentEstLut[SUB_PARTITION_NUM];     /* estimates, indexed by sub-partition id */
    int reOrderIndexArray[SUB_PARTITION_NUM];    /* sub-partition ids, kept in step with currentEst */

    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        unsigned int subPartitionSize = partition->subPartitionSize;
        unsigned int bound = subPartitionSize * (i + 1);

        /* The last sub-partition may hold fewer than subPartitionSize edges.
         * NOTE(review): if subPartitionSize * i can exceed totalEdge this
         * subtraction may go below zero -- confirm against the partitioner. */
        int subTotalEdge = (bound > partition->totalEdge) ? (partition->totalEdge - (subPartitionSize * i)) : (subPartitionSize);
        int subTotalVertex = partition->sub[i]->srcVertexEnd - partition->sub[i]->srcVertexStart;
        currentEst[i] = performanceEstimator(subTotalVertex, subTotalEdge);
        currentEstLut[i] = currentEst[i];
        reOrderIndexArray[i] = i;
        partition->finalOrder[i] = i;
    }

    /* Bubble sort by estimate, largest first; ties keep their original order. */
    for (int k = 0; k < SUB_PARTITION_NUM; k++)
    {
        for (int j = 0; j < SUB_PARTITION_NUM - k - 1; j++)
        {
            if (currentEst[j] < currentEst[j + 1])
            {
                int tmpId = reOrderIndexArray[j];
                double tmpEst = currentEst[j];

                reOrderIndexArray[j] = reOrderIndexArray[j + 1];
                reOrderIndexArray[j + 1] = tmpId;

                currentEst[j] = currentEst[j + 1];
                currentEst[j + 1] = tmpEst;
            }
        }
    }
    for (int k = 0; k < SUB_PARTITION_NUM; k++)
    {
        DEBUG_PRINTF("[EST]: %d is expected to exe in %lfms\n", reOrderIndexArray[k], currentEst[k] / 1000000.0);
    }
    /* tmpMap[j] == 1 once compute unit j has been given a sub-partition. */
    int tmpMap[SUB_PARTITION_NUM];
    for (int k = 0; k < SUB_PARTITION_NUM; k++)
    {
        tmpMap[k] = 0;
    }

    for (int k = 0; k < SUB_PARTITION_NUM; k++)
    {
        /* Pick the unassigned unit with the largest accumulated time.
         * maxIndex starts out of range and is only valid if some unassigned
         * unit has cuPerformance > -1. */
        double maxPerf = -1;
        int maxIndex = SUB_PARTITION_NUM;
        for (int j = 0; j < SUB_PARTITION_NUM; j++)
        {
            if (tmpMap[j] == 0)
            {
                if (maxPerf < cuPerformance[j])
                {
                    maxPerf = cuPerformance[j];
                    maxIndex = j;
                }
            }
        }
        tmpMap[maxIndex] = 1;
        /* Hand out sub-partitions from the cheapest end of the sorted list. */
        partition->finalOrder[maxIndex] = reOrderIndexArray[SUB_PARTITION_NUM - k - 1];
    }
    for (int k = 0; k < SUB_PARTITION_NUM; k++)
    {
        cuPerformance[k] += currentEstLut[partition->finalOrder[k]];
        DEBUG_PRINTF("[EST]: finalOrder %d total exe: %lfms\n", partition->finalOrder[k], cuPerformance[k] / 1000000.0);
    }

    return 0;
}

/*
 * Fill table[0..size) with partition ids ordered by totalEdge, largest first
 * (bubble sort; partitions with equal totalEdge keep ascending id order).
 * Always returns 0.
 */
int soeSchedulerPartitionArrangement(int * table, int size)
{
    for (int i = 0; i < size; i++)
    {
        table[i] = i;
    }
    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < size - i - 1; j++)
        {
            if (getPartition(table[j])->totalEdge < getPartition(table[j + 1])->totalEdge)
            {
                int tmpId = table[j];
                table[j] = table[j + 1];
                table[j + 1] = tmpId;
            }
        }
    }
    return 0;
}

/* Handler table of the second-order-estimator scheduler. */
static graphStaticScheduler dut = {
    .init                   = soeInitHanlder,
    .subPartionScheduler    = soeSubPartitionArrangementHandler,
    .partitionScheduler     = soeSchedulerPartitionArrangement,
};



/* Register the second-order-estimator scheduler as the active one. */
int schedulerRegister(void)
{
    return registerScheduler(&dut);
}
tmp_array[SIZE_BY_INT][SIZE_BY_INT]; 53 | 54 | int mask[SIZE_BY_INT]; 55 | 56 | 57 | 58 | 59 | 60 | for (int j = 0; j < SIZE_BY_INT; j++) 61 | { 62 | ori[j] = test_array[j]; 63 | 64 | } 65 | 66 | array[0][0] = ori[0]; 67 | mask[0] = 1; 68 | for (int j = 1; j < SIZE_BY_INT; j++) 69 | { 70 | if (ori[j - 1] != ori[j]) 71 | { 72 | array[0][j] = ori[j]; 73 | mask[j] = 1; 74 | } 75 | else 76 | { 77 | array[0][j] = INVALID_FLAG; 78 | mask[j] = 0; 79 | } 80 | } 81 | int level_1_sum[8]; 82 | 83 | for (int j = 0; j < 8; j++) 84 | { 85 | level_1_sum[j] = mask[2 * j] + mask[2 * j + 1]; 86 | } 87 | 88 | int level_2_sum[4]; 89 | 90 | for (int j = 0; j < 4; j++) 91 | { 92 | level_2_sum[j] = level_1_sum[2 * j] + level_1_sum[2 * j + 1]; 93 | } 94 | 95 | int level_3_sum[2]; 96 | 97 | for (int j = 0; j < 2; j++) 98 | { 99 | level_3_sum[j] = level_2_sum[2 * j] + level_2_sum[2 * j + 1]; 100 | } 101 | 102 | int result = level_3_sum[0] + level_3_sum[1]; 103 | 104 | 105 | 106 | for (int j = 0; j < SIZE_BY_INT; j++) 107 | { 108 | printf("%d %d \n", test_array[j], array[0][j]); 109 | } 110 | printf("----------------------\n"); 111 | #if 1 112 | for (int i = 1; i < 16; i++) 113 | { 114 | //#pragma HLS PIPELINE 115 | { 116 | //#pragma HLS latency min=1 max=1 117 | for (int j = 0; j < SIZE_BY_INT - 1 ; j++) 118 | { 119 | //#pragma HLS UNROLL 120 | if (array[i - 1][j] == INVALID_FLAG) 121 | { 122 | tmp_array[i][j] = array[i - 1][j + 1]; 123 | } 124 | else 125 | { 126 | tmp_array[i][j] = array[i - 1][j]; 127 | } 128 | 129 | } 130 | if (array[i - 1][SIZE_BY_INT - 1] == INVALID_FLAG) 131 | { 132 | tmp_array[i][SIZE_BY_INT - 1] = INVALID_FLAG; 133 | } 134 | else 135 | { 136 | tmp_array[i][SIZE_BY_INT - 1] = array[i - 1][SIZE_BY_INT - 1]; 137 | } 138 | } 139 | { 140 | //#pragma HLS latency min=1 max=1 141 | array[i][0] = tmp_array[i][0]; 142 | for (int j = 1; j < SIZE_BY_INT; j++) 143 | { 144 | 145 | //#pragma HLS UNROLL 146 | if (tmp_array[i][j] == tmp_array[i][j - 1]) 147 | { 148 | array[i][j] = 

#include "host_graph_verification_inner.h"

#include "global_config.h"
#include "fpga_application.h"


#if CUSTOMIZE_APPLY == 0

/*
 * Software reference for the apply stage of one partition.
 *
 * Reads back the per-sub-partition tmpProp buffers and the current property
 * buffer from the device, recomputes the apply result on the host into the
 * MEM_ID_VERTEX_PROP_VERIFY buffer, then reads back the device result buffer
 * and reports every mismatch through DEBUG_PRINTF.
 *
 * superStep : selects the buffers; prop[superStep % 2] is the input and
 *             prop[(superStep + 1) % 2] the device result.
 * partId    : partition to check; its sub-partitions are
 *             partId * SUB_PARTITION_NUM + [0, SUB_PARTITION_NUM).
 * applyArg  : forwarded to applyFunc.
 */
void partitionApplyCModel(
    cl_context              &context,
    cl_device_id            &device,
    int                     superStep,
    int                     partId,
    unsigned int            applyArg
)
{
    int currentPropId = superStep % 2;
    int resultPropId  = (superStep + 1) % 2;
    unsigned int applyArgReg = applyArg;

    unsigned int infoArrayVerify[APPLY_REF_ARRAY_SIZE];

    for (int i = 0; i < APPLY_REF_ARRAY_SIZE; i++)
    {
        infoArrayVerify[i] = 0;
    }


    /* Fetch the gather-scatter output of every sub-partition. */
    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        transfer_data_from_pl(context, device, getSubPartition(partId * SUB_PARTITION_NUM + i)->tmpProp.id);

    }
    prop_t * pCuData[SUB_PARTITION_NUM];
    prop_t * updateVerify = (prop_t*)get_host_mem_pointer(MEM_ID_VERTEX_PROP_VERIFY);
    /* NOTE(review): the out-degree buffer is read through a prop_t pointer;
     * presumably prop_t has the size of int -- confirm. */
    prop_t * outDeg       = (prop_t*)get_host_mem_pointer(MEM_ID_OUT_DEG);

    transfer_data_from_pl(context, device, getGatherScatter(0)->prop[currentPropId].id);
    prop_t * propValue    = (prop_t *)get_host_mem_pointer(getGatherScatter(0)->prop[currentPropId].id);

    /* The first sub-partition supplies the destination vertex range. */
    subPartitionDescriptor *p_partition = getSubPartition(partId * SUB_PARTITION_NUM);

    for (int i = 0; i < SUB_PARTITION_NUM; i++)
    {
        pCuData[i] = (prop_t*)get_host_mem_pointer(getSubPartition(partId * SUB_PARTITION_NUM + i)->tmpProp.id);
    }

    /* Vertex count rounded up to ALIGN_SIZE; only used in the log line below. */
    volatile unsigned int partitionVertexNum = ((p_partition->dstVertexEnd - p_partition->dstVertexStart)
            / (ALIGN_SIZE) + 1 ) * (ALIGN_SIZE);
    DEBUG_PRINTF("[DUMP] partitionVertexNum %d\n", partitionVertexNum);

    /* Global index of the partition's first destination vertex. */
    int offset = getSubPartition(partId * SUB_PARTITION_NUM)->dstVertexStart;

    /* NOTE(review): iterates over MAX_VERTICES_IN_ONE_PARTITION entries, not
     * the partition's own vertex count; confirm that propValue and outDeg are
     * large enough for i + offset on the last partition. */
    for (int i = 0; i < MAX_VERTICES_IN_ONE_PARTITION; i++)
    {
        /* Merge the partial results of all sub-partitions for local vertex i. */
        prop_t mergeData = 0;
        for (int j = 0 ; j < SUB_PARTITION_NUM; j++)
        {
            prop_t* pLocal = pCuData[j];

            mergeData = PROP_COMPUTE_STAGE4(mergeData, pLocal[i]);
            if (DATA_DUMP)
            {
                DEBUG_PRINTF("[DUMP-0] %d 0x%08x 0x%08x \n", i,
                             pLocal[i], mergeData);
            }
        }

        prop_t tProp = mergeData;
        updateVerify[i] = applyFunc(tProp, propValue[i + offset], outDeg[i + offset], infoArrayVerify, *(unsigned int *)&applyArgReg);
#if 0
        int dump_flag = 0;
        for (int k = 0 ; k < APPLY_REF_ARRAY_SIZE; k++)
        {
            if (infoArrayVerify[k] != 0)
            {
                dump_flag = 1;
            }
        }
        if (dump_flag)
        {
            //if (i == 18)
            {
                DEBUG_PRINTF("[DUMP-0] %d 0x%08x 0x%08x 0x%08x \n", i,
                             tProp, propValue[i + offset], updateVerify[i]);
            }

        }
#endif

    }

    /* Compare against the device result; updateVerify is indexed locally,
     * hwUpdate globally (hence + offset). Only the first 49 mismatches are
     * printed, all are counted. */
    int error_count = 0;
    transfer_data_from_pl(context, device, getGatherScatter(0)->prop[resultPropId].id);
    prop_t* hwUpdate    = (prop_t *)get_host_mem_pointer(getGatherScatter(0)->prop[resultPropId].id);
    for (unsigned int i = 0; i < p_partition->dstVertexEnd - p_partition->dstVertexStart + 1; i++)
    {
        if (updateVerify[i] !=  hwUpdate[i + offset])
        {
            error_count ++;
            if (error_count < 50)
            {

                DEBUG_PRINTF("apply error %d 0x%08x hw: 0x%08x diff 0x%08x !!!!\n", i,
                             updateVerify[i],
                             hwUpdate[i + offset],
                             updateVerify[i] - hwUpdate[i + offset]);
            }
        }
        if (DATA_DUMP)
        {
            DEBUG_PRINTF("[DUMP] %d 0x%08x 0x%08x diff 0x%08x \n", i,
                         updateVerify[i],
                         hwUpdate[i + offset],
                         updateVerify[i] - hwUpdate[i + offset]);
        }
    }
    DEBUG_PRINTF("[RES] apply error_count %d \n", error_count);
}

#endif
clear_host_mem(MEM_ID_TMP_VERTEX_VERIFY); 29 | DEBUG_PRINTF("partition cmodel verify:\n"); 30 | 31 | for (unsigned int i = 0; i < subPartitions->listEnd; i++) 32 | { 33 | prop_t update = 0; 34 | int address = (edgesTailArray[i] > ((int)subPartitions->dstVertexEnd)) ? subPartitions->dstVertexEnd : edgesTailArray[i]; 35 | if (IS_ACTIVE_VERTEX(propValue[edgesHeadArray[i]])) 36 | { 37 | update = PROP_COMPUTE_STAGE0(propValue[edgesHeadArray[i]]); 38 | #if HAVE_EDGE_PROP 39 | update = PROP_COMPUTE_STAGE1(update, edgeProp[i]); 40 | #else 41 | update = PROP_COMPUTE_STAGE1(update, 0); 42 | #endif 43 | tmpVertexPropVerify[address] = PROP_COMPUTE_STAGE3(tmpVertexPropVerify[address], update); 44 | } 45 | if (DATA_DUMP) 46 | { 47 | DEBUG_PRINTF("[DUMP] %d 0x%08x-->0x%08x 0x%08x with 0x%08x \n", i, 48 | edgesTailArray[i], 49 | edgesHeadArray[i], 50 | propValue[edgesHeadArray[i]], 51 | edgeProp[i]); 52 | DEBUG_PRINTF("[DUMP-2] %d 0x%08x 0x%08x [%d] 0x%08x\n", i, 53 | propValue[i], update, address, tmpVertexPropVerify[address]); 54 | } 55 | 56 | #ifdef PROBE_VERTEX 57 | if (edgesTailArray[i] == PROBE_VERTEX) 58 | { 59 | DEBUG_PRINTF("probe (%d): %x %x %x %x \n", edgesTailArray[i], i, edgesHeadArray[i], propValue[edgesHeadArray[i]], tmpVertexPropVerify[edgesTailArray[i]]); 60 | } 61 | #endif 62 | } 63 | transfer_data_from_pl(context, device, subPartitions->tmpProp.id); 64 | 65 | 66 | prop_t *tmpVertexProp = (prop_t*)get_host_mem_pointer(subPartitions->tmpProp.id); 67 | 68 | int error_count = 0; 69 | int total_count = 0; 70 | for (unsigned int i = subPartitions->dstVertexStart; i < subPartitions->dstVertexEnd; i ++) { 71 | if (tmpVertexPropVerify[i] != 0) 72 | { 73 | total_count ++; 74 | } 75 | if (tmpVertexPropVerify[i] != tmpVertexProp[i - subPartitions->dstVertexStart]) 76 | { 77 | error_count++; 78 | 79 | 80 | #ifndef SW_DEBUG 81 | if (error_count <= 100) 82 | #endif 83 | { 84 | DEBUG_PRINTF("gs error %d 0x%08x hw: 0x%08x diff 0x%08x !!!!\n", i, 85 | tmpVertexPropVerify[i], 86 | 
tmpVertexProp[i - subPartitions->dstVertexStart], 87 | tmpVertexPropVerify[i] - tmpVertexProp[i - subPartitions->dstVertexStart]); 88 | } 89 | 90 | } 91 | else 92 | { 93 | if (DATA_DUMP) 94 | { 95 | DEBUG_PRINTF("[DUMP] gs %d 0x%08x hw: 0x%08x\n", i, tmpVertexPropVerify[i], tmpVertexProp[i - subPartitions->dstVertexStart]); 96 | } 97 | } 98 | } 99 | DEBUG_PRINTF("[RES] error_count %d in size %d/ %d\n", error_count, total_count, 100 | subPartitions->dstVertexEnd - subPartitions->dstVertexStart + 1 ); 101 | } 102 | 103 | -------------------------------------------------------------------------------- /libgraph/verification/host_graph_verification_inner.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_GRAPH_SW_VERIFICATION_INNER_H__ 2 | #define __HOST_GRAPH_SW_VERIFICATION_INNER_H__ 3 | 4 | #include "host_graph_sw.h" 5 | 6 | 7 | //#define SW_DEBUG 8 | 9 | #define DEBUG_DUMP_VERTEX_SIZE (1024) 10 | 11 | #define DATA_DUMP 0//(i < 50) 12 | 13 | //#define PROBE_VERTEX (14562) 14 | 15 | 16 | void partitionGatherScatterCModel( 17 | cl_context &context, 18 | cl_device_id &device, 19 | int superStep, 20 | int cuIndex, 21 | subPartitionDescriptor *subPartitions 22 | ); 23 | 24 | void partitionApplyCModel( 25 | cl_context &context, 26 | cl_device_id &device, 27 | int superStep, 28 | int partId, 29 | unsigned int applyArg 30 | ); 31 | 32 | 33 | #endif /* __HOST_GRAPH_SW_VERIFICATION_INNER_H__ */ 34 | -------------------------------------------------------------------------------- /utils/automation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | make -f ./automation/auto_gen_parameters.mk app=$1 auto_para 3 | make -f ./automation/auto_gen_code.mk app=$1 code_gen 4 | make -f ./automation/auto_gen_makefile.mk app=$1 makefile_gen 5 | -------------------------------------------------------------------------------- /utils/bitstream.mk: 
-------------------------------------------------------------------------------- 1 | $(XCLBIN)/graph_fpga.$(TARGET).$(DSA).xclbin: $(BINARY_CONTAINER_OBJS) 2 | $(XOCC) $(CLFLAGS) -l $(LDCLFLAGS) $(BINARY_LINK_OBJS) -o'$@' $(+) 3 | 4 | # Building Host 5 | $(EXECUTABLE): $(HOST_SRCS) cleanexe 6 | mkdir -p $(XCLBIN) 7 | $(CXX) $(CXXFLAGS) $(HOST_SRCS) -o '$@' $(LDFLAGS) 8 | 9 | emconfig:$(EMCONFIG_DIR)/emconfig.json 10 | $(EMCONFIG_DIR)/emconfig.json: 11 | emconfigutil --platform $(DEVICE) --od $(EMCONFIG_DIR) 12 | 13 | .PHONY: hwemuprepare 14 | hwemuprepare: 15 | ifeq ($(TARGET),$(filter $(TARGET), hw_emu)) 16 | @echo "prepare for hw_emu" 17 | $(CP) $(EMCONFIG_DIR)/emconfig.json . 18 | $(CP) $(UTILS_PATH)/sdaccel.ini . 19 | source $(UTILS_PATH)/hw_emu.sh 20 | else 21 | @echo "prepare for hw" 22 | endif 23 | 24 | check: all 25 | ifeq ($(TARGET),$(filter $(TARGET),sw_emu hw_emu)) 26 | $(CP) $(EMCONFIG_DIR)/emconfig.json . 27 | XCL_EMULATION_MODE=$(TARGET) ./$(EXECUTABLE) 28 | else 29 | ./$(EXECUTABLE) 30 | endif 31 | sdx_analyze profile -i sdaccel_profile_summary.csv -f html 32 | -------------------------------------------------------------------------------- /utils/clean.mk: -------------------------------------------------------------------------------- 1 | cleanexe: 2 | -$(RMDIR) $(EXECUTABLE) 3 | clean: 4 | -$(RMDIR) $(EXECUTABLE) $(XCLBIN)/{*sw_emu*,*hw_emu*} 5 | -$(RMDIR) sdaccel_* TempConfig system_estimate.xtxt *.rpt 6 | -$(RMDIR) src/*.ll _xocc_* .Xil emconfig.json dltmp* xmltmp* *.log *.jou *.wcfg *.wdb 7 | -$(RMDIR) .Xil 8 | -$(RMDIR) *.zip 9 | -$(RMDIR) *.str 10 | -$(RMDIR) $(XCLBIN) 11 | -$(RMDIR) ./_x 12 | -$(RMDIR) ./membership.out 13 | -$(RMDIR) host_graph_fpga* 14 | -$(RMDIR) xclbin* 15 | -$(RMDIR) .run 16 | -$(RMDIR) tmp_fpga_top 17 | -$(RMDIR) tmp_para 18 | -$(RMDIR) para_gen 19 | -$(RMDIR) code_gen 20 | -$(RMDIR) makefile_gen 21 | -------------------------------------------------------------------------------- /utils/help.mk: 
-------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help:: 4 | $(ECHO) "Makefile Usage:" 5 | $(ECHO) " make all TARGET= DEVICE=" 6 | $(ECHO) " Command to generate the design for specified Target and Device." 7 | $(ECHO) "" 8 | $(ECHO) " make clean " 9 | $(ECHO) " Command to remove the generated non-hardware files." 10 | $(ECHO) "" 11 | $(ECHO) " make cleanall" 12 | $(ECHO) " Command to remove all the generated files." 13 | $(ECHO) "" 14 | $(ECHO) " make check TARGET= DEVICE=" 15 | $(ECHO) " Command to run application in emulation." 16 | $(ECHO) "" 17 | -------------------------------------------------------------------------------- /utils/hw_emu.sh: -------------------------------------------------------------------------------- 1 | export XCL_EMULATION_MODE=hw_emu -------------------------------------------------------------------------------- /utils/main.mk: -------------------------------------------------------------------------------- 1 | $(shell ./utils/automation.sh $(app) > generator.log) 2 | 3 | SHELL := /bin/bash 4 | TARGET := $(TARGETS) 5 | DEVICE := $(DEVICES) 6 | 7 | COMMON_REPO = ./ 8 | ABS_COMMON_REPO = $(shell readlink -f $(COMMON_REPO)) 9 | UTILS_PATH = ./utils 10 | 11 | 12 | 13 | .PHONY:all clean exe hwemuprepare $(EXECUTABLE) emconfig 14 | all: precheck 15 | exe: precheck 16 | clean: precheck 17 | hwemuprepare: precheck 18 | 19 | precheck: 20 | ifndef app 21 | $(error app is undefined) 22 | else 23 | APP = $(app) 24 | APPCONFIG = ./application/$(APP) 25 | 26 | include $(UTILS_PATH)/help.mk 27 | include $(UTILS_PATH)/utils.mk 28 | 29 | include tmp_para/para.mk 30 | 31 | 32 | include $(APPCONFIG)/config.mk 33 | include $(APPCONFIG)/build.mk 34 | include ./application/common.mk 35 | 36 | include tmp_fpga_top/gs_kernel.mk 37 | include tmp_fpga_top/apply_kernel_1.mk 38 | 39 | include $(UTILS_PATH)/bitstream.mk 40 | include $(UTILS_PATH)/clean.mk 41 | 42 | 43 | all: code_gen $(EXECUTABLE) 
$(BINARY_CONTAINERS) emconfig 44 | 45 | 46 | exe: $(EXECUTABLE) 47 | 48 | 49 | endif 50 | -------------------------------------------------------------------------------- /utils/opencl.mk: -------------------------------------------------------------------------------- 1 | # Definition of include file locations 2 | OPENCL_INCLUDE:= $(XILINX_XRT)/include/ 3 | 4 | opencl_CXXFLAGS=-I$(OPENCL_INCLUDE) 5 | 6 | OPENCL_LIB:=$(XILINX_XRT)/lib/ 7 | opencl_LDFLAGS=-L$(OPENCL_LIB) -lOpenCL -lpthread 8 | -------------------------------------------------------------------------------- /utils/report_usage.tcl: -------------------------------------------------------------------------------- 1 | open_project [lindex $argv 0] 2 | open_run impl_1 3 | report_utilization -file util_full.report 4 | report_utilization -hierarchical -hierarchical_depth 3 -file util_slr.report 5 | report_power -file power.report -------------------------------------------------------------------------------- /utils/resetfpga.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | xbutil reset -h 3 | xbutil program -p /data/graph_fpga.xclbin 4 | 5 | xbutil dmatest 6 | 7 | make exe -------------------------------------------------------------------------------- /utils/sdaccel.ini: -------------------------------------------------------------------------------- 1 | 2 | [Debug] 3 | profile=true 4 | timeline_trace=true 5 | data_transfer_trace=fine 6 | 7 | 8 | #Start of Runtime group 9 | [Runtime] 10 | runtime_log = console 11 | 12 | 13 | [Emulation] 14 | launch_waveform = gui -------------------------------------------------------------------------------- /utils/tool_compile_check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | make app=pr exe 3 | make app=bfs exe 4 | make app=sssp exe 5 | make app=ar exe 6 | make app=wcc exe 7 | make app=spmv exe 8 | make app=cc exe 
-------------------------------------------------------------------------------- /utils/tool_grep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # $1 directory holding the logs (files are named APP_DATASET.log) 4 | # $2 app 5 | # $3 pattern passed to grep 6 | if [ $# -lt 3 ]; then 7 | echo "usage: $0 LOG_DIR APP PATTERN" 8 | exit 1 9 | fi 10 | 11 | DATASET=( 'rmat-19-32.txt' \ 12 | 'rmat-21-32.txt' \ 13 | 'rmat-24-16.txt' \ 14 | 'bio-mouse-gene.edges' \ 15 | 'web-Google.mtx'\ 16 | 'wiki-Talk.txt'\ 17 | 'amazon-2008.mtx' \ 18 | 'web-hudong.edges' \ 19 | 'web-baidu-baike.edges' \ 20 | 'wiki-topcats.mtx' \ 21 | 'soc-flickr-und.edges' \ 22 | 'pokec-relationships.txt' \ 23 | 'LiveJournal1.txt' \ 24 | 'soc-twitter-2010.mtx' \ 25 | 'wikipedia-20070206.mtx' \ 26 | 'ca-hollywood-2009.mtx' \ 27 | 'graph500-scale23-ef16_adj.edges' \ 28 | 'graph500-scale24-ef16_adj.edges' \ 29 | 'graph500-scale25-ef16_adj.edges' \ 30 | ) 31 | 32 | for dataset in "${DATASET[@]}" 33 | do 34 | echo "$dataset" 35 | # grep reads the log directly; quoting keeps directories and patterns containing spaces intact 36 | grep -- "$3" "$1/${2}_${dataset}.log" 37 | 38 | done -------------------------------------------------------------------------------- /utils/tool_profile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sdx_analyze profile sdaccel_profile_summary.csv 3 | sdx_analyze profile -i sdaccel_profile_summary.csv -f html -------------------------------------------------------------------------------- /utils/tool_rebuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source /xtra/env/sdx_env.sh 4 | make cleanall 5 | 6 | make all -j 7 | 8 | -------------------------------------------------------------------------------- /utils/tool_release.sh: -------------------------------------------------------------------------------- 1 | # Timestamp without ':' (same form tool_test.sh produces) so the directory name is safe for scp/rsync/tar 2 | date_str=`date +%Y%m%d%H%M%S` 3 | 4 | release_path=release_${date_str} 5 | mkdir -p ${release_path} 6 | 7 | cp -r ./_x/link/int ./${release_path} 8 | cp -r ./_x/reports ./${release_path} 9 | cp _x/link/vivado/vivado.log ./${release_path} 10 | cp host_graph_fpga ./${release_path} 
11 | 12 | 13 | git status > ${release_path}/git_status.log 14 | git diff > ${release_path}/code_diff.diff 15 | git diff --cached > ${release_path}/code_cached.diff 16 | git log --graph -10 > ${release_path}/git_log.log 17 | git show HEAD > ${release_path}/git_show.diff 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /utils/tool_report.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | prj_file="" 4 | if [ -e "_x/link/vivado/prj/prj.xpr" ] 5 | then 6 | prj_file=" _x/link/vivado/prj/prj.xpr" 7 | fi 8 | 9 | if [ -e "_x/link/vivado/vpl/prj/prj.xpr" ] 10 | then 11 | prj_file=" _x/link/vivado/vpl/prj/prj.xpr" 12 | fi 13 | 14 | 15 | 16 | if [ -z ${prj_file} ]; then 17 | echo "no prj file" 18 | else 19 | vivado -mode batch -source utils/report_usage.tcl -tclargs ${prj_file} 20 | fi 21 | -------------------------------------------------------------------------------- /utils/tool_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # $1 xclbin 4 | # $2 app 5 | 6 | # Check the arguments before creating anything, so a bad invocation does not leave an empty log directory behind 7 | if [ $# -lt 2 ]; 8 | then 9 | echo "[FAILD] missing config for start test" 10 | echo "eg. ------>" 11 | echo "./tool_test.sh xx.xclbin cc " 12 | exit -1 13 | fi 14 | 15 | tmp_string=`date +%Y%m%d%T` 16 | date_str=${tmp_string//:} 17 | log_path=./test_log_$2_${date_str} 18 | 19 | mkdir -p "${log_path}" 20 | 21 | 
22 | DATASET=( 'rmat-19-32.txt' \ 23 | 'rmat-21-32.txt' \ 24 | 'rmat-24-16.txt' \ 25 | 'bio-mouse-gene.edges' \ 26 | 'web-Google.mtx'\ 27 | 'wiki-Talk.txt'\ 28 | 'amazon-2008.mtx' \ 29 | 'web-hudong.edges' \ 30 | 'web-baidu-baike.edges' \ 31 | 'wiki-topcats.mtx' \ 32 | 'soc-flickr-und.edges' \ 33 | 'pokec-relationships.txt' \ 34 | 'LiveJournal1.txt' \ 35 | 'wikipedia-20070206.mtx' \ 36 | 'ca-hollywood-2009.mtx' \ 37 | 'graph500-scale23-ef16_adj.edges' \ 38 | 'soc-twitter-2010.mtx' \ 39 | 'graph500-scale24-ef16_adj.edges' \ 40 | 'graph500-scale25-ef16_adj.edges' \ 41 | ) 42 | 43 | #make app=$2 exe 44 | 45 | for dataset in "${DATASET[@]}" 46 | do 47 | echo "/graph_data/$dataset" 48 | 49 | ./host_graph_fpga "$1" "/graph_data/$dataset" > "./${log_path}/${2}_${dataset}.log" 50 | 51 | done 52 | 53 | 54 | -------------------------------------------------------------------------------- /utils/tool_test_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./utils/tool_test.sh $1 pr 4 | ./utils/tool_test.sh $1 bfs 5 | ./utils/tool_test.sh $1 sssp 6 | ./utils/tool_test.sh $1 ar 7 | ./utils/tool_test.sh $1 wcc 8 | ./utils/tool_test.sh $1 spmv 9 | ./utils/tool_test.sh $1 cc -------------------------------------------------------------------------------- /utils/tool_test_app.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # $1 experiment name; the bitstream is read from ~/Dropbox/Experiment/$1/$2_$1.xclbin 4 | # $2 app 5 | 6 | # Check the arguments before creating anything, so a bad invocation does not leave an empty log directory behind 7 | if [ $# -lt 2 ]; 8 | then 9 | echo "[FAILD] missing config for start test" 10 | echo "eg. ------>" 11 | echo "./tool_test.sh xx.xclbin cc " 12 | exit -1 13 | fi 14 | 15 | tmp_string=`date +%Y%m%d%T` 16 | date_str=${tmp_string//:} 17 | log_path=test_log_${date_str}_$2 18 | 19 | mkdir -p "${log_path}" 20 | 21 | 
22 | DATASET=( 'rmat-19-32.txt' \ 23 | 'graph500-scale25-ef16_adj.edges'\ 24 | ) 25 | 26 | make app=$2 exe 27 | 28 | for dataset in "${DATASET[@]}" 29 | do 30 | echo "/graph_data/$dataset" 31 | 32 | "./host_graph_fpga_$2" ~/Dropbox/Experiment/"$1"/"${2}_$1.xclbin" "/graph_data/$dataset" > "./${log_path}/${2}_${dataset}.log" 33 | 34 | done 35 | 36 | 37 | -------------------------------------------------------------------------------- /utils/tool_timing.sh: -------------------------------------------------------------------------------- 1 | log_file="" 2 | if [ -e "_x/link/vivado/vivado.log" ] 3 | then 4 | log_file="_x/link/vivado/vivado.log" 5 | fi 6 | 7 | if [ -e "_x/link/vivado/vpl/vivado.log" ] 8 | then 9 | log_file="_x/link/vivado/vpl/vivado.log" 10 | fi 11 | 12 | 13 | 14 | if [ -z ${log_file} ]; then 15 | echo "no log file" 16 | else 17 | cat ${log_file} | grep TNS 18 | cat ${log_file} | grep "The frequency is being automatically changed to" 19 | cat ${log_file} | grep scaled 20 | 21 | fi 22 | -------------------------------------------------------------------------------- /utils/utils.mk: -------------------------------------------------------------------------------- 1 | #+------------------------------------------------------------------------------- 2 | # The following parameters are assigned with default values. 
These parameters can 3 | # be overridden through the make command line 4 | #+------------------------------------------------------------------------------- 5 | 6 | PROFILE := no 7 | 8 | #Generates profile summary report 9 | ifeq ($(PROFILE), yes) 10 | LDCLFLAGS += --profile_kernel data:all:all:all 11 | endif 12 | 13 | DEBUG := no 14 | 15 | #Generates debug summary report 16 | ifeq ($(DEBUG), yes) 17 | CLFLAGS += --dk protocol:all:all:all 18 | endif 19 | 20 | #Checks for XILINX_SDX 21 | ifndef XILINX_SDX 22 | $(error XILINX_SDX variable is not set, please set correctly and rerun) 23 | endif 24 | 25 | #Checks for XILINX_XRT 26 | check-xrt: 27 | ifndef XILINX_XRT 28 | $(error XILINX_XRT variable is not set, please set correctly and rerun) 29 | endif 30 | 31 | check-devices: 32 | ifndef DEVICE 33 | $(error DEVICE not set. Please set the DEVICE properly and rerun. Run "make help" for more details.) 34 | endif 35 | 36 | check-aws_repo: 37 | ifndef SDACCEL_DIR 38 | $(error SDACCEL_DIR not set. Please set it properly and rerun. Run "make help" for more details.) 39 | endif 40 | 41 | # sanitize_dsa - create a filesystem friendly name from dsa name 42 | # $(1) - name of dsa 43 | COLON=: 44 | PERIOD=. 
45 | UNDERSCORE=_ 46 | sanitize_dsa = $(strip $(subst $(PERIOD),$(UNDERSCORE),$(subst $(COLON),$(UNDERSCORE),$(1)))) 47 | 48 | device2dsa = $(if $(filter $(suffix $(1)),.xpfm),$(shell $(COMMON_REPO)/utility/parsexpmf.py $(1) dsa 2>/dev/null),$(1)) 49 | device2sandsa = $(call sanitize_dsa,$(call device2dsa,$(1))) 50 | device2dep = $(if $(filter $(suffix $(1)),.xpfm),$(dir $(1))/$(shell $(COMMON_REPO)/utility/parsexpmf.py $(1) hw 2>/dev/null) $(1),) 51 | 52 | # Cleaning stuff 53 | RM = rm -f 54 | RMDIR = rm -rf 55 | 56 | ECHO:= @echo 57 | 58 | docs: README.md 59 | 60 | README.md: description.json 61 | $(ABS_COMMON_REPO)/utility/readme_gen/readme_gen.py description.json 62 | -------------------------------------------------------------------------------- /utils/xcl/xcl.mk: -------------------------------------------------------------------------------- 1 | xcl_SRCS:=${UTILS_PATH}/xcl/xcl.c 2 | xcl_HDRS:=${UTILS_PATH}/xcl/xcl.h 3 | 4 | xcl_CXXFLAGS:=-I${UTILS_PATH}/xcl 5 | --------------------------------------------------------------------------------