├── .clang-format ├── .clangd ├── .gitattributes ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── compile_souffle.sh ├── data ├── CA-HepTH │ └── edge.facts ├── Gnutella31 │ └── edge.facts ├── JOIN.pdf ├── SF.cedge │ └── edge.facts ├── com-dblp │ └── edge.facts ├── cspa │ ├── httpd │ │ ├── assign.facts │ │ └── dereference.facts │ ├── linux │ │ ├── assign.facts │ │ └── dereference.facts │ └── postgresql │ │ ├── assign.facts │ │ └── dereference.facts ├── data_10.txt ├── data_22.txt ├── data_39994.txt ├── ego-Facebook │ └── edge.facts ├── employee.txt ├── fc_ocean │ └── edge.facts ├── fe-sphere │ └── edge.facts ├── fe_body │ └── edge.facts ├── hpc_talk.txt ├── loc-Brightkite │ └── edge.facts ├── tc.png ├── tc_new.png ├── usroad │ └── edge.facts └── vsp_finan │ └── edge.facts ├── docker └── runpod.dockerfile ├── include ├── dynamic_dispatch.h ├── exception.cuh ├── lie.cuh ├── print.cuh ├── relation.cuh ├── relational_algebra.cuh ├── timer.cuh └── tuple.cuh ├── install_souffle.sh ├── run_cspa_all.sh ├── run_sg_all.sh ├── run_tc_all.sh ├── src ├── acopy.cu ├── copy.cu ├── join.cu ├── lie.cu ├── print.cu ├── relation.cu └── tuple.cu └── test ├── cspa.cu ├── cuDF ├── load_test.py ├── reachability.py ├── sg.json └── sg.py ├── datastructure.cu ├── merge.cu ├── path_length.cu ├── sg.cu ├── sort.cu ├── souffle ├── bip.dl ├── choice_total.dl ├── cspa ├── cspa.cpp ├── cspa.dl ├── cspa.slog ├── cspa.slogc ├── path_length.dl ├── sg.cpp ├── sg.dl ├── spanning.dl ├── tc ├── tc.cpp └── tc.dl └── tc.cu /.clang-format: -------------------------------------------------------------------------------- 1 | IndentWidth: 4 2 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | CompileFlags: 2 | Remove: 3 | - -forward-unknown-to-host-compiler 4 | - -rdc=true 5 | - --generate-code* 6 | Add: 7 | - -xcuda 8 | - -std=c++20 9 | - --cuda-gpu-arch=sm_60 10 | - --cuda-path=/opt/cuda 11 | - -L/opt/cuda/lib 12 | - -I/opt/cuda/include 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.facts filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .cache 3 | .vscode 4 | */*.csv 5 | */**/log 6 | .idea/ 7 | cmake-build-debug/ 8 | cluster 9 | test/cuDF/*.log -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9 FATAL_ERROR) 2 | project(tc_gpu LANGUAGES CXX CUDA) 3 | 4 | set(gpu_ra_src "${PROJECT_SOURCE_DIR}/src") 5 | set(gpu_ra_include "${PROJECT_SOURCE_DIR}/include") 6 | set(test_dir "${PROJECT_SOURCE_DIR}/test") 7 | 8 | file(GLOB source_file_gpu_ra 9 | "${gpu_ra_include}/exception.cuh" 10 | "${gpu_ra_include}/dynamic_dispatch.h" 11 | "${gpu_ra_include}/print.cuh" 12 | "${gpu_ra_include}/relation.cuh" 13 | "${gpu_ra_include}/relational_algebra.cuh" 14 | "${gpu_ra_include}/timer.cuh" 15 | "${gpu_ra_include}/tuple.cuh" 16 | "${gpu_ra_include}/lie.cuh" 17 | "${gpu_ra_src}/tuple.cu" 18 | "${gpu_ra_src}/print.cu" 19 | "${gpu_ra_src}/relation.cu" 20 | "${gpu_ra_src}/lie.cu" 21 | "${gpu_ra_src}/copy.cu" 22 | "${gpu_ra_src}/acopy.cu" 23 
| "${gpu_ra_src}/join.cu" 24 | ) 25 | 26 | file(GLOB source_file_tc 27 | "${test_dir}/tc.cu" 28 | ) 29 | 30 | file(GLOB source_path_lenght 31 | "${test_dir}/path_length.cu" 32 | ) 33 | 34 | file(GLOB source_cspa 35 | "${test_dir}/cspa.cu" 36 | ) 37 | 38 | file(GLOB source_file_datastructure 39 | "${test_dir}/datastructure.cu" 40 | ) 41 | 42 | file(GLOB source_file_sg 43 | "${test_dir}/sg.cu" 44 | ) 45 | 46 | file(GLOB source_file_test 47 | "${test_dir}/sort.cu" 48 | ) 49 | 50 | file(GLOB source_file_merge 51 | "${test_dir}/merge.cu" 52 | ) 53 | 54 | add_library(gpu_ra "${source_file_gpu_ra}") 55 | target_compile_features(gpu_ra PUBLIC cxx_std_20) 56 | set_target_properties(gpu_ra PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 57 | 58 | add_executable(TC ${source_file_tc}) 59 | target_link_libraries(TC gpu_ra) 60 | target_compile_features(TC PUBLIC cxx_std_20) 61 | set_target_properties(TC PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 62 | 63 | add_executable(PLEN ${source_path_lenght}) 64 | target_link_libraries(PLEN gpu_ra) 65 | target_compile_features(PLEN PUBLIC cxx_std_20) 66 | set_target_properties(PLEN PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 67 | 68 | add_executable(CSPA ${source_cspa}) 69 | target_link_libraries(CSPA gpu_ra) 70 | target_compile_features(CSPA PUBLIC cxx_std_20) 71 | set_target_properties(CSPA PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 72 | 73 | add_executable(DATASTRUCTURE ${source_file_datastructure}) 74 | target_link_libraries(DATASTRUCTURE gpu_ra) 75 | target_compile_features(DATASTRUCTURE PUBLIC cxx_std_20) 76 | set_target_properties(DATASTRUCTURE PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 77 | 78 | add_executable(SG ${source_file_sg}) 79 | target_link_libraries(SG gpu_ra) 80 | target_compile_features(SG PUBLIC cxx_std_20) 81 | set_target_properties(SG PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 82 | 83 | add_executable(TEST ${source_file_test}) 84 | target_link_libraries(TEST gpu_ra) 85 | target_compile_features(TEST PUBLIC cxx_std_20) 86 | set_target_properties(TEST PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 87 | 88 | add_executable(MERGE ${source_file_merge}) 89 | target_link_libraries(MERGE gpu_ra) 90 | target_compile_features(MERGE PUBLIC cxx_std_20) 91 | set_target_properties(MERGE PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 92 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 HARP Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Datasets 3 | - Datasets are listed in [data folder](data). 4 | 5 | ## Dependencies 6 | ### Hardware 7 | - The complete benchmark of the CUDA-based transitive closure computation experiment can be executed on an Nvidia A100 GPU with a minimum of 40 GB GPU memory. The ThetaGPU single-GPU node is a suitable choice. 8 | - Partial benchmarks can be run on other Nvidia GPUs, but they may result in program termination for certain datasets due to limited GPU memory, leading to an instance of the `std::bad_alloc: cudaErrorMemoryAllocation: out of memory` error. 9 | 10 | ### NVIDIA CUDA Toolkit (version 11.4.2 or later) 11 | - Download and install the NVIDIA CUDA Toolkit from the NVIDIA website: [https://developer.nvidia.com/cuda-toolkit-archive](https://developer.nvidia.com/cuda-toolkit-archive) 12 | - Follow the installation instructions for your operating system. Make sure to install version 11.4.2 or later. 13 | ### CMake 14 | - Download and install CMake (version 3.9 or later) from the CMake website: [https://cmake.org/download/](https://cmake.org/download/) 15 | ### Thrust 16 | - The patch [https://github.com/NVIDIA/thrust/pull/1832/files](https://github.com/NVIDIA/thrust/pull/1832/files) must be applied to fix an integer overflow in `thrust::reduce`. 17 | 18 | ## Transitive Closure Computation 19 | - Transitive closure computation is a fundamental operation in graph analytics and relational algebra. 20 | - We present a CUDA-based implementation of transitive closure computation that is optimized for sparse graphs. 21 | - Build and run instructions are provided below: 22 | ```shell 23 | cmake --no-warn-unused-cli -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE -S./ -B./build 24 | cd build 25 | make 26 | ``` 27 | This will build the `TC` executable using the nvcc compiler. 28 | - The `TC` executable takes a single argument: the path to the input file containing the graph data. For example: 29 | ```shell 30 | ./TC ../data/data_5.txt 31 | ``` 32 | ### Run instructions for Polaris 33 | - Run using an interactive node: 34 | ```shell 35 | ssh @polaris.alcf.anl.gov 36 | qsub -I -l select=1 -l filesystems=home:eagle -l walltime=1:00:00 -q debug -A dist_relational_alg 37 | module load gcc 38 | cd slog-gpu-backend 39 | git fetch 40 | git reset --hard origin/main 41 | rm -rf build 42 | module purge 43 | module load gcc 44 | module load cmake 45 | module load cudatoolkit-standalone 46 | cmake --no-warn-unused-cli -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -DCMAKE_EXPORT_COMPILE_COMMANDS:BOOL=TRUE -S./ -B./build 47 | cd build 48 | make 49 | ./TC ../data/data_5.txt 50 | ``` 51 | - Transfer a file from the local machine to Polaris: 52 | ```shell 53 | scp data_68993773.txt arsho@polaris.alcf.anl.gov:/home/arsho/slog-gpu-backend/data/ 54 | ``` 55 | ### (Optional) Memory check: 56 | - After creating the build folder and the `TC` executable, run the following commands to check for memory leaks and errors: 57 | ```shell 58 | cuda-memcheck ./TC ../data/data_7035.txt 59 | ========= CUDA-MEMCHECK 60 | ...
61 | TC time: 48.691 62 | ========= ERROR SUMMARY: 0 errors 63 | compute-sanitizer ./TC ../data/data_7035.txt 64 | ========= COMPUTE-SANITIZER 65 | ... 66 | TC time: 0.668892 67 | ========= ERROR SUMMARY: 0 errors 68 | ``` 69 | 70 | ### Run cuDF on Polaris 71 | ```shell 72 | ssh @polaris.alcf.anl.gov 73 | qsub -I -l select=1 -l filesystems=home:grand:eagle -l walltime=1:00:00 -q debug -A dist_relational_alg 74 | module purge 75 | module load conda/2023-10-04 76 | conda activate 77 | pip install --extra-index-url https://pypi.nvidia.com cudf-cu11 78 | python test/cuDF/sg.py 79 | 80 | (2022-09-08/base) arsho::x3004c0s7b0n0 { ~/slog-gpu-backend/test/cuDF }-> python sg.py 81 | | Dataset | Number of rows | SG size | Iterations | Time (s) | 82 | | --- | --- | --- | --- | --- | 83 | | hipc | 5 | 4 | 3 | 0.016371 | 84 | Error in fe_body. Message: std::bad_alloc: out_of_memory: CUDA error at: /__w/rmm/rmm/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorMemoryAllocation out of memory 85 | Error in loc-Brightkite. Message: std::bad_alloc: out_of_memory: CUDA error at: /__w/rmm/rmm/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorMemoryAllocation out of memory 86 | Error in fe_sphere. Message: std::bad_alloc: out_of_memory: CUDA error at: /__w/rmm/rmm/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorMemoryAllocation out of memory 87 | | CA-HepTh | 51971 | 74618689 | 9 | 21.241212 | 88 | | Dataset | Number of rows | SG size | Iterations | Time (s) | 89 | | --- | --- | --- | --- | --- | 90 | | ego-Facebook | 88234 | 15018986 | 13 | 19.074940 | 91 | | wiki-Vote | 103689 | 5376338 | 4 | 2.603751 | 92 | | luxembourg_osm | 119666 | 245221 | 326 | 2.215113 | 93 | | cti | 48232 | 14503742 | 44 | 3.857438 | 94 | | fe_ocean | 409593 | 65941441 | 77 | 45.979235 | 95 | | wing | 121544 | 647999 | 8 | 0.204277 | 96 | | delaunay_n16 | 196575 | 25994011 | 85 | 14.832548 | 97 | Error in usroads. Message: std::bad_alloc: out_of_memory: CUDA error at: /__w/rmm/rmm/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorMemoryAllocation out of memory 98 | Error in p2p-Gnutella31. 
Message: std::bad_alloc: out_of_memory: CUDA error at: /__w/rmm/rmm/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorMemoryAllocation out of memory 99 | | p2p-Gnutella09 | 26013 | 62056583 | 14 | 13.705286 | 100 | | p2p-Gnutella04 | 39994 | 116931333 | 18 | 48.947088 | 101 | | cal.cedge | 21693 | 23519 | 58 | 0.259069 | 102 | | TG.cedge | 23874 | 608090 | 54 | 0.719743 | 103 | | OL.cedge | 7035 | 285431 | 56 | 0.385674 | 104 | ``` 105 | 106 | ### Examples 107 | a TC example 108 | ``` 109 | Relation *edge_2__2_1 = new Relation(); 110 | Relation *path_2__1_2 = new Relation(); 111 | 112 | load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 113 | graph_edge_counts, 1, 0, grid_size, block_size); 114 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 115 | graph_edge_counts, 1, 0, grid_size, block_size); 116 | 117 | LIE tc_scc(grid_size, block_size); 118 | tc_scc.add_relations(edge_2__2_1, true); 119 | tc_scc.add_relations(path_2__1_2, false); 120 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 121 | tuple_generator_hook reorder_path_host; 122 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 123 | sizeof(tuple_generator_hook)); 124 | tuple_copy_hook cp_1_host; 125 | cudaMemcpyFromSymbol(&cp_1_host, cp_1_device, sizeof(tuple_copy_hook)); 126 | tc_scc.add_ra(RelationalJoin(edge_2__2_1, FULL, path_2__1_2, DELTA, 127 | path_2__1_2, reorder_path_host, nullptr, 128 | LEFT, grid_size, block_size, join_detail)); 129 | 130 | tc_scc.fixpoint_loop(); 131 | ``` 132 | 133 | ### References 134 | - [Getting Started on ThetaGPU](https://docs.alcf.anl.gov/theta-gpu/getting-started/) 135 | - [Getting Started on Polaris](https://docs.alcf.anl.gov/polaris/getting-started/) 136 | - [CUDA — Memory Model blog](https://medium.com/analytics-vidhya/cuda-memory-model-823f02cef0bf) 137 | - [CUDA - Pinned memory](https://developer.nvidia.com/blog/how-optimize-data-transfers-cuda-cc/) 138 | - [Stanford Large Network Dataset Collection](https://snap.stanford.edu/data/index.html) 139 | -------------------------------------------------------------------------------- /compile_souffle.sh: -------------------------------------------------------------------------------- 1 | echo "Compiling souffle Queries" 2 | souffle -o build/TC.souffle -j 32 3 | 4 | -------------------------------------------------------------------------------- /data/CA-HepTH/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8edcd3ddb7db3ad8306c07ebe8e57b3fb558f559f3dfd32b5a6e430529772257 3 | size 658567 4 | -------------------------------------------------------------------------------- /data/Gnutella31/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9c5735f9868a150251841ba0b42a6b47655e61feac849232c469157c9f37bdd6 3 | size 1852859 4 | -------------------------------------------------------------------------------- /data/JOIN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harp-lab/gdlog/65a6ee960ced8d04bc725ccdfa68f004f8479226/data/JOIN.pdf -------------------------------------------------------------------------------- /data/SF.cedge/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid 
sha256:a9947ea406d23dada5de5dc26f20780a255a68b7d7bab0ff83adf01598ed8949 3 | size 2838411 4 | -------------------------------------------------------------------------------- /data/com-dblp/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d98b4fc436446c28e452071813cf205e39f333e74e79a286a4b8edabe5ff680e 3 | size 13931327 4 | -------------------------------------------------------------------------------- /data/cspa/httpd/assign.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:31402653628de04569ee38d5dd079dab4f84b36ab685a0b567261b42a6cb4af6 3 | size 5336279 4 | -------------------------------------------------------------------------------- /data/cspa/httpd/dereference.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f391e13c3b42cb0acad7e0fa816d43ac987f0e0de1e7a9fef498045a9ff8e26c 3 | size 16879392 4 | -------------------------------------------------------------------------------- /data/cspa/linux/assign.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:bbf8b176590d6cf2c2296c4135bd8de6bb71237b4207e3a52c8aa53b238e5d83 3 | size 31737404 4 | -------------------------------------------------------------------------------- /data/cspa/linux/dereference.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5aca2cb3aabfecd209b823b687df409a861936728d8dacd177d4a190137d91b0 3 | size 120237633 4 | -------------------------------------------------------------------------------- /data/cspa/postgresql/assign.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1227deac9deeb851fbe5e1e4266100b03c88a70c9f5f046483824accae98b41a 3 | size 18852299 4 | -------------------------------------------------------------------------------- /data/cspa/postgresql/dereference.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0493fe455145ec8aa6d0d3543c9dfc35282493fa8174ff69f2128630988b6a0b 3 | size 54020989 4 | -------------------------------------------------------------------------------- /data/data_10.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 1 5 3 | 1 6 4 | 2 3 5 | 2 6 6 | 3 4 7 | 3 7 8 | 4 5 9 | 4 6 10 | 5 6 11 | -------------------------------------------------------------------------------- /data/data_22.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 2 3 3 | 3 4 4 | 4 5 5 | 5 6 6 | 6 7 7 | 7 8 8 | 8 9 9 | 9 10 10 | 10 11 11 | 11 12 12 | 12 13 13 | 13 14 14 | 14 15 15 | 15 16 16 | 16 17 17 | 17 18 18 | 18 19 19 | 19 20 20 | 20 21 21 | 21 22 22 | 22 23 23 | -------------------------------------------------------------------------------- /data/ego-Facebook/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a23ba0e1930d856fe71c3355969ca2a53756de3ea9ccae486fd7cb4294a59567 3 | size 854362 4 | 
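Note: every `edge.facts` entry under `data/` shown here is a Git LFS pointer file (`.gitattributes` tracks `*.facts` with LFS), not the graph data itself. Below is a minimal sketch of fetching the actual datasets before running the benchmark executables, assuming Git LFS is installed and the clone points at the LFS-enabled remote:

```shell
# Replace the pointer files with the real *.facts datasets.
git lfs install   # one-time: enable the Git LFS filters for this user
git lfs pull      # download the LFS objects referenced by the pointer files
```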
-------------------------------------------------------------------------------- /data/employee.txt: -------------------------------------------------------------------------------- 1 | 1,1 2 | 1,2 3 | 1,3 4 | 1,55 5 | 1,539 6 | 2,1 7 | 2,2 8 | 2,3 -------------------------------------------------------------------------------- /data/fc_ocean/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0232e4991188da19342d6d25196e1ce5afd739fb54e47e3d589b4d80a3f6da7a 3 | size 5098905 4 | -------------------------------------------------------------------------------- /data/fe-sphere/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f96dfcb180d181e81edcb6c52dd9948dc20c9b3ec2adae9190ba349a73064a85 3 | size 523204 4 | -------------------------------------------------------------------------------- /data/fe_body/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:86e184c5c5092f963b421def09cc0a8dae3afdfa26113d14384a76c069455c3e 3 | size 1884551 4 | -------------------------------------------------------------------------------- /data/hpc_talk.txt: -------------------------------------------------------------------------------- 1 | 1 2 2 | 1 3 3 | 2 4 4 | 3 4 5 | 4 5 6 | -------------------------------------------------------------------------------- /data/loc-Brightkite/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:cc2eebc46f5c1ecd84122d5b02fde29d5b534226a214a7ea6608401815f29a16 3 | size 2289578 4 | -------------------------------------------------------------------------------- /data/tc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harp-lab/gdlog/65a6ee960ced8d04bc725ccdfa68f004f8479226/data/tc.png -------------------------------------------------------------------------------- /data/tc_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harp-lab/gdlog/65a6ee960ced8d04bc725ccdfa68f004f8479226/data/tc_new.png -------------------------------------------------------------------------------- /data/usroad/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a77677c832e2a217187e8ba5d9b23794eba814eecb77dce3386acac043ff4cb4 3 | size 2031370 4 | -------------------------------------------------------------------------------- /data/vsp_finan/edge.facts: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fb16bab5380960ff88e9bb748e35735c722d6cc9a88595d7692b57e8eb97b80a 3 | size 6724114 4 | -------------------------------------------------------------------------------- /docker/runpod.dockerfile: -------------------------------------------------------------------------------- 1 | FROM stargazermiao/gdlog-env:11.8 2 | 3 | COPY --chown=gdlog:gdlog . /opt/gdlog 4 | WORKDIR /opt/gdlog 5 | 6 | # RUN rm -r build 7 | RUN cmake -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -Bbuild . 
&& cd build && make -j 8 | RUN chmod -R 757 /opt/gdlog 9 | 10 | # CMD [ "/start.sh" ] 11 | -------------------------------------------------------------------------------- /include/dynamic_dispatch.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | template 4 | struct dynamic_dispatch : Variants... { 5 | using Variants::operator()...; 6 | }; 7 | template 8 | dynamic_dispatch(Variants...) -> dynamic_dispatch; 9 | -------------------------------------------------------------------------------- /include/exception.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | // #include 4 | #include 5 | 6 | #define checkCuda(ans) \ 7 | { gpuAssert((ans), __FILE__, __LINE__); } 8 | 9 | inline void gpuAssert(cudaError_t code, const char *file, int line, 10 | bool abort = true) { 11 | if (code != cudaSuccess) { 12 | fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, 13 | line); 14 | if (abort) { 15 | cudaDeviceReset(); 16 | exit(code); 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /include/lie.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "relational_algebra.cuh" 3 | #include 4 | #include 5 | 6 | /** 7 | * @brief Logical inference engine(LIE). Compute fixpoint for a datalog rule SCC 8 | * (Strongly Connected Component). 9 | * 10 | */ 11 | struct LIE { 12 | // all relation operator used in this LIE 13 | std::vector ra_ops; 14 | 15 | // all relations may have new data in this SCC 16 | std::vector update_relations; 17 | // all relation won't be changed in this SCC 18 | std::vector static_relations; 19 | 20 | // temporary relations, these relations's FULL version won't be stored, 21 | // delta version of these relation will be cleared after used in join 22 | std::vector tmp_relations; 23 | 24 | // GPU grid size 25 | int grid_size; 26 | // GPU block size 27 | int block_size; 28 | 29 | bool reload_full_flag = true; 30 | int max_iteration = INT_MAX; 31 | 32 | LIE(int grid_size, int block_size) 33 | : grid_size(grid_size), block_size(block_size) {} 34 | 35 | /** 36 | * @brief compute fixpoint for current LIE 37 | * 38 | */ 39 | void fixpoint_loop(); 40 | 41 | /** 42 | * @brief Add a relation to SCC, all relation must be added before fixpoint 43 | * loop begin 44 | * 45 | * @param rel relation to add 46 | * @param static_flag whether a relation appears in output relation position 47 | * or not 48 | */ 49 | void add_relations(Relation *rel, bool static_flag); 50 | 51 | /** 52 | * @brief add a temporary relation (a relation only have DELTA/NEWT) 53 | * 54 | * @param rel 55 | */ 56 | void add_tmp_relation(Relation *rel); 57 | 58 | /** 59 | * @brief add a Relation Algebra operation 60 | * 61 | * @param op 62 | */ 63 | void add_ra(ra_op op); 64 | // void ra(ra_op op); 65 | }; 66 | -------------------------------------------------------------------------------- /include/print.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "relation.cuh" 3 | #include "tuple.cuh" 4 | // test helper 5 | 6 | void print_hashes(GHashRelContainer* target, const char *rel_name); 7 | 8 | void print_tuple_rows(GHashRelContainer* target, const char *rel_name); 9 | 10 | void print_tuple_raw_data(GHashRelContainer* target, const char *rel_name); 11 | 12 | void print_memory_usage(); 13 | 14 | void 
print_tuple_list(tuple_type* tuples, tuple_size_t rows, tuple_size_t arity); 15 | 16 | tuple_size_t get_free_memory(); 17 | 18 | tuple_size_t get_total_memory(); 19 | -------------------------------------------------------------------------------- /include/relation.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "tuple.cuh" 3 | #include 4 | #include 5 | 6 | #ifndef RADIX_SORT_THRESHOLD 7 | #define RADIX_SORT_THRESHOLD 0 8 | #endif 9 | #ifndef FULL_BUFFER_VEC_MULTIPLIER 10 | #define FULL_BUFFER_VEC_MULTIPLIER 8 11 | #endif 12 | 13 | enum RelationVersion { DELTA, FULL, NEWT }; 14 | 15 | /** 16 | * @brief A hash table entry 17 | * TODO: no need for struct actually, a u64[2] should be enough, easier to init 18 | * 19 | */ 20 | struct MEntity { 21 | // index position in actual index_arrary 22 | u64 key; 23 | // tuple position in actual data_arrary 24 | tuple_size_t value; 25 | }; 26 | 27 | #define EMPTY_HASH_ENTRY ULONG_MAX 28 | /** 29 | * @brief a C-style hashset indexing based relation container. 30 | * Actual data is still stored using sorted set. 31 | * Different from normal btree relation, using hash table storing the 32 | * index to accelarte range fetch. Good: 33 | * - fast range fetch, in Shovon's ATC paper it shows great 34 | * performance. 35 | * - fast serialization, its very GPU friendly and also easier for MPI 36 | * inter-rank comm transmission. Bad: 37 | * - need reconstruct index very time tuple is inserted (need more 38 | * reasonable algorithm). 39 | * - sorting is a issue, each update need resort everything seems 40 | * stupid. 41 | * 42 | */ 43 | struct GHashRelContainer { 44 | // open addressing hashmap for indexing 45 | MEntity *index_map = nullptr; 46 | tuple_size_t index_map_size = 0; 47 | float index_map_load_factor; 48 | 49 | // index prefix length 50 | // don't have to be u64,int is enough 51 | // u64 *index_columns; 52 | tuple_size_t index_column_size; 53 | 54 | // dependent postfix column always at the end of tuple 55 | int dependent_column_size = 0; 56 | 57 | // the pointer to flatten tuple, all tuple pointer here need to be sorted 58 | tuple_type *tuples = nullptr; 59 | // flatten tuple data 60 | column_type *data_raw = nullptr; 61 | // number of tuples 62 | tuple_size_t tuple_counts = 0; 63 | // actual tuple rows in flatten data, this maybe different from 64 | // tuple_counts when deduplicated 65 | tuple_size_t data_raw_row_size = 0; 66 | int arity; 67 | bool tmp_flag = false; 68 | 69 | GHashRelContainer(int arity, int indexed_column_size, 70 | int dependent_column_size, bool tmp_flag = false) 71 | : arity(arity), index_column_size(indexed_column_size), 72 | dependent_column_size(dependent_column_size), tmp_flag(tmp_flag){}; 73 | }; 74 | 75 | enum JoinDirection { LEFT, RIGHT }; 76 | 77 | /** 78 | * @brief fill in index hash table for a relation in parallel, assume index is 79 | * correctly initialized, data has been loaded , deduplicated and sorted 80 | * 81 | * @param target the hashtable to init 82 | * @return dedeuplicated_bitmap 83 | */ 84 | __global__ void calculate_index_hash(GHashRelContainer *target, 85 | tuple_indexed_less cmp); 86 | 87 | /** 88 | * @brief count how many non empty hash entry in index map 89 | * 90 | * @param target target relation hash table 91 | * @param size return the size 92 | * @return __global__ 93 | */ 94 | __global__ void count_index_entry_size(GHashRelContainer *target, 95 | tuple_size_t *size); 96 | 97 | /** 98 | * @brief rehash to make index map more compact, 
the new index hash size is 99 | * already update in target new index already inited to empty table and have new 100 | * size. 101 | * 102 | * @param target 103 | * @param old_index_map index map before compaction 104 | * @param old_index_map_size original size of index map before compaction 105 | * @return __global__ 106 | */ 107 | __global__ void shrink_index_map(GHashRelContainer *target, 108 | MEntity *old_index_map, 109 | tuple_size_t old_index_map_size); 110 | 111 | /** 112 | * @brief a CUDA kernel init the index entry map of a hashtabl 113 | * 114 | * @param target the hashtable to init 115 | * @return void 116 | */ 117 | __global__ void init_index_map(GHashRelContainer *target); 118 | 119 | /** 120 | * @brief a helper function to init an unsorted tuple arrary from raw data. This 121 | * function turn a flatten raw data array into a tuple array contains pointers 122 | * to raw data array 123 | * 124 | * @param tuples result tuple array 125 | * @param raw_data flatten raw tuples 1-D array 126 | * @param arity arity of reltaion 127 | * @param rows tuple number 128 | * @return void 129 | */ 130 | __global__ void init_tuples_unsorted(tuple_type *tuples, column_type *raw_data, 131 | int arity, tuple_size_t rows); 132 | 133 | /** 134 | * @brief for all tuples in outer table, match same prefix with inner table 135 | * 136 | * @note can we use pipeline here? since many matching may acually missing 137 | * 138 | * @param inner_table the hashtable to iterate 139 | * @param outer_table the hashtable to match 140 | * @param join_column_counts number of join columns (inner and outer must agree 141 | * on this) 142 | * @param return value stored here, size of joined tuples 143 | * @return void 144 | */ 145 | __global__ void get_join_result_size(GHashRelContainer *inner_table, 146 | GHashRelContainer *outer_table, 147 | int join_column_counts, 148 | tuple_generator_hook tp_gen, 149 | tuple_predicate tp_pred, 150 | tuple_size_t *join_result_size); 151 | 152 | /** 153 | * @brief compute the join result 154 | * 155 | * @param inner_table 156 | * @param outer_table 157 | * @param join_column_counts 158 | * @param output_reorder_array reorder array for output relation column 159 | * selection, arrary pos < inner->arity is index in inner, > is index in outer. 
160 | * @param output_arity output relation arity 161 | * @param output_raw_data join result, need precompute the size 162 | * @return __global__ 163 | */ 164 | __global__ void 165 | get_join_result(GHashRelContainer *inner_table, GHashRelContainer *outer_table, 166 | int join_column_counts, tuple_generator_hook tp_gen, 167 | tuple_predicate tp_pred, int output_arity, 168 | column_type *output_raw_data, tuple_size_t *res_count_array, 169 | tuple_size_t *res_offset, JoinDirection direction); 170 | 171 | __global__ void flatten_tuples_raw_data(tuple_type *tuple_pointers, 172 | column_type *raw, 173 | tuple_size_t tuple_counts, int arity); 174 | 175 | __global__ void get_copy_result(tuple_type *src_tuples, 176 | column_type *dest_raw_data, int output_arity, 177 | tuple_size_t tuple_counts, 178 | tuple_copy_hook tp_gen); 179 | 180 | ////////////////////////////////////////////////////// 181 | // CPU functions 182 | 183 | /** 184 | * @brief load raw data into relation container 185 | * 186 | * @param target hashtable struct in host 187 | * @param arity 188 | * @param data raw data on host 189 | * @param data_row_size 190 | * @param index_columns index columns id in host 191 | * @param index_column_size 192 | * @param index_map_load_factor 193 | * @param grid_size 194 | * @param block_size 195 | * @param gpu_data_flag if data is a GPU memory address directly assign to 196 | * target's data_raw 197 | * @param sorted_flag whether input raw data tuples are sorted (use sorted array 198 | * will be fasted, avoid extra sorting) 199 | * @param build_index_flag whether this relation container need indexing. 200 | */ 201 | void load_relation_container( 202 | GHashRelContainer *target, int arity, column_type *data, 203 | tuple_size_t data_row_size, tuple_size_t index_column_size, 204 | int dependent_column_size, float index_map_load_factor, int grid_size, 205 | int block_size, float *detail_time, bool gpu_data_flag = false, 206 | bool sorted_flag = false, bool build_index_flag = true, 207 | bool tuples_array_flag = true); 208 | 209 | void repartition_relation_index(GHashRelContainer *target, int arity, 210 | column_type *data, tuple_size_t data_row_size, 211 | tuple_size_t index_column_size, 212 | int dependent_column_size, 213 | float index_map_load_factor, int grid_size, 214 | int block_size, float *detail_time); 215 | 216 | /** 217 | * @brief copy a relation into an **empty** relation 218 | * 219 | * @param dst 220 | * @param src 221 | */ 222 | void copy_relation_container(GHashRelContainer *dst, GHashRelContainer *src, 223 | int grid_size, int block_size); 224 | 225 | /** 226 | * @brief recreate index for a full relation container 227 | * 228 | * @param target 229 | * @param arity 230 | * @param tuples 231 | * @param data_row_size 232 | * @param index_column_size 233 | * @param dependent_column_size 234 | * @param index_map_load_factor 235 | * @param grid_size 236 | * @param block_size 237 | */ 238 | void reload_full_temp(GHashRelContainer *target, int arity, tuple_type *tuples, 239 | tuple_size_t data_row_size, 240 | tuple_size_t index_column_size, int dependent_column_size, 241 | float index_map_load_factor, int grid_size, 242 | int block_size); 243 | 244 | /** 245 | * @brief clean all data in a relation container 246 | * 247 | * @param target 248 | */ 249 | void free_relation_container(GHashRelContainer *target); 250 | 251 | enum MonotonicOrder { DESC, ASC, UNSPEC }; 252 | 253 | /** 254 | * @brief actual relation class used in semi-naive eval 255 | * 256 | */ 257 | struct Relation { 258 | int 
arity; 259 | // the first columns of a relation will be use to 260 | // build relation index, and only indexed columns can be used to join 261 | int index_column_size; 262 | std::string name; 263 | 264 | // the last will be used a dependant columns, 265 | // these column can be used to store recurisve aggreagtion/choice 266 | // domain's result, these columns can't be used as index columns 267 | int dependent_column_size = 0; 268 | bool index_flag = true; 269 | bool tmp_flag = false; 270 | 271 | GHashRelContainer *delta; 272 | GHashRelContainer *newt; 273 | GHashRelContainer *full; 274 | 275 | // TODO: out dataed remove these, directly use GHashRelContainer 276 | // **full** a buffer for tuple pointer in full 277 | tuple_size_t current_full_size = 0; 278 | tuple_type *tuple_full; 279 | 280 | tuple_type *tuple_merge_buffer; 281 | tuple_size_t tuple_merge_buffer_size = 0; 282 | bool pre_allocated_merge_buffer_flag = true; 283 | bool fully_disable_merge_buffer_flag = true; 284 | // 285 | 286 | // delta relation generate in each iteration, all index stripped 287 | std::vector buffered_delta_vectors; 288 | 289 | // reserved properties for monotonic aggregation 290 | MonotonicOrder monotonic_order = MonotonicOrder::DESC; 291 | 292 | /** 293 | * @brief store the data in DELTA into full relation (this won't free 294 | * delta) 295 | * 296 | * @param grid_size 297 | * @param block_size 298 | */ 299 | void flush_delta(int grid_size, int block_size, float *detail_time); 300 | }; 301 | 302 | /** 303 | * @brief load tuples to FULL relation of target relation 304 | * 305 | * @param target target relation 306 | * @param name name of relation 307 | * @param arity 308 | * @param data raw flatten tuple need loaded into target relation 309 | * @param data_row_size number of tuples to load 310 | * @param index_column_size number of columns used to index 311 | * @param dependent_column_size 312 | * @param grid_size 313 | * @param block_size 314 | */ 315 | void load_relation(Relation *target, std::string name, int arity, 316 | column_type *data, tuple_size_t data_row_size, 317 | tuple_size_t index_column_size, int dependent_column_size, 318 | int grid_size, int block_size, bool tmp_flag = false); 319 | -------------------------------------------------------------------------------- /include/relational_algebra.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "relation.cuh" 3 | #include "tuple.cuh" 4 | #include 5 | #include 6 | 7 | // for fixing 8 | #ifndef MAX_REDUCE_SIZE 9 | #define MAX_REDUCE_SIZE 80000000 10 | #endif 11 | 12 | // function hook describ how inner and outer tuple are reordered to result tuple 13 | 14 | /** 15 | * @brief Relation Algerbra kernal for JOIN ⋈ 16 | * 17 | */ 18 | struct RelationalJoin { 19 | 20 | // relation to compare, this relation must has index 21 | Relation *inner_rel; 22 | RelationVersion inner_ver; 23 | // serialized relation, every tuple in this relation will be iterated and 24 | // joined with tuples in inner relation 25 | Relation *outer_rel; 26 | RelationVersion outer_ver; 27 | 28 | // the relation to store the generated join result 29 | Relation *output_rel; 30 | // hook function will be mapped on every join result tuple 31 | tuple_generator_hook tuple_generator; 32 | // filter to be applied on every join result tuple 33 | tuple_predicate tuple_pred; 34 | 35 | // TODO: reserved for optimization 36 | JoinDirection direction; 37 | int grid_size; 38 | int block_size; 39 | 40 | // flag for benchmark, this will 
disable sorting on result 41 | bool disable_load = false; 42 | 43 | // join time for debug and profiling 44 | float *detail_time; 45 | 46 | RelationalJoin(Relation *inner_rel, RelationVersion inner_ver, 47 | Relation *outer_rel, RelationVersion outer_ver, 48 | Relation *output_rel, tuple_generator_hook tp_gen, 49 | tuple_predicate tp_pred, JoinDirection direction, 50 | int grid_size, int block_size, float *detail_time) 51 | : inner_rel(inner_rel), inner_ver(inner_ver), outer_rel(outer_rel), 52 | outer_ver(outer_ver), output_rel(output_rel), tuple_generator(tp_gen), 53 | tuple_pred(tp_pred), direction(direction), grid_size(grid_size), 54 | block_size(block_size), detail_time(detail_time){}; 55 | 56 | void operator()(); 57 | }; 58 | 59 | /** 60 | * @brief Relation Algerbra kernal for PROJECTION Π 61 | * 62 | */ 63 | struct RelationalCopy { 64 | Relation *src_rel; 65 | RelationVersion src_ver; 66 | Relation *dest_rel; 67 | tuple_copy_hook tuple_generator; 68 | tuple_predicate tuple_pred; 69 | 70 | int grid_size; 71 | int block_size; 72 | bool copied = false; 73 | 74 | RelationalCopy(Relation *src, RelationVersion src_ver, Relation *dest, 75 | tuple_copy_hook tuple_generator, tuple_predicate tuple_pred, 76 | int grid_size, int block_size) 77 | : src_rel(src), src_ver(src_ver), dest_rel(dest), 78 | tuple_generator(tuple_generator), tuple_pred(tuple_pred), 79 | grid_size(grid_size), block_size(block_size) {} 80 | 81 | void operator()(); 82 | }; 83 | 84 | /** 85 | * @brief Relation Algebra kernel for sync up different indices of the same 86 | * relation. This RA operator must be added in the end of each SCC, it will 87 | * directly change the DELTA version of dest relation 88 | * 89 | */ 90 | struct RelationalACopy { 91 | Relation *src_rel; 92 | Relation *dest_rel; 93 | // function will be mapped on all tuple copied 94 | tuple_copy_hook tuple_generator; 95 | // filter for copied tuple 96 | tuple_predicate tuple_pred; 97 | 98 | int grid_size; 99 | int block_size; 100 | 101 | RelationalACopy(Relation *src, Relation *dest, 102 | tuple_copy_hook tuple_generator, tuple_predicate tuple_pred, 103 | int grid_size, int block_size) 104 | : src_rel(src), dest_rel(dest), tuple_generator(tuple_generator), 105 | tuple_pred(tuple_pred), grid_size(grid_size), block_size(block_size) { 106 | } 107 | 108 | void operator()(); 109 | }; 110 | 111 | /** 112 | * @brief possible RA types 113 | * 114 | */ 115 | using ra_op = std::variant; 116 | 117 | enum RAtypes { JOIN, COPY, ACOPY }; 118 | -------------------------------------------------------------------------------- /include/timer.cuh: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | // #include 4 | 5 | struct KernelTimer { 6 | cudaEvent_t start; 7 | cudaEvent_t stop; 8 | 9 | KernelTimer() { 10 | cudaEventCreate(&start); 11 | cudaEventCreate(&stop); 12 | } 13 | 14 | ~KernelTimer() { 15 | cudaEventDestroy(start); 16 | cudaEventDestroy(stop); 17 | } 18 | 19 | void start_timer() { cudaEventRecord(start, 0); } 20 | 21 | void stop_timer() { cudaEventRecord(stop, 0); } 22 | 23 | float get_spent_time() { 24 | float elapsed; 25 | cudaEventSynchronize(stop); 26 | cudaEventElapsedTime(&elapsed, start, stop); 27 | elapsed /= 1000.0; 28 | return elapsed; 29 | } 30 | }; 31 | 32 | struct Output { 33 | int block_size; 34 | int grid_size; 35 | long int input_rows; 36 | long int hashtable_rows; 37 | double load_factor; 38 | double initialization_time; 39 | double memory_clear_time; 40 | double read_time; 41 | double 
reverse_time; 42 | double hashtable_build_time; 43 | long int hashtable_build_rate; 44 | double join_time; 45 | double projection_time; 46 | double deduplication_time; 47 | double union_time; 48 | double total_time; 49 | const char *dataset_name; 50 | }; 51 | -------------------------------------------------------------------------------- /include/tuple.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // #include 3 | #include 4 | 5 | using u64 = unsigned long long; 6 | using u32 = unsigned long; 7 | 8 | using column_type = u32; 9 | using tuple_type = column_type *; 10 | using tuple_size_t = u64; 11 | 12 | // TODO: use thrust vector as tuple type?? 13 | // using t_gpu_index = thrust::device_vector; 14 | // using t_gpu_tuple = thrust::device_vector; 15 | 16 | // using t_data_internal = thrust::device_vector; 17 | /** 18 | * @brief u64* to store the actual relation tuples, for serialize concern 19 | * 20 | */ 21 | using t_data_internal = u64 *; 22 | 23 | typedef void (*tuple_generator_hook)(tuple_type, tuple_type, tuple_type); 24 | typedef void (*tuple_copy_hook)(tuple_type, tuple_type); 25 | typedef bool (*tuple_predicate)(tuple_type); 26 | 27 | // struct tuple_generator_hook { 28 | // __host__ __device__ 29 | // void operator()(tuple_type inner, tuple_type outer, tuple_type newt) {}; 30 | // }; 31 | 32 | /** 33 | * @brief TODO: remove this use comparator function 34 | * 35 | * @param t1 36 | * @param t2 37 | * @param l 38 | * @return true 39 | * @return false 40 | */ 41 | __host__ __device__ inline bool tuple_eq(tuple_type t1, tuple_type t2, 42 | tuple_size_t l) { 43 | for (int i = 0; i < l; i++) { 44 | if (t1[i] != t2[i]) { 45 | return false; 46 | } 47 | } 48 | return true; 49 | } 50 | 51 | struct t_equal { 52 | u64 arity; 53 | 54 | t_equal(tuple_size_t arity) { this->arity = arity; } 55 | 56 | __host__ __device__ bool operator()(const tuple_type &lhs, 57 | const tuple_type &rhs) { 58 | for (int i = 0; i < arity; i++) { 59 | if (lhs[i] != rhs[i]) { 60 | return false; 61 | } 62 | } 63 | return true; 64 | } 65 | }; 66 | 67 | /** 68 | * @brief fnv1-a hash used in original slog backend 69 | * 70 | * @param start_ptr 71 | * @param prefix_len 72 | * @return __host__ __device__ 73 | */ 74 | __host__ __device__ inline column_type prefix_hash(tuple_type start_ptr, 75 | column_type prefix_len) { 76 | const column_type base = 2166136261U; 77 | const column_type prime = 16777619U; 78 | 79 | column_type hash = base; 80 | for (column_type i = 0; i < prefix_len; ++i) { 81 | column_type chunk = (column_type)start_ptr[i]; 82 | hash ^= chunk & 255U; 83 | hash *= prime; 84 | for (char j = 0; j < 3; ++j) { 85 | chunk = chunk >> 8; 86 | hash ^= chunk & 255U; 87 | hash *= prime; 88 | } 89 | } 90 | return hash; 91 | } 92 | 93 | // change to std 94 | struct tuple_indexed_less { 95 | 96 | // u64 *index_columns; 97 | tuple_size_t index_column_size; 98 | int arity; 99 | 100 | tuple_indexed_less(tuple_size_t index_column_size, int arity) { 101 | // this->index_columns = index_columns; 102 | this->index_column_size = index_column_size; 103 | this->arity = arity; 104 | } 105 | 106 | __host__ __device__ bool operator()(const tuple_type &lhs, 107 | const tuple_type &rhs) { 108 | // fetch the index 109 | // compare hash first, could be index very different but share the same 110 | // hash 111 | // same hash 112 | if (lhs == 0) { 113 | return false; 114 | } 115 | if (rhs == 0) { 116 | return true; 117 | } 118 | for (tuple_size_t i = 0; i < arity; i++) { 119 | if 
(lhs[i] < rhs[i]) { 120 | return true; 121 | } else if (lhs[i] > rhs[i]) { 122 | return false; 123 | } 124 | } 125 | return false; 126 | } 127 | }; 128 | 129 | struct tuple_indexed_less2 { 130 | 131 | // u64 *index_columns; 132 | tuple_size_t index_column_size; 133 | int arity; 134 | 135 | tuple_indexed_less2(tuple_size_t index_column_size, int arity) { 136 | // this->index_columns = index_columns; 137 | this->index_column_size = index_column_size; 138 | this->arity = arity; 139 | } 140 | 141 | __host__ __device__ bool operator()(const tuple_type &lhs, 142 | const tuple_type &rhs) { 143 | // fetch the index 144 | // compare hash first, could be index very different but share the same 145 | // hash 146 | // same hash 147 | if (lhs == 0) { 148 | return false; 149 | } 150 | if (rhs == 0) { 151 | return true; 152 | } 153 | if (lhs[0] < rhs[0]) { 154 | return true; 155 | } else if (lhs[0] > rhs[0]) { 156 | return false; 157 | } else { 158 | return lhs[1] < rhs[1]; 159 | } 160 | return false; 161 | } 162 | }; 163 | 164 | 165 | struct tuple_weak_less { 166 | 167 | int arity; 168 | 169 | tuple_weak_less(int arity) { this->arity = arity; } 170 | 171 | __host__ __device__ bool operator()(const tuple_type &lhs, 172 | const tuple_type &rhs) { 173 | 174 | for (u64 i = 0; i < arity; i++) { 175 | if (lhs[i] < rhs[i]) { 176 | return true; 177 | } else if (lhs[i] > rhs[i]) { 178 | return false; 179 | } 180 | } 181 | return false; 182 | }; 183 | }; 184 | 185 | // cuda kernel extract the k th column from tuples 186 | __global__ void extract_column(tuple_type *tuples, tuple_size_t rows, 187 | tuple_size_t k, column_type *column); 188 | 189 | __global__ void compute_hash(tuple_type *tuples, tuple_size_t rows, 190 | tuple_size_t index_column_size, 191 | column_type *hashes); 192 | 193 | void sort_tuples(tuple_type *tuples, tuple_size_t rows, tuple_size_t arity, 194 | tuple_size_t index_column_size, int grid_size, int block_size); 195 | 196 | void sort_tuple_by_hash(tuple_type *tuples, tuple_size_t rows, 197 | tuple_size_t arity, tuple_size_t index_column_size, 198 | int grid_size, int block_size); 199 | -------------------------------------------------------------------------------- /install_souffle.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harp-lab/gdlog/65a6ee960ced8d04bc725ccdfa68f004f8479226/install_souffle.sh -------------------------------------------------------------------------------- /run_cspa_all.sh: -------------------------------------------------------------------------------- 1 | 2 | echo "Preparing code and building CSPA" 3 | git stash && git checkout hash_diff 4 | cmake -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -Bbuild . && cd build && make -j 5 | cd .. 6 | echo ">>>>>>>>>>>>>>>>>> Testing SG >>>>>>>>>>>>>>>>>" 7 | echo " >>>>> Testing GDlog: " 8 | echo "Generating result for TABEL IV" 9 | echo "Dataset : httpd" 10 | ./build/CSPA ./data/cspa/httpd 11 | echo "Dataset : linux" 12 | ./build/CSPA ./data/cspa/linux 13 | echo "Dataset : postgresql" 14 | ./build/CSPA ./data/cspa/postgresql 15 | 16 | -------------------------------------------------------------------------------- /run_sg_all.sh: -------------------------------------------------------------------------------- 1 | echo "Preparing code and building SG" 2 | git stash && git checkout hash_diff 3 | cmake -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -Bbuild . && cd build && make -j 4 | cd .. 
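# Pre-flight check (a sketch, assuming the *.facts inputs are tracked with Git LFS
# as declared in .gitattributes): abort if the data files are still LFS pointers.
if grep -q "git-lfs" ./data/fe_body/edge.facts; then
    echo "data/*/edge.facts are still Git LFS pointer files; run 'git lfs pull' first"
    exit 1
fi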
5 | echo ">>>>>>>>>>>>>>>>>> Testing SG >>>>>>>>>>>>>>>>>" 6 | echo " >>>>> Testing GDlog: " 7 | echo "Generating result for TABLE III" 8 | echo "Dataset : fe_body" 9 | ./build/SG ./data/fe_body/edge.facts 10 | echo "Dataset : loc-Brightkite" 11 | ./build/SG ./data/loc-Brightkite/edge.facts 12 | echo "Dataset : fe-sphere" 13 | ./build/SG ./data/fe-sphere/edge.facts 14 | echo "Dataset : CA-HepTH" 15 | ./build/SG ./data/CA-HepTH/edge.facts 16 | echo "Dataset : SF.cedge" 17 | ./build/SG ./data/SF.cedge/edge.facts 18 | echo "Dataset : ego-Facebook" 19 | ./build/SG ./data/ego-Facebook/edge.facts 20 | -------------------------------------------------------------------------------- /run_tc_all.sh: -------------------------------------------------------------------------------- 1 | echo "Preparing code and building TC" 2 | git stash && git checkout main 3 | cmake -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -Bbuild . && cd build && make -j 4 | cd .. 5 | echo ">>>>>>>>>>>>>>>>>> Testing REACH >>>>>>>>>>>>>>>>>" 6 | echo " >>>>> Testing GDlog: " 7 | echo " >>>>> Generating result for TABLE I" 8 | echo " >>>>> Dataset : usroad with EBM" 9 | ./build/TC ./data/usroad/edge.facts 0 10 | echo " >>>>> Dataset : usroad without EBM" 11 | ./build/TC ./data/usroad/edge.facts 1 12 | echo " >>>>> " 13 | echo " >>>>> Dataset : vsp_finan with EBM" 14 | ./build/TC ./data/vsp_finan/edge.facts 0 15 | echo " >>>>> Dataset : vsp_finan without EBM" 16 | ./build/TC ./data/vsp_finan/edge.facts 1 17 | echo " >>>>> " 18 | echo " >>>>> Dataset : fc_ocean with EBM" 19 | ./build/TC ./data/fc_ocean/edge.facts 0 20 | echo " >>>>> Dataset : fc_ocean without EBM" 21 | ./build/TC ./data/fc_ocean/edge.facts 1 22 | echo " >>>>> Dataset : com-dblp with EBM" 23 | ./build/TC ./data/com-dblp/edge.facts 0 24 | echo " >>>>> Dataset : com-dblp without EBM" 25 | ./build/TC ./data/com-dblp/edge.facts 1 26 | echo " >>>>> " 27 | echo " >>>>> Dataset : Gnutella31 with EBM" 28 | ./build/TC ./data/Gnutella31/edge.facts 0 29 | echo " >>>>> Dataset : Gnutella31 without EBM" 30 | ./build/TC ./data/Gnutella31/edge.facts 1 31 | echo " >>>>> " 32 | 33 | echo " >>>>> Testing GDlog: " 34 | echo " >>>>> Generating result for TABLE II" 35 | # echo "Dataset : usroad" 36 | # ./build/TC ./data/data_165435.txt 37 | echo "Dataset : fc_ocean" 38 | ./build/TC ./data/fc_ocean/edge.facts 0 39 | echo "Dataset : com-dblp" 40 | ./build/TC ./data/com-dblp/edge.facts 0 41 | echo "Dataset : vsp_finan" 42 | ./build/TC ./data/vsp_finan/edge.facts 0 43 | echo "Dataset : Gnutella31" 44 | ./build/TC ./data/Gnutella31/edge.facts 0 45 | echo "Dataset : fe_body" 46 | ./build/TC ./data/fe_body/edge.facts 0 47 | echo "Dataset : SF.cedge" 48 | ./build/TC ./data/SF.cedge/edge.facts 0 49 | 50 | -------------------------------------------------------------------------------- /src/acopy.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../include/exception.cuh" 8 | #include "../include/print.cuh" 9 | #include "../include/relational_algebra.cuh" 10 | #include "../include/timer.cuh" 11 | 12 | void RelationalACopy::operator()() { 13 | 14 | GHashRelContainer *src = src_rel->newt; 15 | GHashRelContainer *dest = dest_rel->newt; 16 | std::cout << "ACopy " << src_rel->name << " to " << dest_rel->name 17 | << std::endl; 18 | 19 | if (src->tuple_counts == 0) { 20 | free_relation_container(dest); 21 | dest->tuple_counts = 0; 22 | return; 23 | } 24 | 25 | int output_arity =
dest_rel->arity; 26 | column_type *copied_raw_data; 27 | u64 copied_raw_data_size = 28 | src->tuple_counts * output_arity * sizeof(column_type); 29 | checkCuda(cudaMalloc((void **)&copied_raw_data, copied_raw_data_size)); 30 | checkCuda(cudaMemset(copied_raw_data, 0, copied_raw_data_size)); 31 | get_copy_result<<>>(src->tuples, copied_raw_data, 32 | output_arity, src->tuple_counts, 33 | tuple_generator); 34 | checkCuda(cudaGetLastError()); 35 | checkCuda(cudaDeviceSynchronize()); 36 | 37 | free_relation_container(dest); 38 | float detail_time[5] = {0, 0, 0, 0, 0}; 39 | // TODO: swap to repartition_relation_index in future 40 | load_relation_container(dest, dest->arity, copied_raw_data, 41 | src->tuple_counts, src->index_column_size, 42 | dest->dependent_column_size, 0.8, grid_size, 43 | block_size, detail_time, true, false, true); 44 | checkCuda(cudaDeviceSynchronize()); 45 | // print_tuple_rows(dest, "delta"); 46 | // merge delta to full immediately here 47 | // dest_rel->flush_delta(grid_size, block_size); 48 | // std::cout << dest->tuple_counts << std::endl; 49 | // print_tuple_rows(dest, "acopied"); 50 | } 51 | -------------------------------------------------------------------------------- /src/copy.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../include/exception.cuh" 8 | #include "../include/print.cuh" 9 | #include "../include/relational_algebra.cuh" 10 | #include "../include/timer.cuh" 11 | 12 | void RelationalCopy::operator()() { 13 | checkCuda(cudaDeviceSynchronize()); 14 | GHashRelContainer *src; 15 | if (src_ver == DELTA) { 16 | src = src_rel->delta; 17 | } else { 18 | src = src_rel->full; 19 | } 20 | GHashRelContainer *dest = dest_rel->newt; 21 | // std::cout << "Copy " << src_rel->name << " to " << dest_rel->name 22 | // << std::endl; 23 | 24 | if (src->tuple_counts == 0) { 25 | dest_rel->newt->tuple_counts = 0; 26 | return; 27 | } 28 | 29 | int output_arity = dest_rel->arity; 30 | column_type *copied_raw_data; 31 | u64 copied_raw_data_size = 32 | src->tuple_counts * output_arity * sizeof(column_type); 33 | checkCuda(cudaMalloc((void **)&copied_raw_data, copied_raw_data_size)); 34 | checkCuda(cudaMemset(copied_raw_data, 0, copied_raw_data_size)); 35 | get_copy_result<<>>(src->tuples, copied_raw_data, 36 | output_arity, src->tuple_counts, 37 | tuple_generator); 38 | checkCuda(cudaGetLastError()); 39 | checkCuda(cudaDeviceSynchronize()); 40 | float load_relation_container_time[5] = {0, 0, 0, 0, 0}; 41 | 42 | if (dest->tuples == nullptr || dest->tuple_counts == 0) { 43 | free_relation_container(dest); 44 | load_relation_container( 45 | dest, dest->arity, copied_raw_data, src->tuple_counts, 46 | src->index_column_size, dest->dependent_column_size, 0.8, grid_size, 47 | block_size, load_relation_container_time, true, false, false); 48 | } else { 49 | GHashRelContainer *tmp = new GHashRelContainer( 50 | dest->arity, dest->index_column_size, dest->dependent_column_size); 51 | load_relation_container( 52 | tmp, dest->arity, copied_raw_data, src->tuple_counts, 53 | src->index_column_size, dest->dependent_column_size, 0.8, grid_size, 54 | block_size, load_relation_container_time, true, false, false); 55 | checkCuda(cudaDeviceSynchronize()); 56 | // merge to newt 57 | GHashRelContainer *old_newt = dest; 58 | tuple_type *tp_buffer; 59 | u64 tp_buffer_mem_size = 60 | (old_newt->tuple_counts + src->tuple_counts) * sizeof(tuple_type); 61 | checkCuda(cudaMalloc((void 
**)&tp_buffer, tp_buffer_mem_size)); 62 | checkCuda(cudaMemset(tp_buffer, 0, tp_buffer_mem_size)); 63 | tuple_type *tp_buffer_end = thrust::merge( 64 | thrust::device, old_newt->tuples, 65 | old_newt->tuples + old_newt->tuple_counts, tmp->tuples, 66 | tmp->tuples + tmp->tuple_counts, tp_buffer, 67 | tuple_indexed_less(dest->index_column_size, output_arity)); 68 | // checkCuda(cudaDeviceSynchronize()); 69 | // checkCuda(cudaFree(tmp->tuples)); 70 | // checkCuda(cudaFree(old_newt->tuples)); 71 | tp_buffer_end = thrust::unique(thrust::device, tp_buffer, tp_buffer_end, 72 | t_equal(output_arity)); 73 | checkCuda(cudaDeviceSynchronize()); 74 | tuple_size_t new_newt_counts = tp_buffer_end - tp_buffer; 75 | // std::cout << " >>>>>>>>>> " << new_newt_counts << std::endl; 76 | column_type *new_newt_raw; 77 | u64 new_newt_raw_mem_size = 78 | new_newt_counts * output_arity * sizeof(column_type); 79 | checkCuda(cudaMalloc((void **)&new_newt_raw, new_newt_raw_mem_size)); 80 | flatten_tuples_raw_data<<>>( 81 | tp_buffer, new_newt_raw, new_newt_counts, output_arity); 82 | checkCuda(cudaGetLastError()); 83 | checkCuda(cudaDeviceSynchronize()); 84 | checkCuda(cudaFree(tp_buffer)); 85 | free_relation_container(old_newt); 86 | free_relation_container(tmp); 87 | load_relation_container(dest, output_arity, new_newt_raw, 88 | new_newt_counts, dest->index_column_size, 89 | dest->dependent_column_size, 0.8, grid_size, 90 | block_size, load_relation_container_time, true, 91 | true, false); 92 | // delete tmp; 93 | } 94 | // std::cout << "copy finish " << std::endl; 95 | } 96 | -------------------------------------------------------------------------------- /src/join.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../include/exception.cuh" 8 | #include "../include/print.cuh" 9 | #include "../include/relational_algebra.cuh" 10 | #include "../include/timer.cuh" 11 | 12 | void RelationalJoin::operator()() { 13 | 14 | bool output_is_tmp = output_rel->tmp_flag; 15 | GHashRelContainer *inner; 16 | if (inner_ver == DELTA) { 17 | inner = inner_rel->delta; 18 | } else { 19 | inner = inner_rel->full; 20 | } 21 | GHashRelContainer *outer; 22 | if (outer_ver == DELTA) { 23 | outer = outer_rel->delta; 24 | } else if (outer_ver == FULL) { 25 | outer = outer_rel->full; 26 | } else { 27 | // temp relation can be outer relation 28 | outer = outer_rel->newt; 29 | } 30 | int output_arity = output_rel->arity; 31 | // GHashRelContainer* output = output_rel->newt; 32 | 33 | // std::cout << "inner " << inner_rel->name << " : " << inner->tuple_counts 34 | // << " outer " << outer_rel->name << " : " << outer->tuple_counts 35 | // << std::endl; 36 | // print_tuple_rows(inner, "inner"); 37 | // print_tuple_rows(outer, "outer"); 38 | if (outer->tuples == nullptr || outer->tuple_counts == 0) { 39 | outer->tuple_counts = 0; 40 | return; 41 | } 42 | if (inner->tuples == nullptr || inner->tuple_counts == 0) { 43 | outer->tuple_counts = 0; 44 | return; 45 | } 46 | 47 | KernelTimer timer; 48 | // checkCuda(cudaDeviceSynchronize()); 49 | GHashRelContainer *inner_device; 50 | checkCuda(cudaMalloc((void **)&inner_device, sizeof(GHashRelContainer))); 51 | checkCuda(cudaMemcpy(inner_device, inner, sizeof(GHashRelContainer), 52 | cudaMemcpyHostToDevice)); 53 | GHashRelContainer *outer_device; 54 | checkCuda(cudaMalloc((void **)&outer_device, sizeof(GHashRelContainer))); 55 | checkCuda(cudaMemcpy(outer_device, outer, 
sizeof(GHashRelContainer), 56 | cudaMemcpyHostToDevice)); 57 | 58 | tuple_size_t *result_counts_array; 59 | checkCuda(cudaMalloc((void **)&result_counts_array, 60 | outer->tuple_counts * sizeof(tuple_size_t))); 61 | checkCuda(cudaMemset(result_counts_array, 0, 62 | outer->tuple_counts * sizeof(tuple_size_t))); 63 | 64 | // print_tuple_rows(outer, "inber"); 65 | // checkCuda(cudaDeviceSynchronize()); 66 | timer.start_timer(); 67 | checkCuda(cudaDeviceSynchronize()); 68 | get_join_result_size<<>>( 69 | inner_device, outer_device, outer->index_column_size, tuple_generator, 70 | tuple_pred, result_counts_array); 71 | checkCuda(cudaGetLastError()); 72 | checkCuda(cudaDeviceSynchronize()); 73 | timer.stop_timer(); 74 | this->detail_time[0] += timer.get_spent_time(); 75 | 76 | timer.start_timer(); 77 | tuple_size_t total_result_rows = 0; 78 | for (tuple_size_t i = 0; i < outer->tuple_counts; i = i + MAX_REDUCE_SIZE) { 79 | tuple_size_t reduce_size = MAX_REDUCE_SIZE; 80 | if (i + MAX_REDUCE_SIZE > outer->tuple_counts) { 81 | reduce_size = outer->tuple_counts - i; 82 | } 83 | tuple_size_t reduce_v = thrust::reduce( 84 | thrust::device, result_counts_array + i, 85 | result_counts_array + i + reduce_size, 0); 86 | total_result_rows += reduce_v; 87 | // checkCuda(cudaDeviceSynchronize()); 88 | } 89 | 90 | // std::cout << output_rel->name << " " << outer->index_column_size 91 | // << " join result size(non dedup) " << total_result_rows 92 | // << std::endl; 93 | // print_memory_usage(); 94 | tuple_size_t *result_counts_offset; 95 | checkCuda(cudaMalloc((void **)&result_counts_offset, 96 | outer->tuple_counts * sizeof(tuple_size_t))); 97 | checkCuda(cudaMemcpy(result_counts_offset, result_counts_array, 98 | outer->tuple_counts * sizeof(tuple_size_t), 99 | cudaMemcpyDeviceToDevice)); 100 | thrust::exclusive_scan(thrust::device, result_counts_offset, 101 | result_counts_offset + outer->tuple_counts, 102 | result_counts_offset); 103 | 104 | checkCuda(cudaDeviceSynchronize()); 105 | timer.stop_timer(); 106 | detail_time[1] += timer.get_spent_time(); 107 | 108 | timer.start_timer(); 109 | column_type *join_res_raw_data; 110 | u64 join_res_raw_data_mem_size = 111 | total_result_rows * output_arity * sizeof(column_type); 112 | checkCuda( 113 | cudaMalloc((void **)&join_res_raw_data, join_res_raw_data_mem_size)); 114 | checkCuda(cudaMemset(join_res_raw_data, 0, join_res_raw_data_mem_size)); 115 | get_join_result<<>>( 116 | inner_device, outer_device, outer->index_column_size, tuple_generator, 117 | tuple_pred, output_arity, join_res_raw_data, result_counts_array, 118 | result_counts_offset, direction); 119 | checkCuda(cudaGetLastError()); 120 | checkCuda(cudaDeviceSynchronize()); 121 | timer.stop_timer(); 122 | detail_time[2] += timer.get_spent_time(); 123 | checkCuda(cudaFree(result_counts_array)); 124 | checkCuda(cudaFree(result_counts_offset)); 125 | 126 | float load_relation_container_time[5] = {0, 0, 0, 0, 0}; 127 | // // reload newt 128 | // free_relation(output_newt); 129 | // newt don't need index 130 | if (output_rel->newt->tuples == nullptr || 131 | output_rel->newt->tuple_counts == 0) { 132 | if (disable_load) { 133 | return; 134 | } 135 | if (!output_is_tmp) { 136 | load_relation_container( 137 | output_rel->newt, output_arity, join_res_raw_data, 138 | total_result_rows, output_rel->index_column_size, 139 | output_rel->dependent_column_size, 0.8, grid_size, block_size, 140 | load_relation_container_time, true, false, false); 141 | } else { 142 | // temporary relation doesn't need index nor sort 143 
| // std::cout << "use tmp >>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << 144 | // std::endl; 145 | load_relation_container( 146 | output_rel->newt, output_arity, join_res_raw_data, 147 | total_result_rows, output_rel->index_column_size, 148 | output_rel->dependent_column_size, 0.8, grid_size, block_size, 149 | load_relation_container_time, true, true, false); 150 | output_rel->newt->tmp_flag = true; 151 | } 152 | checkCuda(cudaDeviceSynchronize()); 153 | detail_time[3] += load_relation_container_time[0]; 154 | detail_time[4] += load_relation_container_time[1]; 155 | detail_time[5] += load_relation_container_time[2]; 156 | // print_tuple_rows(output_rel->newt, "newt after join"); 157 | } else { 158 | // TODO: handle the case out put relation is temp relation 159 | // data in current newt, merge 160 | if (!output_is_tmp) { 161 | GHashRelContainer *newt_tmp = new GHashRelContainer( 162 | output_rel->arity, output_rel->index_column_size, 163 | output_rel->dependent_column_size); 164 | GHashRelContainer *old_newt = output_rel->newt; 165 | load_relation_container( 166 | newt_tmp, output_arity, join_res_raw_data, total_result_rows, 167 | output_rel->index_column_size, 168 | output_rel->dependent_column_size, 0.8, grid_size, block_size, 169 | load_relation_container_time, true, false, false); 170 | detail_time[3] += load_relation_container_time[0]; 171 | detail_time[4] += load_relation_container_time[1]; 172 | detail_time[5] += load_relation_container_time[2]; 173 | // checkCuda(cudaDeviceSynchronize()); 174 | tuple_type *tp_buffer; 175 | u64 tp_buffer_mem_size = 176 | (newt_tmp->tuple_counts + old_newt->tuple_counts) * 177 | sizeof(tuple_type); 178 | checkCuda(cudaMalloc((void **)&tp_buffer, tp_buffer_mem_size)); 179 | cudaMemset(tp_buffer, 0, tp_buffer_mem_size); 180 | timer.start_timer(); 181 | tuple_type *tp_buffer_end = thrust::merge( 182 | thrust::device, newt_tmp->tuples, 183 | newt_tmp->tuples + newt_tmp->tuple_counts, old_newt->tuples, 184 | old_newt->tuples + old_newt->tuple_counts, tp_buffer, 185 | tuple_indexed_less(output_rel->index_column_size, 186 | output_rel->arity)); 187 | // checkCuda(cudaDeviceSynchronize()); 188 | timer.stop_timer(); 189 | detail_time[6] += timer.get_spent_time(); 190 | // cudaFree(newt_tmp->tuples); 191 | // cudaFree(old_newt->tuples); 192 | timer.start_timer(); 193 | tp_buffer_end = 194 | thrust::unique(thrust::device, tp_buffer, tp_buffer_end, 195 | t_equal(output_rel->arity)); 196 | checkCuda(cudaDeviceSynchronize()); 197 | timer.stop_timer(); 198 | detail_time[7] += timer.get_spent_time(); 199 | tuple_size_t new_newt_counts = tp_buffer_end - tp_buffer; 200 | // std::cout << " >>>>>>>>>> " << new_newt_counts * 201 | // output_rel->arity * sizeof(column_type) << std::endl; 202 | 203 | timer.start_timer(); 204 | column_type *new_newt_raw; 205 | u64 new_newt_raw_mem_size = 206 | new_newt_counts * output_rel->arity * sizeof(column_type); 207 | checkCuda( 208 | cudaMalloc((void **)&new_newt_raw, new_newt_raw_mem_size)); 209 | checkCuda(cudaMemset(new_newt_raw, 0, new_newt_raw_mem_size)); 210 | flatten_tuples_raw_data<<>>( 211 | tp_buffer, new_newt_raw, new_newt_counts, output_rel->arity); 212 | checkCuda(cudaGetLastError()); 213 | checkCuda(cudaDeviceSynchronize()); 214 | timer.stop_timer(); 215 | detail_time[4] += timer.get_spent_time(); 216 | checkCuda(cudaFree(tp_buffer)); 217 | free_relation_container(old_newt); 218 | free_relation_container(newt_tmp); 219 | // TODO: free newt_tmp pointer 220 | load_relation_container( 221 | output_rel->newt, output_arity, 
new_newt_raw, new_newt_counts, 222 | output_rel->index_column_size, 223 | output_rel->dependent_column_size, 0.8, grid_size, block_size, 224 | load_relation_container_time, true, true, false); 225 | checkCuda(cudaDeviceSynchronize()); 226 | } else { 227 | // output relation is tmp relation, directly merge without sort 228 | GHashRelContainer *old_newt = output_rel->newt; 229 | column_type *newt_tmp_raw; 230 | u64 newt_tmp_raw_mem_size = 231 | (old_newt->tuple_counts + total_result_rows) * 232 | output_rel->arity * sizeof(column_type); 233 | tuple_size_t new_newt_counts = 234 | old_newt->tuple_counts + total_result_rows; 235 | checkCuda( 236 | cudaMalloc((void **)&newt_tmp_raw, newt_tmp_raw_mem_size)); 237 | checkCuda(cudaMemcpy(newt_tmp_raw, old_newt->data_raw, 238 | old_newt->tuple_counts * old_newt->arity * 239 | sizeof(column_type), 240 | cudaMemcpyDeviceToDevice)); 241 | checkCuda(cudaMemcpy( 242 | &(newt_tmp_raw[old_newt->tuple_counts * old_newt->arity]), 243 | join_res_raw_data, 244 | total_result_rows * output_rel->arity * sizeof(column_type), 245 | cudaMemcpyDeviceToDevice)); 246 | free_relation_container(old_newt); 247 | checkCuda(cudaFree(join_res_raw_data)); 248 | load_relation_container( 249 | output_rel->newt, output_arity, newt_tmp_raw, new_newt_counts, 250 | output_rel->index_column_size, 251 | output_rel->dependent_column_size, 0.8, grid_size, block_size, 252 | load_relation_container_time, true, true, false); 253 | checkCuda(cudaDeviceSynchronize()) 254 | } 255 | 256 | detail_time[3] += load_relation_container_time[0]; 257 | detail_time[4] += load_relation_container_time[1]; 258 | detail_time[5] += load_relation_container_time[2]; 259 | // print_tuple_rows(output_rel->newt, "join merge newt"); 260 | // delete newt_tmp; 261 | } 262 | 263 | // print_tuple_rows(output_rel->newt, "output_newtr"); 264 | // checkCuda(cudaDeviceSynchronize()); 265 | // std::cout << output_rel->name << " join result size " << 266 | // output_rel->newt->tuple_counts < 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | void LIE::add_ra(ra_op op) { ra_ops.push_back(op); } 14 | 15 | void LIE::add_relations(Relation *rel, bool static_flag) { 16 | if (static_flag) { 17 | static_relations.push_back(rel); 18 | } else { 19 | update_relations.push_back(rel); 20 | // add delta and newt for it 21 | } 22 | } 23 | 24 | void LIE::add_tmp_relation(Relation *rel) { tmp_relations.push_back(rel); } 25 | 26 | void LIE::fixpoint_loop() { 27 | 28 | int iteration_counter = 0; 29 | float join_time = 0; 30 | float merge_time = 0; 31 | float rebuild_time = 0; 32 | float flatten_time = 0; 33 | float set_diff_time = 0; 34 | float rebuild_delta_time = 0; 35 | float flatten_full_time = 0; 36 | float memory_alloc_time = 0; 37 | 38 | float join_get_size_time = 0; 39 | float join_get_result_time = 0; 40 | float rebuild_newt_time = 0; 41 | KernelTimer timer; 42 | 43 | float rebuild_rel_sort_time = 0; 44 | float rebuild_rel_unique_time = 0; 45 | float rebuild_rel_index_time = 0; 46 | 47 | // std::cout << "start lie .... 
" << std::endl; 48 | // init full tuple buffer for all relation involved 49 | for (Relation *rel : update_relations) { 50 | checkCuda(cudaMalloc((void **)&rel->tuple_full, 51 | rel->full->tuple_counts * sizeof(tuple_type))); 52 | checkCuda(cudaMemcpy(rel->tuple_full, rel->full->tuples, 53 | rel->full->tuple_counts * sizeof(tuple_type), 54 | cudaMemcpyDeviceToDevice)); 55 | rel->current_full_size = rel->full->tuple_counts; 56 | copy_relation_container(rel->delta, rel->full, grid_size, block_size); 57 | checkCuda(cudaDeviceSynchronize()); 58 | // std::cout << "wwwwwwwwww" << rel->delta->tuple_counts << std::endl; 59 | } 60 | 61 | while (true) { 62 | for (auto &ra_op : ra_ops) { 63 | timer.start_timer(); 64 | std::visit(dynamic_dispatch{[](RelationalJoin &op) { 65 | // timer.start_timer(); 66 | op(); 67 | }, 68 | [](RelationalACopy &op) { op(); }, 69 | [](RelationalCopy &op) { 70 | if (op.src_ver == FULL) { 71 | if (!op.copied) { 72 | op(); 73 | op.copied = true; 74 | } 75 | } else { 76 | op(); 77 | } 78 | }}, 79 | ra_op); 80 | timer.stop_timer(); 81 | join_time += timer.get_spent_time(); 82 | } 83 | 84 | // clean tmp relation 85 | for (Relation *rel : tmp_relations) { 86 | free_relation_container(rel->newt); 87 | } 88 | 89 | // std::cout << "Iteration " << iteration_counter 90 | // << " popluating new tuple" << std::endl; 91 | // merge delta into full 92 | bool fixpoint_flag = true; 93 | for (Relation *rel : update_relations) { 94 | // std::cout << rel->name << std::endl; 95 | // if (rel->newt->tuple_counts != 0) { 96 | // fixpoint_flag = false; 97 | // } 98 | if (iteration_counter == 0) { 99 | free_relation_container(rel->delta); 100 | } 101 | // drop the index of delta once merged, because it won't be used in 102 | // next iter when migrate more general case, this operation need to 103 | // be put off to end of all RA operation in current iteration 104 | if (rel->delta->index_map != nullptr) { 105 | checkCuda(cudaFree(rel->delta->index_map)); 106 | rel->delta->index_map = nullptr; 107 | } 108 | if (rel->delta->tuples != nullptr) { 109 | checkCuda(cudaFree(rel->delta->tuples)); 110 | rel->delta->tuples = nullptr; 111 | } 112 | 113 | timer.start_timer(); 114 | if (rel->newt->tuple_counts == 0) { 115 | rel->delta = 116 | new GHashRelContainer(rel->arity, rel->index_column_size, 117 | rel->dependent_column_size); 118 | // std::cout << "iteration " << iteration_counter << " relation " 119 | // << rel->name << " no new tuple added" << std::endl; 120 | continue; 121 | } 122 | tuple_type *deduplicated_newt_tuples; 123 | u64 deduplicated_newt_tuples_mem_size = 124 | rel->newt->tuple_counts * sizeof(tuple_type); 125 | checkCuda(cudaMalloc((void **)&deduplicated_newt_tuples, 126 | deduplicated_newt_tuples_mem_size)); 127 | checkCuda(cudaMemset(deduplicated_newt_tuples, 0, 128 | deduplicated_newt_tuples_mem_size)); 129 | ////// 130 | 131 | tuple_type *deuplicated_end = thrust::set_difference( 132 | thrust::device, rel->newt->tuples, 133 | rel->newt->tuples + rel->newt->tuple_counts, rel->tuple_full, 134 | rel->tuple_full + rel->current_full_size, 135 | deduplicated_newt_tuples, 136 | tuple_indexed_less(rel->full->index_column_size, 137 | rel->full->arity - 138 | rel->dependent_column_size)); 139 | // checkCuda(cudaDeviceSynchronize()); 140 | tuple_size_t deduplicate_size = 141 | deuplicated_end - deduplicated_newt_tuples; 142 | 143 | if (deduplicate_size != 0) { 144 | fixpoint_flag = false; 145 | } 146 | timer.stop_timer(); 147 | set_diff_time += timer.get_spent_time(); 148 | 149 | column_type 
*deduplicated_raw; 150 | u64 dedeuplicated_raw_mem_size = 151 | deduplicate_size * rel->newt->arity * sizeof(column_type); 152 | checkCuda(cudaMalloc((void **)&deduplicated_raw, 153 | dedeuplicated_raw_mem_size)); 154 | checkCuda( 155 | cudaMemset(deduplicated_raw, 0, dedeuplicated_raw_mem_size)); 156 | flatten_tuples_raw_data<<>>( 157 | deduplicated_newt_tuples, deduplicated_raw, deduplicate_size, 158 | rel->newt->arity); 159 | checkCuda(cudaGetLastError()); 160 | checkCuda(cudaDeviceSynchronize()); 161 | checkCuda(cudaFree(deduplicated_newt_tuples)); 162 | 163 | free_relation_container(rel->newt); 164 | 165 | timer.start_timer(); 166 | float load_detail_time[5] = {0, 0, 0, 0, 0}; 167 | rel->delta = new GHashRelContainer( 168 | rel->arity, rel->index_column_size, rel->dependent_column_size); 169 | load_relation_container( 170 | rel->delta, rel->full->arity, deduplicated_raw, 171 | deduplicate_size, rel->full->index_column_size, 172 | rel->full->dependent_column_size, 173 | rel->full->index_map_load_factor, grid_size, block_size, 174 | load_detail_time, true, true, true); 175 | // checkCuda(cudaDeviceSynchronize()); 176 | timer.stop_timer(); 177 | rebuild_delta_time += timer.get_spent_time(); 178 | rebuild_rel_sort_time += load_detail_time[0]; 179 | rebuild_rel_unique_time += load_detail_time[1]; 180 | rebuild_rel_index_time += load_detail_time[2]; 181 | 182 | // auto old_full = rel->tuple_full; 183 | float flush_detail_time[5] = {0, 0, 0, 0, 0}; 184 | timer.start_timer(); 185 | rel->flush_delta(grid_size, block_size, flush_detail_time); 186 | timer.stop_timer(); 187 | merge_time += flush_detail_time[1]; 188 | memory_alloc_time += flush_detail_time[0]; 189 | memory_alloc_time += flush_detail_time[2]; 190 | // checkCuda(cudaFree(old_full)); 191 | 192 | // print_tuple_rows(rel->full, "Path full after load newt"); 193 | // std::cout << "iteration " << iteration_counter << " relation " 194 | // << rel->name 195 | // << " finish dedup new tuples : " << deduplicate_size 196 | // << " delta tuple size: " << rel->delta->tuple_counts 197 | // << " full counts " << rel->current_full_size << std::endl; 198 | } 199 | checkCuda(cudaDeviceSynchronize()); 200 | // std::cout << "Iteration " << iteration_counter << " finish populating" 201 | // << std::endl; 202 | 203 | iteration_counter++; 204 | // if (iteration_counter >= 3) { 205 | // break; 206 | // } 207 | 208 | if (fixpoint_flag || iteration_counter > max_iteration) { 209 | // print_memory_usage(); 210 | // std::cout << "Iteration : " << iteration_counter 211 | // << "Join time: " << join_time 212 | // << " ; merge full time: " << merge_time 213 | // << " ; memory alloc time: " << memory_alloc_time 214 | // << " ; rebuild delta time: " << rebuild_delta_time 215 | // << " ; set diff time: " << set_diff_time << std::endl; 216 | break; 217 | } 218 | } 219 | // merge full after reach fixpoint 220 | timer.start_timer(); 221 | if (reload_full_flag) { 222 | // std::cout << "Start merge full" << std::endl; 223 | for (Relation *rel : update_relations) { 224 | // if (rel->current_full_size <= rel->full->tuple_counts) { 225 | // continue; 226 | // } 227 | column_type *new_full_raw_data; 228 | u64 new_full_raw_data_mem_size = 229 | rel->current_full_size * rel->full->arity * sizeof(column_type); 230 | checkCuda(cudaMalloc((void **)&new_full_raw_data, 231 | new_full_raw_data_mem_size)); 232 | checkCuda(cudaMemset(new_full_raw_data, 0, new_full_raw_data_mem_size)); 233 | flatten_tuples_raw_data<<>>( 234 | rel->tuple_full, new_full_raw_data, rel->current_full_size, 
235 | rel->full->arity); 236 | checkCuda(cudaGetLastError()); 237 | checkCuda(cudaDeviceSynchronize()); 238 | // cudaFree(tuple_merge_buffer); 239 | float load_detail_time[5] = {0, 0, 0, 0, 0}; 240 | load_relation_container( 241 | rel->full, rel->full->arity, new_full_raw_data, 242 | rel->current_full_size, rel->full->index_column_size, 243 | rel->full->dependent_column_size, rel->full->index_map_load_factor, 244 | grid_size, block_size, load_detail_time, true, true, true); 245 | checkCuda(cudaDeviceSynchronize()); 246 | rebuild_rel_sort_time += load_detail_time[0]; 247 | rebuild_rel_unique_time += load_detail_time[1]; 248 | rebuild_rel_index_time += load_detail_time[2]; 249 | // std::cout << "Finished! " << rel->name << " has " 250 | // << rel->full->tuple_counts << std::endl; 251 | for (auto &delta_b : rel->buffered_delta_vectors) { 252 | free_relation_container(delta_b); 253 | } 254 | free_relation_container(rel->delta); 255 | free_relation_container(rel->newt); 256 | } 257 | } else { 258 | // for (Relation *rel : update_relations) { 259 | // std::cout << "Finished! " << rel->name << " has " 260 | // << rel->full->tuple_counts << std::endl; 261 | // } 262 | } 263 | timer.stop_timer(); 264 | float merge_full_time = timer.get_spent_time(); 265 | 266 | std::cout << " memory alloc time: " << memory_alloc_time 267 | << " ; Join time: " << join_time 268 | << " ; merge full time: " << merge_time 269 | << " ; rebuild full time: " << merge_full_time 270 | << " ; rebuild delta time: " << rebuild_delta_time 271 | << " ; set diff time: " << set_diff_time << std::endl; 272 | std::cout << "Rebuild relation detail time : rebuild rel sort time: " 273 | << rebuild_rel_sort_time 274 | << " ; rebuild rel unique time: " << rebuild_rel_unique_time 275 | << " ; rebuild rel index time: " << rebuild_rel_index_time 276 | << std::endl; 277 | } 278 | -------------------------------------------------------------------------------- /src/print.cu: -------------------------------------------------------------------------------- 1 | #include "../include/print.cuh" 2 | #include 3 | #include 4 | 5 | void print_hashes(GHashRelContainer *target, const char *rel_name) { 6 | MEntity *host_map; 7 | cudaMallocHost((void **)&host_map, 8 | target->index_map_size * sizeof(MEntity)); 9 | cudaMemcpy(host_map, target->index_map, 10 | target->index_map_size * sizeof(MEntity), 11 | cudaMemcpyDeviceToHost); 12 | std::cout << "Relation hash >>> " << rel_name << std::endl; 13 | for (tuple_size_t i = 0; i < target->index_map_size; i++) { 14 | std::cout << host_map[i].key << " " << host_map[i].value 15 | << std::endl; 16 | } 17 | std::cout << "end <<<" << std::endl; 18 | cudaFreeHost(host_map); 19 | } 20 | 21 | void print_tuple_rows(GHashRelContainer* target, const char *rel_name) { 22 | // sort first 23 | tuple_type* natural_ordered; 24 | cudaMalloc((void**) &natural_ordered, target->tuple_counts * sizeof(tuple_type)); 25 | cudaMemcpy(natural_ordered, target->tuples, target->tuple_counts * sizeof(tuple_type), 26 | cudaMemcpyDeviceToDevice); 27 | thrust::sort(thrust::device, natural_ordered, natural_ordered+target->tuple_counts, 28 | tuple_weak_less(target->arity)); 29 | 30 | tuple_type* tuples_host; 31 | cudaMallocHost((void**) &tuples_host, target->tuple_counts * sizeof(tuple_type)); 32 | cudaMemcpy(tuples_host, natural_ordered, target->tuple_counts * sizeof(tuple_type), 33 | cudaMemcpyDeviceToHost); 34 | std::cout << "Relation tuples >>> " << rel_name << std::endl; 35 | std::cout << "Total tuples counts: " << target->tuple_counts << 
std::endl; 36 | u32 pt_size = target->tuple_counts; 37 | if (target->tuple_counts > 3000) { 38 | pt_size = 3000; 39 | } 40 | for (tuple_size_t i = 0; i < pt_size; i++) { 41 | tuple_type cur_tuple = tuples_host[i]; 42 | 43 | tuple_type cur_tuple_host; 44 | cudaMallocHost((void**) &cur_tuple_host, target->arity * sizeof(column_type)); 45 | cudaMemcpy(cur_tuple_host, cur_tuple, target->arity * sizeof(column_type), 46 | cudaMemcpyDeviceToHost); 47 | // if (cur_tuple_host[0] != 1966) { 48 | // continue; 49 | // } 50 | for (int j = 0; j < target->arity; j++) { 51 | 52 | std::cout << cur_tuple_host[j] << "\t"; 53 | } 54 | std::cout << std::endl; 55 | cudaFreeHost(cur_tuple_host); 56 | } 57 | if (target->tuple_counts > 3000) { 58 | std::cout << "........." << std::endl; 59 | } 60 | std::cout << "end <<<" << std::endl; 61 | 62 | cudaFreeHost(tuples_host); 63 | cudaFree(natural_ordered); 64 | } 65 | 66 | void print_tuple_raw_data(GHashRelContainer* target, const char *rel_name) { 67 | column_type* raw_data_host; 68 | u64 mem_raw = target->data_raw_row_size * target->arity * sizeof(column_type); 69 | cudaMallocHost((void**) &raw_data_host, mem_raw); 70 | cudaMemcpy(raw_data_host, target->data_raw, mem_raw, cudaMemcpyDeviceToHost); 71 | std::cout << "Relation raw tuples >>> " << rel_name << std::endl; 72 | std::cout << "Total raw tuples counts: " << target->data_raw_row_size << std::endl; 73 | for (tuple_size_t i = 0; i < target->data_raw_row_size; i++) { 74 | if (raw_data_host[i*target->arity] != 3) { 75 | continue; 76 | } 77 | for (int j = 0; j < target->arity; j++) { 78 | std::cout << raw_data_host[i*target->arity + j] << " "; 79 | } 80 | std::cout << std::endl; 81 | } 82 | cudaFreeHost(raw_data_host); 83 | } 84 | 85 | void print_memory_usage(){ 86 | int num_gpus; 87 | size_t free, total; 88 | cudaGetDeviceCount( &num_gpus ); 89 | for ( int gpu_id = 0; gpu_id < num_gpus; gpu_id++ ) { 90 | cudaSetDevice( gpu_id ); 91 | int id; 92 | cudaGetDevice( &id ); 93 | cudaMemGetInfo( &free, &total ); 94 | std::cout << "GPU " << id << " memory: free=" << free << ", total=" << total << std::endl; 95 | } 96 | } 97 | 98 | tuple_size_t get_free_memory() { 99 | int num_gpus; 100 | size_t free, total; 101 | cudaGetDeviceCount( &num_gpus ); 102 | for ( int gpu_id = 0; gpu_id < num_gpus; gpu_id++ ) { 103 | cudaSetDevice( gpu_id ); 104 | int id; 105 | cudaGetDevice( &id ); 106 | cudaMemGetInfo( &free, &total ); 107 | return free; 108 | } 109 | return 0; 110 | } 111 | 112 | tuple_size_t get_total_memory() { 113 | int num_gpus; 114 | size_t free, total; 115 | cudaGetDeviceCount( &num_gpus ); 116 | for ( int gpu_id = 0; gpu_id < num_gpus; gpu_id++ ) { 117 | cudaSetDevice( gpu_id ); 118 | int id; 119 | cudaGetDevice( &id ); 120 | cudaMemGetInfo( &free, &total ); 121 | return total; 122 | } 123 | return 0; 124 | } 125 | 126 | void print_tuple_list(tuple_type* tuples, tuple_size_t rows, tuple_size_t arity) { 127 | tuple_type* tuples_host; 128 | cudaMallocHost((void**) &tuples_host, rows * sizeof(tuple_type)); 129 | cudaMemcpy(tuples_host, tuples, rows * sizeof(tuple_type), 130 | cudaMemcpyDeviceToHost); 131 | if (rows > 100) { 132 | rows = 100; 133 | } 134 | for (tuple_size_t i = 0; i < rows; i++) { 135 | tuple_type cur_tuple = tuples_host[i]; 136 | 137 | tuple_type cur_tuple_host; 138 | cudaMallocHost((void**) &cur_tuple_host, arity * sizeof(column_type)); 139 | cudaMemcpy(cur_tuple_host, cur_tuple, arity * sizeof(column_type), 140 | cudaMemcpyDeviceToHost); 141 | for (tuple_size_t j = 0; j < arity; j++) { 142 | std::cout 
<< cur_tuple_host[j] << " "; 143 | } 144 | std::cout << std::endl; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/tuple.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "../include/exception.cuh" 3 | #include "../include/tuple.cuh" 4 | #include 5 | 6 | __global__ void extract_column(tuple_type *tuples, tuple_size_t rows, 7 | tuple_size_t k, column_type *column) { 8 | int index = (blockIdx.x * blockDim.x) + threadIdx.x; 9 | if (index >= rows) 10 | return; 11 | 12 | int stride = blockDim.x * gridDim.x; 13 | for (tuple_size_t i = index; i < rows; i += stride) { 14 | column[i] = tuples[i][k]; 15 | } 16 | } 17 | 18 | __global__ void compute_hash(tuple_type *tuples, tuple_size_t rows, 19 | tuple_size_t index_column_size, 20 | column_type *hashes) { 21 | int index = (blockIdx.x * blockDim.x) + threadIdx.x; 22 | if (index >= rows) 23 | return; 24 | 25 | int stride = blockDim.x * gridDim.x; 26 | for (tuple_size_t i = index; i < rows; i += stride) { 27 | hashes[i] = (column_type)prefix_hash(tuples[i], index_column_size); 28 | } 29 | } 30 | 31 | void sort_tuples(tuple_type *tuples, tuple_size_t rows, tuple_size_t arity, 32 | tuple_size_t index_column_size, int grid_size, 33 | int block_size) { 34 | 35 | column_type *col_tmp; 36 | cudaMalloc((void **)&col_tmp, rows * sizeof(column_type)); 37 | for (int k = arity - 1; k >= 0; k--) { 38 | extract_column<<>>(tuples, rows, k, col_tmp); 39 | checkCuda(cudaGetLastError()); 40 | checkCuda(cudaDeviceSynchronize()); 41 | thrust::stable_sort_by_key(thrust::device, col_tmp, col_tmp + rows, 42 | tuples); 43 | checkCuda(cudaDeviceSynchronize()); 44 | } 45 | cudaFree(col_tmp); 46 | } 47 | 48 | void sort_tuple_by_hash(tuple_type *tuples, tuple_size_t rows, 49 | tuple_size_t arity, tuple_size_t index_column_size, 50 | int grid_size, int block_size) { 51 | column_type *col_tmp; 52 | cudaMalloc((void **)&col_tmp, rows * sizeof(column_type)); 53 | compute_hash<<>>(tuples, rows, index_column_size, 54 | col_tmp); 55 | checkCuda(cudaGetLastError()); 56 | checkCuda(cudaDeviceSynchronize()); 57 | thrust::stable_sort_by_key(thrust::device, col_tmp, col_tmp + rows, tuples); 58 | checkCuda(cudaDeviceSynchronize()); 59 | cudaFree(col_tmp); 60 | } 61 | -------------------------------------------------------------------------------- /test/cspa.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "../include/exception.cuh" 12 | #include "../include/lie.cuh" 13 | #include "../include/print.cuh" 14 | #include "../include/timer.cuh" 15 | 16 | ////////////////////////////////////////////////////// 17 | 18 | long int get_row_size(const char *data_path) { 19 | std::ifstream f; 20 | f.open(data_path); 21 | char c; 22 | long i = 0; 23 | while (f.get(c)) 24 | if (c == '\n') 25 | ++i; 26 | f.close(); 27 | return i; 28 | } 29 | 30 | enum ColumnT { U64, U32 }; 31 | 32 | column_type *get_relation_from_file(const char *file_path, int total_rows, 33 | int total_columns, char separator, 34 | ColumnT ct) { 35 | column_type *data = 36 | (column_type *)malloc(total_rows * total_columns * sizeof(column_type)); 37 | FILE *data_file = fopen(file_path, "r"); 38 | for (int i = 0; i < total_rows; i++) { 39 | for (int j = 0; j < total_columns; j++) { 40 | if (j != (total_columns - 1)) { 41 | if (ct == U64) { 42 | fscanf(data_file, 
"%lld%c", &data[(i * total_columns) + j], 43 | &separator); 44 | } else { 45 | fscanf(data_file, "%ld%c", &data[(i * total_columns) + j], 46 | &separator); 47 | } 48 | } else { 49 | if (ct == U64) { 50 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 51 | } else { 52 | fscanf(data_file, "%ld", &data[(i * total_columns) + j]); 53 | } 54 | } 55 | } 56 | } 57 | return data; 58 | } 59 | 60 | ////////////////////////////////////////////////////////////////// 61 | 62 | __device__ void cp_2_1__1(tuple_type input, tuple_type outpt) { 63 | outpt[0] = input[0]; 64 | outpt[1] = input[0]; 65 | }; 66 | __device__ tuple_copy_hook cp_2_1__1_device = cp_2_1__1; 67 | __device__ void cp_2_1__2(tuple_type input, tuple_type outpt) { 68 | outpt[0] = input[1]; 69 | outpt[1] = input[1]; 70 | }; 71 | __device__ tuple_copy_hook cp_2_1__2_device = cp_2_1__2; 72 | 73 | __device__ void cp_2_1__1_2(tuple_type input, tuple_type outpt) { 74 | outpt[0] = input[1]; 75 | outpt[1] = input[0]; 76 | }; 77 | __device__ tuple_copy_hook cp_2_1__1_2_device = cp_2_1__1_2; 78 | __device__ void cp_2_1__2_1(tuple_type input, tuple_type outpt) { 79 | outpt[0] = input[0]; 80 | outpt[1] = input[1]; 81 | }; 82 | __device__ tuple_copy_hook cp_2_1__2_1_device = cp_2_1__2_1; 83 | 84 | __device__ void join_10_11(tuple_type inner, tuple_type outer, 85 | tuple_type output) { 86 | output[1] = inner[1]; 87 | output[0] = outer[1]; 88 | } 89 | __device__ tuple_generator_hook join_10_11_device = join_10_11; 90 | 91 | __device__ void join_01_11(tuple_type inner, tuple_type outer, 92 | tuple_type output) { 93 | output[0] = inner[1]; 94 | output[1] = outer[1]; 95 | } 96 | __device__ tuple_generator_hook join_01_11_device = join_01_11; 97 | 98 | //////////////////////////////////////////////////////////////// 99 | 100 | void analysis_bench(const char *dataset_path, int block_size, int grid_size) { 101 | KernelTimer timer; 102 | int relation_columns = 2; 103 | std::chrono::high_resolution_clock::time_point time_point_begin; 104 | std::chrono::high_resolution_clock::time_point time_point_end; 105 | 106 | double spent_time; 107 | 108 | // load the input relation 109 | std::stringstream assign_fact_ss; 110 | assign_fact_ss << dataset_path << "/assign.facts"; 111 | std::stringstream dereference_fact_ss; 112 | dereference_fact_ss << dataset_path << "/dereference.facts"; 113 | // std::cout << assign_fact_ss.str() << std::endl; 114 | tuple_size_t assign_counts = get_row_size(assign_fact_ss.str().c_str()); 115 | std::cout << "Input assign rows: " << assign_counts << std::endl; 116 | column_type *raw_assign_data = get_relation_from_file( 117 | assign_fact_ss.str().c_str(), assign_counts, 2, '\t', U32); 118 | std::cout << "reversing assign ... " << std::endl; 119 | column_type *raw_reverse_assign_data = 120 | (column_type *)malloc(assign_counts * 2 * sizeof(column_type)); 121 | for (tuple_size_t i = 0; i < assign_counts; i++) { 122 | raw_reverse_assign_data[i * 2 + 1] = raw_assign_data[i * 2]; 123 | raw_reverse_assign_data[i * 2] = raw_assign_data[i * 2 + 1]; 124 | } 125 | 126 | tuple_size_t dereference_counts = 127 | get_row_size(dereference_fact_ss.str().c_str()); 128 | std::cout << "Input dereference rows: " << dereference_counts << std::endl; 129 | column_type *raw_dereference_data = get_relation_from_file( 130 | dereference_fact_ss.str().c_str(), dereference_counts, 2, '\t', U32); 131 | std::cout << "reversing dereference ... 
" << std::endl; 132 | column_type *raw_reverse_dereference_data = 133 | (column_type *)malloc(dereference_counts * 2 * sizeof(column_type)); 134 | for (tuple_size_t i = 0; i < dereference_counts; i++) { 135 | raw_reverse_dereference_data[i * 2 + 1] = raw_dereference_data[i * 2]; 136 | raw_reverse_dereference_data[i * 2] = raw_dereference_data[i * 2 + 1]; 137 | } 138 | 139 | timer.start_timer(); 140 | 141 | Relation *assign_2__2_1 = new Relation(); 142 | load_relation(assign_2__2_1, "assign_2__2_1", 2, raw_reverse_assign_data, 143 | assign_counts, 1, 0, grid_size, block_size); 144 | 145 | Relation *dereference_2__1_2 = new Relation(); 146 | load_relation(dereference_2__1_2, "dereference_2__1_2", 2, 147 | raw_dereference_data, dereference_counts, 1, 0, grid_size, 148 | block_size); 149 | Relation *dereference_2__2_1 = new Relation(); 150 | load_relation(dereference_2__2_1, "dereference_2__2_1", 2, 151 | raw_reverse_dereference_data, dereference_counts, 1, 0, 152 | grid_size, block_size); 153 | timer.stop_timer(); 154 | std::cout << "Build hash table time: " << timer.get_spent_time() 155 | << std::endl; 156 | 157 | // scc init 158 | Relation *value_flow_2__1_2 = new Relation(); 159 | load_relation(value_flow_2__1_2, "value_flow_2__1_2", 2, nullptr, 0, 1, 0, 160 | grid_size, block_size); 161 | Relation *value_flow_2__2_1 = new Relation(); 162 | load_relation(value_flow_2__2_1, "value_flow_2__2_1", 2, nullptr, 0, 1, 0, 163 | grid_size, block_size); 164 | 165 | Relation *memory_alias_2__1_2 = new Relation(); 166 | load_relation(memory_alias_2__1_2, "memory_alias_2__1_2", 2, nullptr, 0, 1, 167 | 0, grid_size, block_size); 168 | Relation *memory_alias_2__2_1 = new Relation(); 169 | load_relation(memory_alias_2__2_1, "memory_alias_2__2_1", 2, nullptr, 0, 1, 170 | 0, grid_size, block_size); 171 | 172 | timer.start_timer(); 173 | time_point_begin = std::chrono::high_resolution_clock::now(); 174 | LIE init_scc(grid_size, block_size); 175 | init_scc.add_relations(value_flow_2__1_2, false); 176 | init_scc.add_relations(value_flow_2__2_1, false); 177 | init_scc.add_relations(memory_alias_2__1_2, false); 178 | init_scc.add_relations(memory_alias_2__2_1, false); 179 | init_scc.add_relations(assign_2__2_1, true); 180 | tuple_copy_hook cp_2_1__1_host; 181 | checkCuda(cudaMemcpyFromSymbol(&cp_2_1__1_host, cp_2_1__1_device, 182 | sizeof(tuple_copy_hook))); 183 | tuple_copy_hook cp_2_1__2_host; 184 | checkCuda(cudaMemcpyFromSymbol(&cp_2_1__2_host, cp_2_1__2_device, 185 | sizeof(tuple_copy_hook))); 186 | tuple_copy_hook cp_2_1__1_2_host; 187 | checkCuda(cudaMemcpyFromSymbol(&cp_2_1__1_2_host, cp_2_1__1_2_device, 188 | sizeof(tuple_copy_hook))); 189 | tuple_copy_hook cp_2_1__2_1_host; 190 | checkCuda(cudaMemcpyFromSymbol(&cp_2_1__1_host, cp_2_1__1_device, 191 | sizeof(tuple_copy_hook))); 192 | init_scc.add_ra(RelationalCopy(assign_2__2_1, FULL, value_flow_2__1_2, 193 | cp_2_1__1_host, nullptr, grid_size, 194 | block_size)); 195 | init_scc.add_ra(RelationalCopy(assign_2__2_1, FULL, value_flow_2__1_2, 196 | cp_2_1__2_host, nullptr, grid_size, 197 | block_size)); 198 | init_scc.add_ra(RelationalCopy(assign_2__2_1, FULL, value_flow_2__1_2, 199 | cp_2_1__1_2_host, nullptr, grid_size, 200 | block_size)); 201 | 202 | init_scc.add_ra(RelationalCopy(assign_2__2_1, FULL, memory_alias_2__1_2, 203 | cp_2_1__1_host, nullptr, grid_size, 204 | block_size)); 205 | init_scc.add_ra(RelationalCopy(assign_2__2_1, FULL, memory_alias_2__1_2, 206 | cp_2_1__2_host, nullptr, grid_size, 207 | block_size)); 208 | 209 | 
init_scc.add_ra(RelationalCopy(value_flow_2__1_2, DELTA, value_flow_2__2_1, 210 | cp_2_1__1_2_host, nullptr, grid_size, 211 | block_size)); 212 | init_scc.add_ra(RelationalCopy(memory_alias_2__1_2, DELTA, memory_alias_2__2_1, 213 | cp_2_1__1_2_host, nullptr, grid_size, 214 | block_size)); 215 | init_scc.fixpoint_loop(); 216 | 217 | timer.stop_timer(); 218 | time_point_end = std::chrono::high_resolution_clock::now(); 219 | std::cout << "init scc time: " << timer.get_spent_time() << std::endl; 220 | std::cout << "init scc time (chono): " 221 | << std::chrono::duration_cast( 222 | time_point_end - time_point_begin) 223 | .count() 224 | << std::endl; 225 | 226 | // scc analysis 227 | Relation *value_flow_forward_2__1_2 = new Relation(); 228 | load_relation(value_flow_forward_2__1_2, "value_flow_forward_2__1_2", 2, 229 | nullptr, 0, 1, 0, grid_size, block_size); 230 | 231 | Relation *value_flow_forward_2__2_1 = new Relation(); 232 | load_relation(value_flow_forward_2__2_1, "value_flow_forward_2__2_1", 2, 233 | nullptr, 0, 1, 0, grid_size, block_size); 234 | 235 | Relation *value_alias_2__1_2 = new Relation(); 236 | value_alias_2__1_2->index_flag = false; 237 | load_relation(value_alias_2__1_2, "value_alias_2__1_2", 2, nullptr, 0, 1, 0, 238 | grid_size, block_size); 239 | 240 | Relation *tmp_rel_def = new Relation(); 241 | tmp_rel_def->index_flag = false; 242 | load_relation(tmp_rel_def, "tmp_rel_def", 2, nullptr, 0, 1, 0, grid_size, 243 | block_size); 244 | Relation *tmp_rel_ma1 = new Relation(); 245 | tmp_rel_ma1->index_flag = false; 246 | load_relation(tmp_rel_ma1, "tmp_rel_ma1", 2, nullptr, 0, 1, 0, grid_size, 247 | block_size, true); 248 | Relation *tmp_rel_ma2 = new Relation(); 249 | tmp_rel_ma2->index_flag = false; 250 | load_relation(tmp_rel_ma2, "tmp_rel_ma2", 2, nullptr, 0, 1, 0, grid_size, 251 | block_size, true); 252 | 253 | LIE analysis_scc(grid_size, block_size); 254 | 255 | analysis_scc.add_relations(assign_2__2_1, true); 256 | analysis_scc.add_relations(dereference_2__1_2, true); 257 | analysis_scc.add_relations(dereference_2__2_1, true); 258 | 259 | analysis_scc.add_relations(value_flow_2__1_2, false); 260 | analysis_scc.add_relations(value_flow_2__2_1, false); 261 | analysis_scc.add_relations(memory_alias_2__1_2, false); 262 | analysis_scc.add_relations(memory_alias_2__2_1, false); 263 | analysis_scc.add_relations(value_alias_2__1_2, false); 264 | 265 | // join order matters for temp! 266 | analysis_scc.add_tmp_relation(tmp_rel_def); 267 | analysis_scc.add_tmp_relation(tmp_rel_ma1); 268 | analysis_scc.add_tmp_relation(tmp_rel_ma2); 269 | 270 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 271 | 272 | // join_vf_vfvf: ValueFlow(x, y) :- ValueFlow(x, z), ValueFlow(z, y). 273 | tuple_generator_hook join_10_11_host; 274 | checkCuda(cudaMemcpyFromSymbol(&join_10_11_host, join_10_11_device, 275 | sizeof(tuple_generator_hook))); 276 | tuple_generator_hook join_01_11_host; 277 | checkCuda(cudaMemcpyFromSymbol(&join_01_11_host, join_01_11_device, 278 | sizeof(tuple_generator_hook))); 279 | analysis_scc.add_ra( 280 | RelationalJoin(value_flow_2__1_2, FULL, value_flow_2__2_1, DELTA, 281 | value_flow_2__1_2, join_10_11_host, nullptr, LEFT, 282 | grid_size, block_size, join_detail)); 283 | analysis_scc.add_ra( 284 | RelationalJoin(value_flow_2__2_1, FULL, value_flow_2__1_2, DELTA, 285 | value_flow_2__1_2, join_01_11_host, nullptr, LEFT, 286 | grid_size, block_size, join_detail)); 287 | 288 | // join_va_vf_vf: ValueAlias(x, y) :- ValueFlow(z, x), ValueFlow(z, y). 
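// [Editorial sketch, not part of the original benchmark] This rule is a
// self-join of ValueFlow on its first (indexed) column. A minimal host-side
// version over hard-coded pairs standing in for ValueFlow(z, x) shows the
// same derivation; the GPU pipeline additionally deduplicates the result and
// merges it into the newt version of ValueAlias:
{
    int vf[][2] = {{1, 4}, {1, 5}, {2, 6}};        // ValueFlow(z, x)
    for (auto &a : vf)                             // ValueFlow(z, x)
        for (auto &b : vf)                         // ValueFlow(z, y)
            if (a[0] == b[0])                      // join on z
                std::cout << "ValueAlias(" << a[1] << ", " << b[1] << ")\n";
}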
289 | // v1 290 | analysis_scc.add_ra( 291 | RelationalJoin(value_flow_2__1_2, FULL, value_flow_2__1_2, DELTA, 292 | value_alias_2__1_2, join_01_11_host, nullptr, LEFT, 293 | grid_size, block_size, join_detail)); 294 | // v2 295 | analysis_scc.add_ra( 296 | RelationalJoin(value_flow_2__1_2, FULL, value_flow_2__1_2, DELTA, 297 | value_alias_2__1_2, join_10_11_host, nullptr, LEFT, 298 | grid_size, block_size, join_detail)); 299 | 300 | // join_vf_am: ValueFlow(x, y) :- Assign(x, z), MemoryAlias(z, y). 301 | analysis_scc.add_ra( 302 | RelationalJoin(assign_2__2_1, FULL, memory_alias_2__1_2, DELTA, 303 | value_flow_2__1_2, join_01_11_host, nullptr, LEFT, 304 | grid_size, block_size, join_detail)); 305 | 306 | // tmp_rel_def(z, x) :- Dereference(y, x), ValueAlias(y, z) 307 | analysis_scc.add_ra( 308 | RelationalJoin(dereference_2__1_2, FULL, value_alias_2__1_2, DELTA, 309 | tmp_rel_def, join_10_11_host, nullptr, LEFT, grid_size, 310 | block_size, join_detail)); 311 | 312 | // WARNING: tmp relation can only in outer because it doesn't include 313 | // index! 314 | // join_ma_d_tmp: MemoryAlias(x, w) :- Dereference(z, w) , tmp_rel_def(z,x) 315 | analysis_scc.add_ra( 316 | RelationalJoin(dereference_2__1_2, FULL, tmp_rel_def, NEWT, 317 | memory_alias_2__1_2, join_10_11_host, nullptr, LEFT, 318 | grid_size, block_size, join_detail)); 319 | 320 | // ValueAlias(x,y) :- 321 | // ValueFlow(z,x), 322 | // MemoryAlias(z,w), 323 | // ValueFlow(w,y). 324 | // ValueFlow DELTA 1, 2 <> MemoryAlias FULL 1, 2 <> ValueFlow FULL 2, 1 325 | // ValueFlow FULL 1, 2 <> MemoryAlias DELTA 1, 2 <> ValueFlow FULL 2, 1 326 | // ValueFlow FULL 1, 2 <> MemoryAlias FULL 1, 2 <> ValueFlow DELTA 2, 1 327 | // join_tmp_vf_ma : tmp_rel_ma(w, x) :- ValueFlow(z, x), MemoryAlias(z, w). 328 | // join_va_tmp_vf : ValueAlias(x, y) :- tmp_rel_ma(w, x), ValueFlow(w,y). 
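// [Editorial sketch, not part of the original benchmark] The ternary
// ValueAlias rule is evaluated as the two binary joins named above,
// materializing tmp_rel_ma in between. A host-side version of the same two
// steps over hypothetical tuples:
{
    int vf[][2] = {{1, 4}, {2, 5}};     // ValueFlow(z, x) and ValueFlow(w, y)
    int ma[][2] = {{1, 2}};             // MemoryAlias(z, w)
    for (auto &f : vf)
        for (auto &m : ma)
            if (f[0] == m[0])           // join on z  -> tmp_rel_ma(w, x)
                for (auto &g : vf)
                    if (g[0] == m[1])   // join on w  -> ValueAlias(x, y)
                        std::cout << "ValueAlias(" << f[1] << ", " << g[1]
                                  << ")\n";
}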
329 | // v1 330 | analysis_scc.add_ra( 331 | RelationalJoin(memory_alias_2__1_2, FULL , value_flow_2__1_2, DELTA, 332 | tmp_rel_ma1, join_01_11_host, nullptr, LEFT, grid_size, 333 | block_size, join_detail)); 334 | analysis_scc.add_ra( 335 | RelationalJoin(value_flow_2__1_2, FULL, memory_alias_2__1_2, DELTA, 336 | tmp_rel_ma1, join_10_11_host, nullptr, LEFT, grid_size, 337 | block_size, join_detail)); 338 | 339 | analysis_scc.add_ra( 340 | RelationalJoin(value_flow_2__1_2, FULL, tmp_rel_ma1, NEWT, 341 | value_alias_2__1_2, join_10_11_host, nullptr, LEFT, 342 | grid_size, block_size, join_detail)); 343 | 344 | analysis_scc.add_ra( 345 | RelationalJoin(memory_alias_2__2_1, FULL , value_flow_2__1_2, DELTA, 346 | tmp_rel_ma2, join_01_11_host, nullptr, LEFT, grid_size, 347 | block_size, join_detail)); 348 | analysis_scc.add_ra( 349 | RelationalJoin(value_flow_2__1_2, FULL, tmp_rel_ma2, NEWT, 350 | value_alias_2__1_2, join_01_11_host, nullptr, LEFT, 351 | grid_size, block_size, join_detail)); 352 | 353 | analysis_scc.add_ra(RelationalACopy(value_flow_2__1_2, value_flow_2__2_1, 354 | cp_2_1__1_2_host, nullptr, grid_size, 355 | block_size)); 356 | analysis_scc.add_ra(RelationalACopy(memory_alias_2__1_2, memory_alias_2__2_1, 357 | cp_2_1__1_2_host, nullptr, grid_size, 358 | block_size)); 359 | time_point_begin = std::chrono::high_resolution_clock::now(); 360 | timer.start_timer(); 361 | analysis_scc.fixpoint_loop(); 362 | // print_tuple_rows(value_flow_2__1_2->full, "value_flow_2__1_2"); 363 | timer.stop_timer(); 364 | time_point_end = std::chrono::high_resolution_clock::now(); 365 | std::cout << "analysis scc time: " << timer.get_spent_time() << std::endl; 366 | std::cout << "analysis scc time (chono): " 367 | << std::chrono::duration_cast( 368 | time_point_end - time_point_begin) 369 | .count() 370 | << std::endl; 371 | std::cout << "join detail: " << std::endl; 372 | std::cout << "compute size time: " << join_detail[0] << std::endl; 373 | std::cout << "reduce + scan time: " << join_detail[1] << std::endl; 374 | std::cout << "fetch result time: " << join_detail[2] << std::endl; 375 | std::cout << "sort time: " << join_detail[3] << std::endl; 376 | std::cout << "build index time: " << join_detail[5] << std::endl; 377 | std::cout << "merge time: " << join_detail[6] << std::endl; 378 | std::cout << "unique time: " << join_detail[4] + join_detail[7] << std::endl; 379 | } 380 | 381 | int main(int argc, char *argv[]) { 382 | int device_id; 383 | int number_of_sm; 384 | cudaGetDevice(&device_id); 385 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 386 | device_id); 387 | int max_threads_per_block; 388 | cudaDeviceGetAttribute(&max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, 0); 389 | std::cout << "num of sm " << number_of_sm << " num of thread per block " << max_threads_per_block << std::endl; 390 | std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 391 | << std::endl; 392 | int block_size, grid_size; 393 | block_size = 512; 394 | grid_size = 32 * number_of_sm; 395 | std::locale loc(""); 396 | analysis_bench(argv[1], block_size, grid_size); 397 | return 0; 398 | } 399 | -------------------------------------------------------------------------------- /test/cuDF/load_test.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | import cudf 4 | import time 5 | import json 6 | import cupy 7 | 8 | REPEAT = 1000 9 | 10 | def get_dataset(filename, column_names=['column 1', 'column 2'], 11 | rows=None): 12 | if rows 
!= None: 13 | nrows = rows 14 | else: 15 | nrows = int(re.search('\d+|$', filename).group()) 16 | return cudf.read_csv(filename, sep='\t', header=None, 17 | names=column_names, nrows=nrows) 18 | 19 | 20 | def loading_time_benchmark(dataset, rows=None): 21 | graph = get_dataset(dataset, rows=rows) 22 | arr_cupy = cudf.to_cupy() 23 | size = len(arr_cupy) 24 | time_start = time.time() 25 | for _ in range(REPEAT): 26 | re_constructed = cudf.from_cupy(arr_cupy) 27 | time_end = time.time() 28 | return time_end - time_start, size 29 | 30 | def main(): 31 | datasets = { 32 | "ego-Facebook": "../data/data_88234.txt", 33 | "wiki-Vote": "../data/data_103689.txt", 34 | "luxembourg_osm": "../data/data_119666.txt", 35 | "fe_sphere": "../data/data_49152.txt", 36 | "fe_body": "../data/data_163734.txt", 37 | "cti": "../data/data_48232.txt", 38 | "fe_ocean": "../data/data_409593.txt", 39 | "wing": "../data/data_121544.txt", 40 | "loc-Brightkite": "../data/data_214078.txt", 41 | "delaunay_n16": "../data/data_196575.txt", 42 | "usroads": "../data/data_165435.txt", 43 | "CA-HepTh": "../data/data_51971.txt", 44 | "SF.cedge": "../data/data_223001.txt", 45 | "p2p-Gnutella31": "../data/data_147892.txt", 46 | "p2p-Gnutella09": "../data/data_26013.txt", 47 | "p2p-Gnutella04": "../data/data_39994.txt", 48 | "cal.cedge": "../data/data_21693.txt", 49 | "TG.cedge": "../data/data_23874.txt", 50 | "OL.cedge": "../data/data_7035.txt", 51 | } 52 | results = {} 53 | for dataset_name, dataset_path in datasets.items(): 54 | loading_time, size = loading_time_benchmark(dataset_path) 55 | results[dataset_name] = { 56 | "loading_time": loading_time, 57 | "tuple/s: ": size * REPEAT / loading_time 58 | } 59 | print(json.dumps(results, indent=4)) 60 | 61 | if __name__ == "__main__": 62 | main() 63 | -------------------------------------------------------------------------------- /test/cuDF/reachability.py: -------------------------------------------------------------------------------- 1 | import re 2 | import cudf 3 | import time 4 | import json 5 | 6 | 7 | def display_time(time_start, time_end, message): 8 | time_took = time_end - time_start 9 | print(f"Debug: {message}: {time_took:.6f}s") 10 | 11 | 12 | def get_join(relation_1, relation_2, column_names=['column 1', 'column 2']): 13 | return relation_1.merge(relation_2, on=column_names[0], 14 | how="inner", 15 | suffixes=('_relation_1', '_relation_2')) 16 | 17 | 18 | def get_projection(result, column_names=['column 1', 'column 2']): 19 | temp = result.drop([column_names[0]], axis=1).drop_duplicates() 20 | temp.columns = column_names 21 | return temp 22 | 23 | 24 | def get_union(relation_1, relation_2): 25 | return cudf.concat([relation_1, relation_2], 26 | ignore_index=True).drop_duplicates() 27 | 28 | 29 | def get_dataset(filename, column_names=['column 1', 'column 2'], 30 | rows=None): 31 | if rows != None: 32 | nrows = rows 33 | else: 34 | nrows = int(re.search('\d+|$', filename).group()) 35 | return cudf.read_csv(filename, sep='\t', header=None, 36 | names=column_names, nrows=nrows) 37 | 38 | 39 | def get_transitive_closure(dataset): 40 | COLUMN_NAMES = ['column 1', 'column 2'] 41 | rows = int(re.search('\d+|$', dataset).group()) 42 | start_time_outer = time.perf_counter() 43 | relation_1 = get_dataset(dataset, COLUMN_NAMES, rows) 44 | relation_2 = relation_1.copy() 45 | relation_2.columns = COLUMN_NAMES[::-1] 46 | temp_result = relation_1 47 | i = 0 48 | while True: 49 | temp_projection = get_projection(get_join(relation_2, relation_1, 50 | COLUMN_NAMES), COLUMN_NAMES) 51 | x 
= len(temp_projection) 52 | previous_result_size = len(temp_result) 53 | temp_result = get_union(temp_result, temp_projection) 54 | current_result_size = len(temp_result) 55 | if previous_result_size == current_result_size: 56 | i += 1 57 | break 58 | del relation_2 59 | relation_2 = temp_projection 60 | relation_2.columns = COLUMN_NAMES[::-1] 61 | i += 1 62 | del temp_projection 63 | # print(f"i: {i}, projection size: {x}, rows: {current_result_size}") 64 | end_time_outer = time.perf_counter() 65 | time_took = end_time_outer - start_time_outer 66 | time_took = f"{time_took:.6f}" 67 | # print(temp_result) 68 | return rows, len(temp_result), i, time_took 69 | 70 | 71 | def generate_benchmark(iterative=True, datasets=None): 72 | result = [] 73 | if iterative: 74 | print("| Number of rows | TC size | Iterations | Time (s) |") 75 | print("| --- | --- | --- | --- |") 76 | increment = 1000 77 | n = 990 78 | count = 0 79 | while n < 11000: 80 | try: 81 | dataset = f"../data/data_{n}.txt" 82 | n = int(re.search('\d+|$', dataset).group()) 83 | record = get_transitive_closure(dataset) 84 | result.append(record) 85 | print( 86 | f"| {record[0]} | {record[1]} | {record[2]} | {record[3]:.6f} |") 87 | n += increment 88 | except Exception as ex: 89 | print(str(ex)) 90 | break 91 | count += 1 92 | if datasets: 93 | print("| Dataset | Number of rows | TC size | Iterations | Time (s) |") 94 | print("| --- | --- | --- | --- | --- |") 95 | for key, dataset in datasets.items(): 96 | try: 97 | record = get_transitive_closure(dataset) 98 | record = list(record) 99 | record.insert(0, key) 100 | result.append(record) 101 | message = " | ".join([str(s) for s in record]) 102 | message = "| " + message + " |" 103 | print(message) 104 | except Exception as ex: 105 | print(str(ex)) 106 | break 107 | print("\n") 108 | with open('transitive_closure.json', 'w') as f: 109 | json.dump(result, f) 110 | 111 | 112 | if __name__ == "__main__": 113 | generate_benchmark(iterative=False, datasets={ 114 | "ego-Facebook": "../data/data_88234.txt", 115 | "wiki-Vote": "../data/data_103689.txt", 116 | "luxembourg_osm": "../data/data_119666.txt", 117 | "fe_sphere": "../data/data_49152.txt", 118 | # "fe_body": "../data/data_163734.txt", 119 | "cti": "../data/data_48232.txt", 120 | # "fe_ocean": "../data/data_409593.txt", 121 | "wing": "../data/data_121544.txt", 122 | # "loc-Brightkite": "../data/data_214078.txt", 123 | "delaunay_n16": "../data/data_196575.txt", 124 | # "usroads": "../data/data_165435.txt", 125 | "CA-HepTh": "../data/data_51971.txt", 126 | "SF.cedge": "../data/data_223001.txt", 127 | # "p2p-Gnutella31": "../data/data_147892.txt", 128 | "p2p-Gnutella09": "../data/data_26013.txt", 129 | "p2p-Gnutella04": "../data/data_39994.txt", 130 | "cal.cedge": "../data/data_21693.txt", 131 | "TG.cedge": "../data/data_23874.txt", 132 | "OL.cedge": "../data/data_7035.txt", 133 | }) 134 | 135 | 136 | -------------------------------------------------------------------------------- /test/cuDF/sg.json: -------------------------------------------------------------------------------- 1 | [ 2 | [ 3 | "CA-HepTh", 4 | 51971, 5 | 74618689, 6 | 9, 7 | "21.241212" 8 | ], 9 | [ 10 | "ego-Facebook", 11 | 88234, 12 | 15018986, 13 | 13, 14 | "19.074940" 15 | ], 16 | [ 17 | "wiki-Vote", 18 | 103689, 19 | 5376338, 20 | 4, 21 | "2.603751" 22 | ], 23 | [ 24 | "luxembourg_osm", 25 | 119666, 26 | 245221, 27 | 326, 28 | "2.215113" 29 | ], 30 | [ 31 | "cti", 32 | 48232, 33 | 14503742, 34 | 44, 35 | "3.857438" 36 | ], 37 | [ 38 | "fe_ocean", 39 | 409593, 40 | 
65941441, 41 | 77, 42 | "45.979235" 43 | ], 44 | [ 45 | "wing", 46 | 121544, 47 | 647999, 48 | 8, 49 | "0.204277" 50 | ], 51 | [ 52 | "delaunay_n16", 53 | 196575, 54 | 25994011, 55 | 85, 56 | "14.832548" 57 | ], 58 | [ 59 | "p2p-Gnutella09", 60 | 26013, 61 | 62056583, 62 | 14, 63 | "13.705286" 64 | ], 65 | [ 66 | "p2p-Gnutella04", 67 | 39994, 68 | 116931333, 69 | 18, 70 | "48.947088" 71 | ], 72 | [ 73 | "cal.cedge", 74 | 21693, 75 | 23519, 76 | 58, 77 | "0.259069" 78 | ], 79 | [ 80 | "TG.cedge", 81 | 23874, 82 | 608090, 83 | 54, 84 | "0.719743" 85 | ], 86 | [ 87 | "OL.cedge", 88 | 7035, 89 | 285431, 90 | 56, 91 | "0.385674" 92 | ] 93 | ] -------------------------------------------------------------------------------- /test/cuDF/sg.py: -------------------------------------------------------------------------------- 1 | import re 2 | import cudf 3 | import time 4 | import json 5 | 6 | REPEAT = 3 7 | 8 | def display_time(time_start, time_end, message): 9 | time_took = time_end - time_start 10 | print(f"Debug: {message}: {time_took:.6f}s") 11 | 12 | 13 | def get_join(relation_1, relation_2, column_names=['column 1', 'column 2']): 14 | return relation_1.merge(relation_2, on=column_names[0], 15 | how="inner", 16 | suffixes=('_relation_1', '_relation_2')) 17 | 18 | 19 | def get_projection(result, column_names=['column 1', 'column 2'], remove_same_val=False): 20 | temp = result.drop([column_names[0]], axis=1).drop_duplicates() 21 | temp.columns = column_names 22 | if remove_same_val: 23 | temp = temp.loc[(temp[column_names[0]] != temp[column_names[1]])] 24 | return temp 25 | 26 | 27 | def get_union(relation_1, relation_2): 28 | return cudf.concat([relation_1, relation_2], 29 | ignore_index=True).drop_duplicates() 30 | 31 | 32 | def get_dataset(filename, column_names=['column 1', 'column 2'], 33 | rows=None): 34 | if rows != None: 35 | nrows = rows 36 | else: 37 | nrows = int(re.search('\d+|$', filename).group()) 38 | return cudf.read_csv(filename, sep='\t', header=None, 39 | names=column_names, nrows=nrows) 40 | 41 | 42 | def get_sg(dataset): 43 | COLUMN_NAMES = ['column 1', 'column 2'] 44 | rows = int(re.search('\d+|$', dataset).group()) 45 | start_time_outer = time.perf_counter() 46 | relation_1 = get_dataset(dataset, COLUMN_NAMES, rows) 47 | relation_2 = relation_1.copy() 48 | # sg(x, y): - edge(p, x), edge(p, y), x != y. 49 | temp_result = get_projection(get_join(relation_1, relation_2, 50 | COLUMN_NAMES), COLUMN_NAMES, remove_same_val=True) 51 | i = 0 52 | relation_2 = temp_result 53 | while True: 54 | # tmp(b, x): - edge(a, x), sg(a, b). 55 | temp_projection = get_projection(get_join(relation_1, relation_2, 56 | COLUMN_NAMES), COLUMN_NAMES) 57 | temp_projection.columns = COLUMN_NAMES[::-1] 58 | # sg(x, y): - tmp(b, x), edge(b, y). 
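        # [Editorial note, illustrative only] With a hypothetical edge set
        # {(p, a), (p, b), (a, c), (b, d)}: the base rule above gives
        # sg = {(a, b), (b, a)}; the tmp rule then derives
        # tmp = {(b, c), (a, d)}, and the join below with edge produces the
        # new same-generation pairs {(c, d), (d, c)}, which get_union adds
        # to the accumulated result.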
59 | temp_projection_2 = get_projection(get_join(temp_projection, relation_1, 60 | COLUMN_NAMES), COLUMN_NAMES) 61 | relation_2 = temp_projection_2 62 | previous_result_size = len(temp_result) 63 | temp_result = get_union(temp_result, relation_2) 64 | current_result_size = len(temp_result) 65 | if previous_result_size == current_result_size: 66 | i += 1 67 | break 68 | i += 1 69 | del temp_projection 70 | del temp_projection_2 71 | end_time_outer = time.perf_counter() 72 | time_took = end_time_outer - start_time_outer 73 | time_took = f"{time_took:.6f}" 74 | return rows, len(temp_result), int(i), time_took 75 | 76 | 77 | def generate_benchmark(datasets=None): 78 | result = [] 79 | print("| Dataset | Number of rows | SG size | Iterations | Time (s) |") 80 | print("| --- | --- | --- | --- | --- |") 81 | for key, dataset in datasets.items(): 82 | time_took = [] 83 | record = None 84 | try: 85 | # Omit the warm up round timing 86 | warm_up = get_sg(dataset) 87 | for i in range(REPEAT): 88 | try: 89 | record = get_sg(dataset) 90 | time_took.append(float(record[3])) 91 | except Exception as ex: 92 | print(str(ex)) 93 | record = list(record) 94 | record[3] = f"{(sum(time_took) / REPEAT):.6f}" 95 | record.insert(0, key) 96 | result.append(record) 97 | message = " | ".join([str(s) for s in record]) 98 | message = "| " + message + " |" 99 | print(message) 100 | except Exception as ex: 101 | print(f"Error in {key}. Message: {str(ex)}") 102 | print("\n") 103 | with open('sg.json', 'w') as f: 104 | json.dump(result, f) 105 | 106 | 107 | if __name__ == "__main__": 108 | generate_benchmark(datasets={ 109 | "hipc": "../../data/data_5.txt", 110 | "fe_body": "../../data/data_163734.txt", 111 | "loc-Brightkite": "../../data/data_214078.txt", 112 | "fe_sphere": "../../data/data_49152.txt", 113 | "CA-HepTh": "../../data/data_51971.txt", 114 | # "SF.cedge": "../../data/data_223001.txt", 115 | "ego-Facebook": "../../data/data_88234.txt", 116 | "wiki-Vote": "../../data/data_103689.txt", 117 | "luxembourg_osm": "../../data/data_119666.txt", 118 | "cti": "../../data/data_48232.txt", 119 | "fe_ocean": "../../data/data_409593.txt", 120 | "wing": "../../data/data_121544.txt", 121 | "delaunay_n16": "../../data/data_196575.txt", 122 | "usroads": "../../data/data_165435.txt", 123 | "p2p-Gnutella31": "../../data/data_147892.txt", 124 | "p2p-Gnutella09": "../../data/data_26013.txt", 125 | "p2p-Gnutella04": "../../data/data_39994.txt", 126 | "cal.cedge": "../../data/data_21693.txt", 127 | "TG.cedge": "../../data/data_23874.txt", 128 | "OL.cedge": "../../data/data_7035.txt", 129 | }) 130 | -------------------------------------------------------------------------------- /test/datastructure.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "../include/exception.cuh" 12 | #include "../include/lie.cuh" 13 | #include "../include/print.cuh" 14 | #include "../include/timer.cuh" 15 | 16 | long int get_row_size(const char *data_path) { 17 | std::ifstream f; 18 | f.open(data_path); 19 | char c; 20 | long i = 0; 21 | while (f.get(c)) 22 | if (c == '\n') 23 | ++i; 24 | f.close(); 25 | return i; 26 | } 27 | 28 | enum ColumnT { U64, U32 }; 29 | 30 | column_type *get_relation_from_file(const char *file_path, int total_rows, 31 | int total_columns, char separator, 32 | ColumnT ct) { 33 | column_type *data = 34 | (column_type *)malloc(total_rows * total_columns * 
sizeof(column_type)); 35 | FILE *data_file = fopen(file_path, "r"); 36 | for (int i = 0; i < total_rows; i++) { 37 | for (int j = 0; j < total_columns; j++) { 38 | if (j != (total_columns - 1)) { 39 | if (ct == U64) { 40 | fscanf(data_file, "%lld%c", &data[(i * total_columns) + j], 41 | &separator); 42 | } else { 43 | fscanf(data_file, "%ld%c", &data[(i * total_columns) + j], 44 | &separator); 45 | } 46 | } else { 47 | if (ct == U64) { 48 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 49 | } else { 50 | fscanf(data_file, "%ld", &data[(i * total_columns) + j]); 51 | } 52 | } 53 | } 54 | } 55 | return data; 56 | } 57 | 58 | __device__ void reorder_path(tuple_type inner, tuple_type outer, 59 | tuple_type newt) { 60 | newt[0] = inner[1]; 61 | newt[1] = outer[1]; 62 | }; 63 | __device__ tuple_generator_hook reorder_path_device = reorder_path; 64 | 65 | void datastructure_bench(const char *dataset_path, int block_size, 66 | int grid_size) { 67 | KernelTimer timer; 68 | int relation_columns = 2; 69 | std::chrono::high_resolution_clock::time_point time_point_begin; 70 | std::chrono::high_resolution_clock::time_point time_point_end; 71 | time_point_begin = std::chrono::high_resolution_clock::now(); 72 | double spent_time; 73 | 74 | // load the raw graph 75 | tuple_size_t graph_edge_counts = get_row_size(dataset_path); 76 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 77 | // u64 graph_edge_counts = 2100; 78 | column_type *raw_graph_data = 79 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t', U32); 80 | column_type *raw_reverse_graph_data = 81 | (column_type *)malloc(graph_edge_counts * 2 * sizeof(column_type)); 82 | std::cout << "reversing graph ... " << std::endl; 83 | for (tuple_size_t i = 0; i < graph_edge_counts; i++) { 84 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 85 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 86 | } 87 | column_type *raw_graph_data_gpu; 88 | cudaMalloc((void **)&raw_graph_data_gpu, 89 | graph_edge_counts * 2 * sizeof(column_type)); 90 | cudaMemcpy(raw_graph_data_gpu, raw_graph_data, 91 | graph_edge_counts * 2 * sizeof(column_type), 92 | cudaMemcpyHostToDevice); 93 | tuple_type *raw_graph_data_gpu_tuple; 94 | cudaMalloc((void **)&raw_graph_data_gpu_tuple, 95 | graph_edge_counts * sizeof(tuple_type)); 96 | init_tuples_unsorted<<>>( 97 | raw_graph_data_gpu_tuple, raw_graph_data_gpu, 2, graph_edge_counts); 98 | checkCuda(cudaDeviceSynchronize()); 99 | thrust::sort(thrust::device, raw_graph_data_gpu_tuple, 100 | raw_graph_data_gpu_tuple + graph_edge_counts, 101 | tuple_indexed_less(1, 2)); 102 | checkCuda(cudaDeviceSynchronize()); 103 | column_type *raw_graph_data_gpu_sorted; 104 | cudaMalloc((void **)&raw_graph_data_gpu_sorted, 105 | graph_edge_counts * 2 * sizeof(column_type)); 106 | flatten_tuples_raw_data<<>>( 107 | raw_graph_data_gpu_tuple, raw_graph_data_gpu_sorted, graph_edge_counts, 108 | 2); 109 | std::cout << "finish reverse graph." 
<< std::endl; 110 | 111 | std::cout << "Testing datastructure build <<<<<<<<<<<<<<< " << std::endl; 112 | int REPEAT = 100; 113 | float build_table_time = 0; 114 | 115 | for (int i = 0; i < REPEAT; i++) { 116 | Relation *path_2__1_2 = new Relation(); 117 | path_2__1_2->index_flag = false; 118 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 119 | // std::cout << "edge size " << graph_edge_counts << std::endl; 120 | // load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 121 | // graph_edge_counts, 1, 0, grid_size, block_size); 122 | path_2__1_2->full = new GHashRelContainer(2, 1, 0); 123 | timer.start_timer(); 124 | float load_detail_time[5] = {0, 0, 0, 0, 0}; 125 | load_relation_container(path_2__1_2->full, 2, raw_graph_data_gpu_sorted, 126 | graph_edge_counts, 1, 0, 0.8, grid_size, 127 | block_size, load_detail_time, true, true); 128 | timer.stop_timer(); 129 | build_table_time += timer.get_spent_time(); 130 | // load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 131 | // graph_edge_counts, 1, 0, grid_size, block_size); 132 | path_2__1_2->full->tuple_counts = 0; 133 | path_2__1_2->full->index_map_size = 0; 134 | path_2__1_2->full->data_raw_row_size = 0; 135 | if (path_2__1_2->full->index_map != nullptr) { 136 | checkCuda(cudaFree(path_2__1_2->full->index_map)); 137 | path_2__1_2->full->index_map = nullptr; 138 | } 139 | if (path_2__1_2->full->tuples != nullptr) { 140 | checkCuda(cudaFree(path_2__1_2->full->tuples)); 141 | path_2__1_2->full->tuples = nullptr; 142 | } 143 | } 144 | 145 | std::cout << "Graph size: " << graph_edge_counts << std::endl; 146 | std::cout << "Build hash table time: " << build_table_time << std::endl; 147 | std::cout << "HashTable build ratio : " 148 | << graph_edge_counts * REPEAT / build_table_time << std::endl; 149 | 150 | Relation *edge_2__2_1 = new Relation(); 151 | // cudaMallocHost((void **)&edge_2__2_1, sizeof(Relation)); 152 | Relation *path_2__1_2 = new Relation(); 153 | path_2__1_2->index_flag = false; 154 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 155 | std::cout << "edge size " << graph_edge_counts << std::endl; 156 | load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 157 | graph_edge_counts, 1, 0, grid_size, block_size); 158 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 159 | graph_edge_counts, 1, 0, grid_size, block_size); 160 | tuple_generator_hook reorder_path_host; 161 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 162 | sizeof(tuple_generator_hook)); 163 | std::cout << "Testing datastructure query <<<<<<<<<<<<<<< " << std::endl; 164 | 165 | float join_time[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 166 | RelationalJoin join_test(edge_2__2_1, FULL, path_2__1_2, FULL, path_2__1_2, 167 | reorder_path_host, nullptr, LEFT, grid_size, 168 | block_size, join_time); 169 | float query_time = 0; 170 | for (int i = 0; i < REPEAT; i++) { 171 | join_test.disable_load = true; 172 | timer.start_timer(); 173 | join_test(); 174 | timer.stop_timer(); 175 | query_time += timer.get_spent_time(); 176 | } 177 | 178 | std::cout << "Query time: " << query_time << std::endl; 179 | std::cout << "HashTable query ratio : " 180 | << graph_edge_counts * REPEAT / query_time << std::endl; 181 | } 182 | 183 | int main(int argc, char *argv[]) { 184 | int device_id; 185 | int number_of_sm; 186 | cudaGetDevice(&device_id); 187 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 188 | device_id); 189 | std::cout << "num of sm " << number_of_sm << std::endl; 190 
| std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 191 | << std::endl; 192 | int block_size, grid_size; 193 | block_size = 512; 194 | grid_size = 32 * number_of_sm; 195 | std::locale loc(""); 196 | 197 | datastructure_bench(argv[1], block_size, grid_size); 198 | return 0; 199 | } 200 | -------------------------------------------------------------------------------- /test/merge.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "../include/lie.cuh" 16 | #include "../include/print.cuh" 17 | #include "../include/timer.cuh" 18 | 19 | long int get_row_size(const char *data_path) { 20 | std::ifstream f; 21 | f.open(data_path); 22 | char c; 23 | long i = 0; 24 | while (f.get(c)) 25 | if (c == '\n') 26 | ++i; 27 | f.close(); 28 | return i; 29 | } 30 | 31 | enum ColumnT { U64, U32 }; 32 | 33 | column_type *get_relation_from_file(const char *file_path, int total_rows, 34 | int total_columns, char separator, 35 | ColumnT ct) { 36 | column_type *data = 37 | (column_type *)malloc(total_rows * total_columns * sizeof(column_type)); 38 | FILE *data_file = fopen(file_path, "r"); 39 | for (int i = 0; i < total_rows; i++) { 40 | for (int j = 0; j < total_columns; j++) { 41 | if (j != (total_columns - 1)) { 42 | if (ct == U64) { 43 | fscanf(data_file, "%lld%c", &data[(i * total_columns) + j], 44 | &separator); 45 | } else { 46 | fscanf(data_file, "%ld%c", &data[(i * total_columns) + j], 47 | &separator); 48 | } 49 | } else { 50 | if (ct == U64) { 51 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 52 | } else { 53 | fscanf(data_file, "%ld", &data[(i * total_columns) + j]); 54 | } 55 | } 56 | } 57 | } 58 | return data; 59 | } 60 | 61 | __device__ void reorder_path(tuple_type inner, tuple_type outer, 62 | tuple_type newt) { 63 | newt[0] = inner[1]; 64 | newt[1] = outer[1]; 65 | }; 66 | __device__ tuple_generator_hook reorder_path_device = reorder_path; 67 | 68 | int main(int argc, char *argv[]) { 69 | auto dataset_path = argv[1]; 70 | int device_id; 71 | int number_of_sm; 72 | cudaGetDevice(&device_id); 73 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 74 | device_id); 75 | int max_threads_per_block; 76 | cudaDeviceGetAttribute(&max_threads_per_block, 77 | cudaDevAttrMaxThreadsPerBlock, 0); 78 | std::cout << "num of sm " << number_of_sm << " num of thread per block " 79 | << max_threads_per_block << std::endl; 80 | std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 81 | << std::endl; 82 | ; 83 | int block_size, grid_size; 84 | block_size = 512; 85 | grid_size = 32 * number_of_sm; 86 | std::locale loc(""); 87 | 88 | int relation_columns = 2; 89 | std::chrono::high_resolution_clock::time_point time_point_begin; 90 | std::chrono::high_resolution_clock::time_point time_point_end; 91 | time_point_begin = std::chrono::high_resolution_clock::now(); 92 | double spent_time; 93 | KernelTimer timer; 94 | 95 | // load the raw graph 96 | tuple_size_t graph_edge_counts = get_row_size(dataset_path); 97 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 98 | // u64 graph_edge_counts = 2100; 99 | column_type *raw_graph_data = 100 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t', U32); 101 | column_type *raw_reverse_graph_data = 102 | (column_type *)malloc(graph_edge_counts * 2 * 
sizeof(column_type)); 103 | std::cout << "reversing graph ... " << std::endl; 104 | for (tuple_size_t i = 0; i < graph_edge_counts; i++) { 105 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 106 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 107 | } 108 | std::cout << "finish reverse graph." << std::endl; 109 | 110 | int REPEAT = 10; 111 | // init the tuples 112 | time_point_end = std::chrono::high_resolution_clock::now(); 113 | spent_time = std::chrono::duration_cast>( 114 | time_point_end - time_point_begin) 115 | .count(); 116 | std::cout << "init tuples time: " << spent_time << std::endl; 117 | column_type *tuple_hashvs; 118 | cudaMalloc((void **)&tuple_hashvs, graph_edge_counts * sizeof(column_type)); 119 | column_type *col_tmp; 120 | cudaMalloc((void **)&col_tmp, graph_edge_counts * sizeof(column_type)); 121 | 122 | // load raw data into edge relation 123 | time_point_begin = std::chrono::high_resolution_clock::now(); 124 | Relation *edge_2__2_1 = new Relation(); 125 | // cudaMallocHost((void **)&edge_2__2_1, sizeof(Relation)); 126 | Relation *path_2__1_2 = new Relation(); 127 | path_2__1_2->index_flag = false; 128 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 129 | std::cout << "edge size " << graph_edge_counts << std::endl; 130 | load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 131 | graph_edge_counts, 1, 0, grid_size, block_size); 132 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 133 | graph_edge_counts, 1, 0, grid_size, block_size); 134 | LIE tc_scc(grid_size, block_size); 135 | tc_scc.max_iteration = 277; 136 | tc_scc.reload_full_flag = false; 137 | tc_scc.add_relations(edge_2__2_1, true); 138 | tc_scc.add_relations(path_2__1_2, false); 139 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 140 | tuple_generator_hook reorder_path_host; 141 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 142 | sizeof(tuple_generator_hook)); 143 | tuple_copy_hook cp_1_host; 144 | RelationalJoin join_op(edge_2__2_1, FULL, path_2__1_2, DELTA, path_2__1_2, 145 | reorder_path_host, nullptr, LEFT, grid_size, 146 | block_size, join_detail); 147 | tc_scc.add_ra(join_op); 148 | timer.start_timer(); 149 | tc_scc.fixpoint_loop(); 150 | timer.stop_timer(); 151 | 152 | std::cout << "Path counts " << path_2__1_2->full->tuple_counts << std::endl; 153 | // print_tuple_rows(path_2__2_1->full, "full"); 154 | std::cout << "TC time: " << timer.get_spent_time() << std::endl; 155 | std::cout << "join detail: " << std::endl; 156 | std::cout << "compute size time: " << join_detail[0] << std::endl; 157 | std::cout << "reduce + scan time: " << join_detail[1] << std::endl; 158 | std::cout << "fetch result time: " << join_detail[2] << std::endl; 159 | std::cout << "sort time: " << join_detail[3] << std::endl; 160 | std::cout << "build index time: " << join_detail[5] << std::endl; 161 | std::cout << "merge time: " << join_detail[6] << std::endl; 162 | std::cout << "unique time: " << join_detail[4] + join_detail[7] 163 | << std::endl; 164 | 165 | join_op(); 166 | print_memory_usage(); 167 | // deduplicate with full 168 | time_point_begin = std::chrono::high_resolution_clock::now(); 169 | std::cout << "start deduplicate with full ..." 
<< std::endl; 170 | tuple_type *dedup_buf; 171 | cudaMalloc((void **)&dedup_buf, 172 | path_2__1_2->current_full_size * sizeof(tuple_type)); 173 | cudaDeviceSynchronize(); 174 | tuple_type *dedup_buf_end = thrust::set_difference( 175 | thrust::device, path_2__1_2->newt->tuples, 176 | path_2__1_2->newt->tuples + path_2__1_2->newt->tuple_counts, 177 | path_2__1_2->tuple_full, 178 | path_2__1_2->tuple_full + path_2__1_2->current_full_size, dedup_buf, 179 | tuple_indexed_less(path_2__1_2->full->index_column_size, 180 | path_2__1_2->full->arity - 181 | path_2__1_2->dependent_column_size)); 182 | tuple_size_t tp_counts = dedup_buf_end - dedup_buf; 183 | time_point_end = std::chrono::high_resolution_clock::now(); 184 | spent_time = std::chrono::duration_cast>( 185 | time_point_end - time_point_begin) 186 | .count(); 187 | std::cout << "deduplicate with full time: " << spent_time << std::endl; 188 | 189 | // test merge speed 190 | 191 | tuple_type *merge_buf; 192 | std::cout << "start merge test ..." << std::endl; 193 | std::cout << "full size " << path_2__1_2->full->tuple_counts << std::endl; 194 | 195 | double alloc_time = 0; 196 | for (int i = 0; i < REPEAT; i++) { 197 | timer.start_timer(); 198 | cudaMalloc((void **)&merge_buf, (path_2__1_2->full->tuple_counts + 199 | tp_counts) * sizeof(tuple_type)); 200 | timer.stop_timer(); 201 | alloc_time += timer.get_spent_time(); 202 | cudaFree(merge_buf); 203 | merge_buf = nullptr; 204 | } 205 | cudaMalloc((void **)&merge_buf, (path_2__1_2->full->tuple_counts + 206 | tp_counts) * sizeof(tuple_type)); 207 | std::cout << "alloc merge buf time: " << alloc_time << std::endl; 208 | 209 | std::cout << "start merge test 2 ..." << std::endl; 210 | 211 | double resize_time = 0; 212 | for (int i = 0; i < REPEAT; i++) { 213 | thrust::device_vector full_buf_vec(path_2__1_2->full->tuples, path_2__1_2->full->tuples + path_2__1_2->full->tuple_counts); 214 | timer.start_timer(); 215 | full_buf_vec.resize(path_2__1_2->full->tuple_counts+ tp_counts); 216 | timer.stop_timer(); 217 | resize_time += timer.get_spent_time(); 218 | } 219 | 220 | std::cout << "resize merge buf time: " << resize_time << std::endl; 221 | 222 | std::cout << "dedup size " << tp_counts << std::endl; 223 | print_memory_usage(); 224 | timer.start_timer(); 225 | for (int i = 0; i < REPEAT; i++) { 226 | thrust::merge(thrust::device, path_2__1_2->tuple_full, 227 | path_2__1_2->tuple_full + path_2__1_2->current_full_size, 228 | dedup_buf, dedup_buf_end, merge_buf, 229 | tuple_indexed_less(path_2__1_2->full->index_column_size, 230 | path_2__1_2->full->arity)); 231 | } 232 | timer.stop_timer(); 233 | std::cout << "merge int once time: " << timer.get_spent_time() << std::endl; 234 | 235 | // std::cout << "start merge test 2 ..." << std::endl; 236 | // thrust::device_vector full_buf_vec(path_2__1_2->full->tuples, path_2__1_2->full->tuples + path_2__1_2->full->tuple_counts); 237 | // thrust::device_vector dedup_buf_vec(dedup_buf, dedup_buf_end); 238 | // for (int i = 0; i < REPEAT; i++) { 239 | // timer.start_timer(); 240 | // thrust::merge(thrust::device, full_buf_vec.begin(), 241 | // full_buf_vec.end(), 242 | // dedup_buf_vec.begin(), dedup_buf_vec.end(), merge_buf, 243 | // tuple_indexed_less(path_2__1_2->full->index_column_size, 244 | // path_2__1_2->full->arity)); 245 | // timer.stop_timer(); 246 | // } 247 | 248 | // std::cout << "start multi merge test ..." 
<< std::endl; 249 | // tuple_size_t merge_step = 5000; 250 | // time_point_begin = std::chrono::high_resolution_clock::now(); 251 | // for(tuple_size_t i = 0; i < path_2__1_2->full->tuple_counts; i += merge_step) { 252 | // tuple_size_t merge_size = merge_step; 253 | // if (i + merge_step > path_2__1_2->full->tuple_counts) { 254 | // merge_size = path_2__1_2->full->tuple_counts - i; 255 | // } 256 | // cudaDeviceSynchronize(); 257 | // thrust::merge(thrust::device, path_2__1_2->tuple_full + i, 258 | // path_2__1_2->tuple_full + i + merge_size, 259 | // dedup_buf, dedup_buf_end, merge_buf, 260 | // tuple_indexed_less(path_2__1_2->full->index_column_size, 261 | // path_2__1_2->full->arity)); 262 | // } 263 | // cudaDeviceSynchronize(); 264 | // time_point_end = std::chrono::high_resolution_clock::now(); 265 | // spent_time = std::chrono::duration_cast>( 266 | // time_point_end - time_point_begin) 267 | // .count(); 268 | // std::cout << "multi merge time 1: " << spent_time << std::endl; 269 | 270 | // std::cout << "start multi merge test 2 ..." << std::endl; 271 | // timer.start_timer(); 272 | // tuple_type *merge_buf_2; 273 | // cudaMalloc((void **)&merge_buf_2, path_2__1_2->full->tuple_counts * sizeof(tuple_type)); 274 | // tuple_type *merge_buf_3; 275 | // cudaMalloc((void **)&merge_buf_3, path_2__1_2->full->tuple_counts * sizeof(tuple_type)); 276 | // tuple_size_t cur_merged_size = 0; 277 | // print_memory_usage(); 278 | // cudaDeviceSynchronize(); 279 | // time_point_begin = std::chrono::high_resolution_clock::now(); 280 | // for(tuple_size_t i = 0; i < path_2__1_2->full->tuple_counts; i += merge_step) { 281 | // tuple_size_t merge_size = merge_step; 282 | // if (i + merge_step > path_2__1_2->full->tuple_counts) { 283 | // merge_size = path_2__1_2->full->tuple_counts - i; 284 | // } 285 | // thrust::merge(thrust::device, path_2__1_2->tuple_full + i, 286 | // path_2__1_2->tuple_full + i + merge_size, 287 | // merge_buf_2, merge_buf_2 + cur_merged_size, merge_buf_2, 288 | // tuple_indexed_less(path_2__1_2->full->index_column_size, 289 | // path_2__1_2->full->arity)); 290 | // cudaDeviceSynchronize(); 291 | // cur_merged_size += merge_size; 292 | // } 293 | // time_point_end = std::chrono::high_resolution_clock::now(); 294 | // spent_time = std::chrono::duration_cast>( 295 | // time_point_end - time_point_begin) 296 | // .count(); 297 | // std::cout << "multi merge time 2: " << spent_time << std::endl; 298 | cudaFree(merge_buf); 299 | 300 | std::cout << "stupid test .... 
" << std::endl; 301 | thrust::host_vector H1(4); 302 | // initialize individual elements 303 | H1[0] = 14; 304 | H1[1] = 20; 305 | H1[2] = 38; 306 | H1[3] = 46; 307 | thrust::host_vector H2(3); 308 | // initialize individual elements 309 | H2[0] = 12; 310 | H2[1] = 31; 311 | H2[2] = 53; 312 | thrust::device_vector h1_device = H1; 313 | thrust::device_vector h2_device = H2; 314 | // h1_device.resize(7); 315 | thrust::merge(thrust::device, h1_device.begin(), h1_device.begin()+4, 316 | h2_device.begin(), h2_device.end(), h1_device.begin(), thrust::less()); 317 | thrust::host_vector H3 = h1_device; 318 | for (int i = 0; i < H3.size(); i++) { 319 | std::cout << H3[i] << std::endl; 320 | } 321 | 322 | return 0; 323 | } 324 | -------------------------------------------------------------------------------- /test/path_length.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../include/exception.cuh" 11 | #include "../include/lie.cuh" 12 | #include "../include/print.cuh" 13 | #include "../include/timer.cuh" 14 | 15 | ////////////////////////////////////////////////////// 16 | 17 | long int get_row_size(const char *data_path) { 18 | std::ifstream f; 19 | f.open(data_path); 20 | char c; 21 | long i = 0; 22 | while (f.get(c)) 23 | if (c == '\n') 24 | ++i; 25 | f.close(); 26 | return i; 27 | } 28 | 29 | column_type *get_relation_from_file(const char *file_path, int total_rows, 30 | int total_columns, char separator) { 31 | column_type *data = 32 | (column_type *)malloc(total_rows * total_columns * sizeof(column_type)); 33 | FILE *data_file = fopen(file_path, "r"); 34 | for (int i = 0; i < total_rows; i++) { 35 | for (int j = 0; j < total_columns; j++) { 36 | if (j != (total_columns - 1)) { 37 | fscanf(data_file, "%lld%c", &data[(i * total_columns) + j], 38 | &separator); 39 | } else { 40 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 41 | } 42 | } 43 | } 44 | return data; 45 | } 46 | 47 | ////////////////////////////////////////////////////////////////// 48 | 49 | __device__ void reorder_path(tuple_type inner, tuple_type outer, 50 | tuple_type newt) { 51 | newt[0] = inner[1]; 52 | newt[1] = outer[1]; 53 | newt[2] = outer[2] + 1; 54 | }; 55 | __device__ tuple_generator_hook reorder_path_device = reorder_path; 56 | 57 | void analysis_bench(const char *dataset_path, int block_size, int grid_size) { 58 | KernelTimer timer; 59 | int relation_columns = 2; 60 | std::chrono::high_resolution_clock::time_point time_point_begin; 61 | std::chrono::high_resolution_clock::time_point time_point_end; 62 | time_point_begin = std::chrono::high_resolution_clock::now(); 63 | double spent_time; 64 | 65 | // load the raw graph 66 | u64 graph_edge_counts = get_row_size(dataset_path); 67 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 68 | // u64 graph_edge_counts = 2100; 69 | column_type *raw_graph_data = 70 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t'); 71 | column_type *raw_reverse_graph_data; 72 | u64 raw_reverse_graph_data_mem_size = 73 | graph_edge_counts * 2 * sizeof(column_type); 74 | cudaMallocHost((void **)&raw_reverse_graph_data, 75 | raw_reverse_graph_data_mem_size); 76 | cudaMemset(raw_reverse_graph_data, 0, raw_reverse_graph_data_mem_size); 77 | column_type *raw_path_data; 78 | u64 raw_path_data_mem_size = graph_edge_counts * 3 * sizeof(column_type); 79 | cudaMallocHost((void **)&raw_path_data, 80 | 
raw_path_data_mem_size); 81 | cudaMemset(raw_path_data, 0, raw_path_data_mem_size); 82 | 83 | std::cout << "init path ... " << std::endl; 84 | for (u64 i = 0; i < graph_edge_counts; i++) { 85 | raw_path_data[i * 3] = raw_graph_data[i * 2]; 86 | raw_path_data[i * 3 + 1] = raw_graph_data[i * 2 + 1]; 87 | raw_path_data[i * 3 + 2] = 1; 88 | } 89 | 90 | std::cout << "reversing graph ... " << std::endl; 91 | for (u64 i = 0; i < graph_edge_counts; i++) { 92 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 93 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 94 | } 95 | std::cout << "finish reverse graph." << std::endl; 96 | 97 | timer.start_timer(); 98 | Relation *edge_2__2_1 = new Relation(); 99 | // cudaMallocHost((void **)&edge_2__2_1, sizeof(Relation)); 100 | Relation *path_3__1_2_3 = new Relation(); 101 | path_3__1_2_3->index_flag = false; 102 | // cudaMallocHost((void **)&path_3__1_2_3, sizeof(Relation)); 103 | std::cout << "edge size " << graph_edge_counts << std::endl; 104 | load_relation(path_3__1_2_3, "path_3__1_2_3", 3, raw_path_data, 105 | graph_edge_counts, 1, 1, grid_size, block_size); 106 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 107 | graph_edge_counts, 1, 0, grid_size, block_size); 108 | timer.stop_timer(); 109 | // double kernel_spent_time = timer.get_spent_time(); 110 | std::cout << "Build hash table time: " << timer.get_spent_time() 111 | << std::endl; 112 | 113 | timer.start_timer(); 114 | LIE tc_scc(grid_size, block_size); 115 | tc_scc.add_relations(edge_2__2_1, true); 116 | tc_scc.add_relations(path_3__1_2_3, false); 117 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 118 | tuple_generator_hook reorder_path_host; 119 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 120 | sizeof(tuple_generator_hook)); 121 | tc_scc.add_ra(RelationalJoin(edge_2__2_1, FULL, path_3__1_2_3, DELTA, 122 | path_3__1_2_3, reorder_path_host, nullptr, 123 | LEFT, grid_size, block_size, join_detail)); 124 | tc_scc.fixpoint_loop(); 125 | timer.stop_timer(); 126 | // print_tuple_rows(path_3__1_2_3->full, "full path"); 127 | std::cout << "PLEN time: " << timer.get_spent_time() << std::endl; 128 | std::cout << "join detail: " << std::endl; 129 | std::cout << "compute size time: " << join_detail[0] << std::endl; 130 | std::cout << "reduce + scan time: " << join_detail[1] << std::endl; 131 | std::cout << "fetch result time: " << join_detail[2] << std::endl; 132 | std::cout << "sort time: " << join_detail[3] << std::endl; 133 | std::cout << "build index time: " << join_detail[5] << std::endl; 134 | std::cout << "merge time: " << join_detail[6] << std::endl; 135 | std::cout << "unique time: " << join_detail[4] + join_detail[7] << std::endl; 136 | } 137 | 138 | int main(int argc, char *argv[]) { 139 | int device_id; 140 | int number_of_sm; 141 | cudaGetDevice(&device_id); 142 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 143 | device_id); 144 | std::cout << "num of sm " << number_of_sm << std::endl; 145 | std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 146 | << std::endl; 147 | int block_size, grid_size; 148 | block_size = 512; 149 | grid_size = 32 * number_of_sm; 150 | std::locale loc(""); 151 | 152 | analysis_bench(argv[1], block_size, grid_size); 153 | return 0; 154 | } 155 | -------------------------------------------------------------------------------- /test/sg.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 
#include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../include/exception.cuh" 11 | #include "../include/lie.cuh" 12 | #include "../include/print.cuh" 13 | #include "../include/timer.cuh" 14 | 15 | ////////////////////////////////////////////////////// 16 | 17 | long int get_row_size(const char *data_path) { 18 | std::ifstream f; 19 | f.open(data_path); 20 | char c; 21 | long i = 0; 22 | while (f.get(c)) 23 | if (c == '\n') 24 | ++i; 25 | f.close(); 26 | return i; 27 | } 28 | 29 | enum ColumnT { U64, U32 }; 30 | 31 | column_type *get_relation_from_file(const char *file_path, int total_rows, 32 | int total_columns, char separator, 33 | ColumnT ct) { 34 | column_type *data = 35 | (column_type *)malloc(total_rows * total_columns * sizeof(column_type)); 36 | FILE *data_file = fopen(file_path, "r"); 37 | for (int i = 0; i < total_rows; i++) { 38 | for (int j = 0; j < total_columns; j++) { 39 | if (j != (total_columns - 1)) { 40 | if (ct == U64) { 41 | fscanf(data_file, "%lld%c", &data[(i * total_columns) + j], 42 | &separator); 43 | } else { 44 | fscanf(data_file, "%ld%c", &data[(i * total_columns) + j], 45 | &separator); 46 | } 47 | } else { 48 | if (ct == U64) { 49 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 50 | } else { 51 | fscanf(data_file, "%ld", &data[(i * total_columns) + j]); 52 | } 53 | } 54 | } 55 | } 56 | return data; 57 | } 58 | 59 | ////////////////////////////////////////////////////////////////// 60 | 61 | 62 | __device__ void reorder_path(tuple_type inner, tuple_type outer, 63 | tuple_type newt) { 64 | newt[0] = inner[1]; 65 | newt[1] = outer[1]; 66 | }; 67 | __device__ void reorder_path1(tuple_type inner, tuple_type outer, 68 | tuple_type newt) { 69 | newt[0] = outer[1]; 70 | newt[1] = inner[1]; 71 | }; 72 | 73 | // sg(x, y) :- edge(a, x), edge(b, y), sg(a, b) 74 | // __device__ void reorder_path1_3arity(tuple_type inner1, tuple_type inner2, tuple_type outer, 75 | // tuple_type newt) { 76 | // newt[0] = inner1[1]; 77 | // newt[1] = inner2[1]; 78 | // }; 79 | 80 | __device__ tuple_generator_hook reorder_path_device = reorder_path; 81 | __device__ tuple_generator_hook reorder_path1_device = reorder_path1; 82 | // __device__ tuple_generator_hook reorder_path1_3arity_device = reorder_path1_3arity; 83 | 84 | __device__ void cp_1(tuple_type src, tuple_type dest) { 85 | dest[0] = src[1]; 86 | dest[1] = src[0]; 87 | } 88 | __device__ tuple_copy_hook cp_1_device = cp_1; 89 | 90 | __device__ bool tuple_pred_eq_11(tuple_type t) { return t[0] != t[1]; } 91 | __device__ tuple_predicate tuple_pred_eq_11_device = tuple_pred_eq_11; 92 | 93 | void analysis_bench(const char *dataset_path, int block_size, int grid_size) { 94 | KernelTimer timer; 95 | int relation_columns = 2; 96 | std::chrono::high_resolution_clock::time_point time_point_begin; 97 | std::chrono::high_resolution_clock::time_point time_point_end; 98 | time_point_begin = std::chrono::high_resolution_clock::now(); 99 | double spent_time; 100 | 101 | // load the raw graph 102 | tuple_size_t graph_edge_counts = get_row_size(dataset_path); 103 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 104 | // u64 graph_edge_counts = 2100; 105 | column_type *raw_graph_data = 106 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t', U32); 107 | column_type *raw_reverse_graph_data = 108 | (column_type *)malloc(graph_edge_counts * 2 * sizeof(column_type)); 109 | std::cout << "reversing graph ... 
" << std::endl; 110 | for (tuple_size_t i = 0; i < graph_edge_counts; i++) { 111 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 112 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 113 | } 114 | std::cout << "finish reverse graph." << std::endl; 115 | 116 | timer.start_timer(); 117 | Relation *edge_2__1_2 = new Relation(); 118 | Relation *edge_2__2_1 = new Relation(); 119 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 120 | graph_edge_counts, 1, 0, grid_size, block_size); 121 | // cudaMallocHost((void **)&edge_2__2_1, sizeof(Relation)); 122 | Relation *sg_2__1_2 = new Relation(); 123 | sg_2__1_2->index_flag = false; 124 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 125 | std::cout << "edge size " << graph_edge_counts << std::endl; 126 | load_relation(sg_2__1_2, "sg_2__2_1", 2, nullptr, 0, 1, 0, grid_size, 127 | block_size); 128 | load_relation(edge_2__1_2, "edge_2__1_2", 2, raw_graph_data, 129 | graph_edge_counts, 1, 0, grid_size, block_size); 130 | timer.stop_timer(); 131 | // double kernel_spent_time = timer.get_spent_time(); 132 | std::cout << "Build hash table time: " << timer.get_spent_time() 133 | << std::endl; 134 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 135 | timer.start_timer(); 136 | LIE init_scc(grid_size, block_size); 137 | init_scc.add_relations(edge_2__1_2, true); 138 | init_scc.add_relations(sg_2__1_2, false); 139 | // sg(x, y) :- edge(p, x), edge(p, y), x != y. 140 | // sg:y,x 141 | tuple_generator_hook reorder_path_host; 142 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 143 | sizeof(tuple_generator_hook)); 144 | tuple_predicate tuple_pred_eq_11_host; 145 | cudaMemcpyFromSymbol(&tuple_pred_eq_11_host, tuple_pred_eq_11_device, 146 | sizeof(tuple_predicate)); 147 | init_scc.add_ra(RelationalJoin( 148 | edge_2__1_2, FULL, edge_2__1_2, FULL, sg_2__1_2, reorder_path_host, 149 | tuple_pred_eq_11_host, LEFT, grid_size, block_size, join_detail)); 150 | init_scc.fixpoint_loop(); 151 | timer.stop_timer(); 152 | std::cout << "sg init counts " << sg_2__1_2->full->tuple_counts 153 | << std::endl; 154 | std::cout << "sg init time: " << timer.get_spent_time() << std::endl; 155 | 156 | LIE sg_lie(grid_size, block_size); 157 | Relation *tmp = new Relation(); 158 | load_relation(tmp, "tmp", 2, nullptr, 0, 1, 0, grid_size, block_size); 159 | tmp->index_flag = false; 160 | sg_lie.add_relations(edge_2__1_2, true); 161 | sg_lie.add_relations(sg_2__1_2, false); 162 | 163 | sg_lie.add_tmp_relation(tmp); 164 | // sg(x, y) :- edge(a, x), sg(a, b), edge(b, y). 165 | // tmp(b,x) :- edge(a, x), sg(a, b). 166 | tuple_generator_hook reorder_path1_host; 167 | cudaMemcpyFromSymbol(&reorder_path1_host, reorder_path1_device, 168 | sizeof(tuple_generator_hook)); 169 | sg_lie.add_ra(RelationalJoin(edge_2__1_2, FULL, sg_2__1_2, DELTA, tmp, 170 | reorder_path1_host, nullptr, LEFT, grid_size, 171 | block_size, join_detail)); 172 | // sg(x, y) :- edge(b, y), tmp(b, x). 
173 | sg_lie.add_ra(RelationalJoin(edge_2__1_2, FULL, tmp, NEWT, sg_2__1_2, 174 | reorder_path1_host, nullptr, LEFT, grid_size, 175 | block_size, join_detail)); 176 | timer.start_timer(); 177 | sg_lie.fixpoint_loop(); 178 | timer.stop_timer(); 179 | std::cout << "sg counts " << sg_2__1_2->full->tuple_counts << std::endl; 180 | std::cout << "sg time: " << timer.get_spent_time() << std::endl; 181 | 182 | std::cout << "join detail: " << std::endl; 183 | std::cout << "compute size time: " << join_detail[0] << std::endl; 184 | std::cout << "reduce + scan time: " << join_detail[1] << std::endl; 185 | std::cout << "fetch result time: " << join_detail[2] << std::endl; 186 | std::cout << "sort time: " << join_detail[3] << std::endl; 187 | std::cout << "build index time: " << join_detail[5] << std::endl; 188 | std::cout << "merge time: " << join_detail[6] << std::endl; 189 | std::cout << "unique time: " << join_detail[4] + join_detail[7] 190 | << std::endl; 191 | } 192 | 193 | int main(int argc, char *argv[]) { 194 | int device_id; 195 | int number_of_sm; 196 | cudaGetDevice(&device_id); 197 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 198 | device_id); 199 | std::cout << "num of sm " << number_of_sm << std::endl; 200 | std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 201 | << std::endl; 202 | int block_size, grid_size; 203 | block_size = 512; 204 | grid_size = 32 * number_of_sm; 205 | std::locale loc(""); 206 | 207 | analysis_bench(argv[1], block_size, grid_size); 208 | return 0; 209 | } 210 | -------------------------------------------------------------------------------- /test/sort.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "../include/relation.cuh" 14 | #include "../include/timer.cuh" 15 | #include "../include/relational_algebra.cuh" 16 | #include "../include/print.cuh" 17 | 18 | #define EMPTY_HASH_ENTRY ULLONG_MAX 19 | 20 | using u64 = unsigned long long; 21 | using u32 = unsigned long; 22 | 23 | using column_type = u32; 24 | using tuple_type = column_type *; 25 | using tuple_size_t = u64; 26 | using t_data_internal = u64 *; 27 | 28 | typedef void (*tuple_generator_hook)(tuple_type, tuple_type, tuple_type); 29 | typedef void (*tuple_copy_hook)(tuple_type, tuple_type); 30 | typedef bool (*tuple_predicate)(tuple_type); 31 | 32 | // struct tuple_generator_hook { 33 | // __host__ __device__ 34 | // void operator()(tuple_type inner, tuple_type outer, tuple_type newt) {}; 35 | // }; 36 | 37 | 38 | // 32 bit version of fnv1-a 39 | __host__ __device__ inline u32 prefix_hash_32(tuple_type start_ptr, 40 | u64 prefix_len) { 41 | const u32 base = 2166136261U; 42 | const u32 prime = 16777619U; 43 | 44 | u32 hash = base; 45 | for (u64 i = 0; i < prefix_len; ++i) { 46 | u32 chunk = (u32)start_ptr[i]; 47 | hash ^= chunk & 255U; 48 | hash *= prime; 49 | for (char j = 0; j < 3; ++j) { 50 | chunk = chunk >> 8; 51 | hash ^= chunk & 255U; 52 | hash *= prime; 53 | } 54 | } 55 | return hash; 56 | } 57 | 58 | // 32bit xxhash version prefix hash 59 | __host__ __device__ inline u32 prefix_hash_xxhash_32(tuple_type start_ptr, 60 | u64 prefix_len) { 61 | const u32 prime = 2654435761U; 62 | u32 hash = 0; 63 | for (u64 i = 0; i < prefix_len; ++i) { 64 | u32 chunk = (u32)start_ptr[i]; 65 | hash += chunk * prime; 66 | hash += (hash << 13); 67 | hash ^= (hash >> 7); 68 | hash += 
(hash << 3); 69 | hash ^= (hash >> 17); 70 | hash += (hash << 5); 71 | } 72 | return hash; 73 | } 74 | 75 | long int get_row_size(const char *data_path) { 76 | std::ifstream f; 77 | f.open(data_path); 78 | char c; 79 | long i = 0; 80 | while (f.get(c)) 81 | if (c == '\n') 82 | ++i; 83 | f.close(); 84 | return i; 85 | } 86 | 87 | enum ColumnT { U64, U32 }; 88 | 89 | column_type *get_relation_from_file(const char *file_path, int total_rows, 90 | int total_columns, char separator, 91 | ColumnT ct) { 92 | column_type *data = 93 | (column_type *)malloc(total_rows * total_columns * sizeof(column_type)); 94 | FILE *data_file = fopen(file_path, "r"); 95 | for (int i = 0; i < total_rows; i++) { 96 | for (int j = 0; j < total_columns; j++) { 97 | if (j != (total_columns - 1)) { 98 | if (ct == U64) { 99 | fscanf(data_file, "%lld%c", &data[(i * total_columns) + j], 100 | &separator); 101 | } else { 102 | fscanf(data_file, "%ld%c", &data[(i * total_columns) + j], 103 | &separator); 104 | } 105 | } else { 106 | if (ct == U64) { 107 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 108 | } else { 109 | fscanf(data_file, "%ld", &data[(i * total_columns) + j]); 110 | } 111 | } 112 | } 113 | } 114 | return data; 115 | } 116 | 117 | // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 118 | 119 | // Number of bits per pass 120 | const int BITS_PER_PASS = 4; 121 | 122 | // Number of bins per pass 123 | const int BINS_PER_PASS = 1 << BITS_PER_PASS; 124 | 125 | // Number of threads per block 126 | const int THREADS_PER_BLOCK = 256; 127 | 128 | // Radix sort kernel 129 | __global__ void radix_sort_kernel(u32 *data, int *temp, int *histogram, 130 | int num_elements, int pass) { 131 | // Compute the global thread ID 132 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 133 | 134 | // Compute the local thread ID within the warp 135 | int lane = threadIdx.x & 31; 136 | 137 | // Compute the histogram index for this thread 138 | int index = (data[tid] >> (pass * BITS_PER_PASS)) & (BINS_PER_PASS - 1); 139 | 140 | // Compute the starting index for this bin in the temp array 141 | int start = histogram[index * blockDim.x + lane]; 142 | 143 | // Compute the ending index for this bin in the temp array 144 | int end = start + histogram[index * blockDim.x + blockDim.x - 1]; 145 | 146 | // Copy the element to the temp array 147 | temp[start + lane] = data[tid]; 148 | 149 | // Increment the histogram count for this bin 150 | atomicAdd(&histogram[index * blockDim.x + lane], 1); 151 | 152 | // Wait for all threads to finish updating the histogram 153 | __syncthreads(); 154 | 155 | // Compute the starting index for this thread's bin in the temp array 156 | start = histogram[index * blockDim.x + lane]; 157 | 158 | // Copy the element to the temp array 159 | temp[start + lane] = data[tid]; 160 | 161 | // Wait for all threads to finish copying to the temp array 162 | __syncthreads(); 163 | 164 | // Update the data array with the sorted elements 165 | data[tid] = temp[tid]; 166 | } 167 | 168 | // Radix sort function 169 | void radix_sort(column_type *data, int arity, int num_elements) { 170 | // Allocate memory for the temp array and histogram 171 | int max_threads_per_block; 172 | cudaDeviceGetAttribute(&max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, 0); 173 | int *temp, *histogram; 174 | cudaMalloc(&temp, num_elements * sizeof(int)); 175 | cudaMalloc(&histogram, BINS_PER_PASS * THREADS_PER_BLOCK * sizeof(int)); 176 | 177 | // Initialize the histogram to zero 178 | cudaMemset(histogram, 
0, BINS_PER_PASS * THREADS_PER_BLOCK * sizeof(int)); 179 | column_type pass_cnt = sizeof(column_type) * 8 * arity / BITS_PER_PASS; 180 | 181 | // Perform the radix sort passes 182 | for (column_type pass = 0; pass < pass_cnt; pass++) { 183 | // Launch the radix sort kernel 184 | radix_sort_kernel<<<(num_elements + THREADS_PER_BLOCK - 1) / 185 | THREADS_PER_BLOCK, 186 | THREADS_PER_BLOCK>>>(data+arity, temp, histogram, 187 | num_elements, pass); 188 | 189 | // Clear the histogram for the next pass 190 | cudaMemset(histogram, 0, 191 | BINS_PER_PASS * THREADS_PER_BLOCK * sizeof(int)); 192 | } 193 | 194 | // Free the memory 195 | cudaFree(temp); 196 | cudaFree(histogram); 197 | } 198 | 199 | struct t_equal_n { 200 | u64 arity; 201 | tuple_type rhs; 202 | 203 | t_equal_n(tuple_size_t arity, tuple_type target) { this->arity = arity; this->rhs = target; } 204 | 205 | __host__ __device__ bool operator()(tuple_type lhs) { 206 | for (int i = 0; i < arity; i++) { 207 | if (lhs[i] != rhs[i]) { 208 | return false; 209 | } 210 | } 211 | return true; 212 | } 213 | }; 214 | 215 | __device__ void reorder_path(tuple_type inner, tuple_type outer, 216 | tuple_type newt) { 217 | newt[0] = inner[1]; 218 | newt[1] = outer[1]; 219 | }; 220 | __device__ tuple_generator_hook reorder_path_device = reorder_path; 221 | 222 | int main(int argc, char *argv[]) { 223 | auto dataset_path = argv[1]; 224 | int device_id; 225 | int number_of_sm; 226 | cudaGetDevice(&device_id); 227 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 228 | device_id); 229 | int max_threads_per_block; 230 | cudaDeviceGetAttribute(&max_threads_per_block, cudaDevAttrMaxThreadsPerBlock, 0); 231 | std::cout << "num of sm " << number_of_sm << " num of thread per block " << max_threads_per_block << std::endl; 232 | std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 233 | << std::endl;; 234 | int block_size, grid_size; 235 | block_size = 512; 236 | grid_size = 32 * number_of_sm; 237 | std::locale loc(""); 238 | 239 | int relation_columns = 2; 240 | std::chrono::high_resolution_clock::time_point time_point_begin; 241 | std::chrono::high_resolution_clock::time_point time_point_end; 242 | time_point_begin = std::chrono::high_resolution_clock::now(); 243 | double spent_time; 244 | 245 | // load the raw graph 246 | tuple_size_t graph_edge_counts = get_row_size(dataset_path); 247 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 248 | // u64 graph_edge_counts = 2100; 249 | column_type *raw_graph_data = 250 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t', U32); 251 | column_type *raw_reverse_graph_data = 252 | (column_type *)malloc(graph_edge_counts * 2 * sizeof(column_type)); 253 | std::cout << "reversing graph ... " << std::endl; 254 | for (tuple_size_t i = 0; i < graph_edge_counts; i++) { 255 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 256 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 257 | } 258 | std::cout << "finish reverse graph." 
<< std::endl; 259 | 260 | // copy the graph to device 261 | column_type *d_graph_data; 262 | cudaMalloc((void **)&d_graph_data, 263 | graph_edge_counts * relation_columns * sizeof(column_type)); 264 | cudaMemcpy(d_graph_data, raw_graph_data, 265 | graph_edge_counts * relation_columns * sizeof(column_type), 266 | cudaMemcpyHostToDevice); 267 | 268 | int REPEAT = 1; 269 | // init the tuples 270 | tuple_type *tuples; 271 | cudaMalloc(&tuples, graph_edge_counts * sizeof(tuple_type)); 272 | time_point_begin = std::chrono::high_resolution_clock::now(); 273 | for (int i = 0; i < REPEAT; i++) { 274 | init_tuples_unsorted<<>>( 275 | tuples, d_graph_data, relation_columns, graph_edge_counts); 276 | } 277 | cudaDeviceSynchronize(); 278 | time_point_end = std::chrono::high_resolution_clock::now(); 279 | spent_time = std::chrono::duration_cast>( 280 | time_point_end - time_point_begin) 281 | .count(); 282 | std::cout << "init tuples time: " << spent_time << std::endl; 283 | column_type *tuple_hashvs; 284 | cudaMalloc((void **)&tuple_hashvs, graph_edge_counts * sizeof(column_type)); 285 | column_type *col_tmp; 286 | cudaMalloc((void **)&col_tmp, graph_edge_counts * sizeof(column_type)); 287 | 288 | time_point_end = std::chrono::high_resolution_clock::now(); 289 | // compute hash for tuples 290 | for (int i = 0; i < REPEAT; i++) { 291 | compute_hash<<>>(tuples, graph_edge_counts, 1, 292 | tuple_hashvs); 293 | cudaDeviceSynchronize(); 294 | } 295 | time_point_end = std::chrono::high_resolution_clock::now(); 296 | spent_time = std::chrono::duration_cast>( 297 | time_point_end - time_point_begin) 298 | .count(); 299 | std::cout << "compute hash time: " << spent_time << std::endl; 300 | 301 | // sort the tuples using thrust 302 | double sort_hash_time = 0; 303 | for (int i = 0; i < REPEAT; i++) { 304 | time_point_begin = std::chrono::high_resolution_clock::now(); 305 | 306 | extract_column<<>>(tuples, graph_edge_counts, 1, 307 | col_tmp); 308 | cudaDeviceSynchronize(); 309 | thrust::stable_sort_by_key(thrust::device, col_tmp, 310 | col_tmp + graph_edge_counts, tuples); 311 | cudaDeviceSynchronize(); 312 | extract_column<<>>(tuples, graph_edge_counts, 0, 313 | col_tmp); 314 | cudaDeviceSynchronize(); 315 | thrust::stable_sort_by_key(thrust::device, col_tmp, 316 | col_tmp + graph_edge_counts, tuples); 317 | compute_hash<<>>(tuples, graph_edge_counts, 1, 318 | tuple_hashvs); 319 | cudaDeviceSynchronize(); 320 | thrust::stable_sort_by_key(thrust::device, tuple_hashvs, 321 | tuple_hashvs + graph_edge_counts, tuples); 322 | cudaDeviceSynchronize(); 323 | time_point_end = std::chrono::high_resolution_clock::now(); 324 | sort_hash_time += 325 | std::chrono::duration_cast>( 326 | time_point_end - time_point_begin) 327 | .count(); 328 | // print_tuple_list(tuples, graph_edge_counts, 2); 329 | // recover prepare for next sort 330 | init_tuples_unsorted<<>>( 331 | tuples, d_graph_data, relation_columns, graph_edge_counts); 332 | } 333 | std::cout << "sort hash time: " << sort_hash_time << std::endl; 334 | 335 | // sort the tuples using thrust with tuple_indexed_less 336 | double sort_comp_time = 0; 337 | for (int i = 0; i < REPEAT; i++) { 338 | time_point_begin = std::chrono::high_resolution_clock::now(); 339 | thrust::sort(thrust::device, tuples, tuples + graph_edge_counts, 340 | tuple_indexed_less(1, 2)); 341 | cudaDeviceSynchronize(); 342 | time_point_end = std::chrono::high_resolution_clock::now(); 343 | sort_comp_time += 344 | std::chrono::duration_cast>( 345 | time_point_end - time_point_begin) 346 | .count(); 347 
| // print_tuple_list(tuples, graph_edge_counts, 2); 348 | init_tuples_unsorted<<>>( 349 | tuples, d_graph_data, relation_columns, graph_edge_counts); 350 | } 351 | std::cout << "sort using tuple_indexed_less time: " << sort_comp_time 352 | << std::endl; 353 | 354 | 355 | // load raw data into edge relation 356 | time_point_begin = std::chrono::high_resolution_clock::now(); 357 | Relation *edge_2__2_1 = new Relation(); 358 | // cudaMallocHost((void **)&edge_2__2_1, sizeof(Relation)); 359 | Relation *path_2__1_2 = new Relation(); 360 | path_2__1_2->index_flag = false; 361 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 362 | std::cout << "edge size " << graph_edge_counts << std::endl; 363 | load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 364 | graph_edge_counts, 1, 0, grid_size, block_size); 365 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 366 | graph_edge_counts, 1, 0, grid_size, block_size); 367 | time_point_end = std::chrono::high_resolution_clock::now(); 368 | // double kernel_spent_time = timer.get_spent_time(); 369 | double init_relation_time = 370 | std::chrono::duration_cast>( 371 | time_point_end - time_point_begin) 372 | .count(); 373 | std::cout << "Build hash table time: " << init_relation_time << std::endl; 374 | 375 | tuple_generator_hook reorder_path_host; 376 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 377 | sizeof(tuple_generator_hook)); 378 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 379 | RelationalJoin join_test(edge_2__2_1, FULL, path_2__1_2, FULL, path_2__1_2, 380 | reorder_path_host, nullptr, LEFT, grid_size, 381 | block_size, join_detail); 382 | time_point_begin = std::chrono::high_resolution_clock::now(); 383 | join_test(); 384 | time_point_end = std::chrono::high_resolution_clock::now(); 385 | double join_test_time = 386 | std::chrono::duration_cast>( 387 | time_point_end - time_point_begin) 388 | .count(); 389 | std::cout << "join test time: " << join_test_time << std::endl; 390 | std::cout << "join detail: " << std::endl; 391 | std::cout << "compute size time: " << join_detail[0] << std::endl; 392 | std::cout << "reduce + scan time: " << join_detail[1] << std::endl; 393 | std::cout << "fetch result time: " << join_detail[2] << std::endl; 394 | std::cout << "sort time: " << join_detail[3] << std::endl; 395 | std::cout << "build index time: " << join_detail[5] << std::endl; 396 | std::cout << "merge time: " << join_detail[6] << std::endl; 397 | std::cout << "unique time: " << join_detail[4] + join_detail[7] << std::endl; 398 | // test thrust set_difference time on path's newt and full 399 | tuple_type* deduped_tuples; 400 | cudaMalloc(&deduped_tuples, path_2__1_2->newt->tuple_counts * sizeof(tuple_type)); 401 | 402 | time_point_begin = std::chrono::high_resolution_clock::now(); 403 | for (int i = 0; i < 10; i++) { 404 | thrust::set_difference(thrust::device, path_2__1_2->newt->tuples, 405 | path_2__1_2->newt->tuples + path_2__1_2->newt->tuple_counts, 406 | path_2__1_2->full->tuples, path_2__1_2->full->tuples + path_2__1_2->full->tuple_counts, 407 | deduped_tuples, tuple_indexed_less(1, 2)); 408 | cudaDeviceSynchronize(); 409 | } 410 | time_point_end = std::chrono::high_resolution_clock::now(); 411 | double set_difference_time = 412 | std::chrono::duration_cast>( 413 | time_point_end - time_point_begin) 414 | .count(); 415 | std::cout << "set_difference time: " << set_difference_time << std::endl; 416 | 417 | // sequential set_difference 418 | tuple_type* deduped_tuples_seq; 419 | 
cudaMalloc(&deduped_tuples_seq, path_2__1_2->newt->tuple_counts * sizeof(tuple_type)); 420 | time_point_begin = std::chrono::high_resolution_clock::now(); 421 | for (int i = 0; i < 10; i++) { 422 | tuple_type* full_t_end = path_2__1_2->full->tuples + path_2__1_2->full->tuple_counts; 423 | for (auto i = 0; i < path_2__1_2->newt->tuple_counts ; i++) { 424 | auto cur_newt_tuple = path_2__1_2->newt->tuples[i]; 425 | 426 | auto res =thrust::find_if(thrust::device, path_2__1_2->full->tuples, path_2__1_2->full->tuples + path_2__1_2->full->tuple_counts, 427 | t_equal_n(path_2__1_2->arity, cur_newt_tuple)); 428 | cudaDeviceSynchronize(); 429 | if (res != full_t_end) { 430 | deduped_tuples_seq[i] = cur_newt_tuple; 431 | } 432 | } 433 | } 434 | time_point_end = std::chrono::high_resolution_clock::now(); 435 | double set_difference_time_seq = 436 | std::chrono::duration_cast>( 437 | time_point_end - time_point_begin) 438 | .count(); 439 | std::cout << "set_difference time seq: " << set_difference_time_seq << std::endl; 440 | 441 | return 0; 442 | } 443 | -------------------------------------------------------------------------------- /test/souffle/bip.dl: -------------------------------------------------------------------------------- 1 | .decl edge(v1:symbol, v2:symbol) 2 | .input edge 3 | .decl matching(v1:symbol, v2:symbol) choice-domain v1, v2 4 | .decl notBipartiteMatching() 5 | 6 | matching(x,y) :- edge(x,y). 7 | 8 | // No two edges share an endpoint. 9 | notBipartiteMatching() :- matching(x, y), matching(x, z), z != y. 10 | notBipartiteMatching() :- matching(y, x), matching(z, x), z != y. 11 | 12 | .printsize notBipartiteMatching 13 | -------------------------------------------------------------------------------- /test/souffle/choice_total.dl: -------------------------------------------------------------------------------- 1 | .decl domain(x:symbol) 2 | .input domain 3 | .decl list(prev:symbol, data:symbol) choice-domain prev, data 4 | .decl notTotalOrder() 5 | .printsize notTotalOrder 6 | 7 | list("nil", "head"). 8 | list(x,y) :- domain(y), list(_,x). 9 | 10 | // every node has only one sucessor. 11 | notTotalOrder() :- list(p, x), list(p, y), x != y. 12 | // every node has only one predecessor. 13 | notTotalOrder() :- list(pa, x), list(pb, x), pa != pb. 14 | // every node is in the list exactly once. 15 | notTotalOrder() :- domain(x), !list(x,_), !list(_,x). 16 | -------------------------------------------------------------------------------- /test/souffle/cspa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harp-lab/gdlog/65a6ee960ced8d04bc725ccdfa68f004f8479226/test/souffle/cspa -------------------------------------------------------------------------------- /test/souffle/cspa.dl: -------------------------------------------------------------------------------- 1 | // 2,1 2 | .decl assign(src: number, dest: number) 3 | .input assign(IO=file, deliminator="\t") 4 | // 1,2 2,1 5 | .decl dereference(src: number, dest: number) 6 | .input dereference(IO=file, deliminator="\t") 7 | 8 | //1,2 2,1 9 | .decl ValueFlow(src: number, dest: number) 10 | .printsize ValueFlow 11 | .output ValueFlow 12 | 13 | // 1,2 2,1 14 | .decl ValueAlias(src: number, dest: number) 15 | .printsize ValueAlias 16 | .output ValueAlias 17 | 18 | // 1,2 19 | .decl MemoryAlias(src: number, dest: number) 20 | .printsize MemoryAlias 21 | 22 | .decl tmp(src: number, dest: number) 23 | .printsize tmp 24 | 25 | ValueFlow(x, y) :- ValueFlow(x, z), ValueFlow(z, y). 
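// Recursive rules: ValueFlow is closed under composition (rule above); ValueAlias
// holds for pairs reachable from a common ValueFlow source, optionally through a
// MemoryAlias step; MemoryAlias relates the dereference targets of value-aliased
// pointers. The base and reflexive rules over assign follow the recursive ones.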
26 | ValueAlias(x, y) :- ValueFlow(z, x), ValueFlow(z, y). 27 | ValueFlow(x, y) :- assign(x, z), MemoryAlias(z, y). 28 | // tmp(z, x) :- dereference(y, x), ValueAlias(y, z). 29 | MemoryAlias(x, w) :- dereference(y, x), ValueAlias(y, z), dereference(z, w). 30 | // MemoryAlias(x, w) :- tmp(z, x), dereference(z, w). 31 | ValueAlias(x, y) :- ValueFlow(z, x), MemoryAlias(z, w), ValueFlow(w, y). 32 | 33 | 34 | ValueFlow(y, x) :- assign(y, x). 35 | ValueFlow(x, x) :- assign(x, y). 36 | ValueFlow(x, x) :- assign(y, x). 37 | 38 | MemoryAlias(x, x) :- assign(y, x). 39 | MemoryAlias(x, x) :- assign(x, y). 40 | -------------------------------------------------------------------------------- /test/souffle/cspa.slog: -------------------------------------------------------------------------------- 1 | ; ValueFlow(y, x) :- Assign(y, x). 2 | ; ValueFlow(x, y) :- Assign(x, z), MemoryAlias(z, y). 3 | ; ValueFlow(x, y) :- ValueFlow(x, z), ValueFlow(z, y). 4 | ; MemoryAlias(x, w) :- Dereference(y, x), ValueAlias(y, z), Dereference(z, w). 5 | ; ValueAlias(x, y) :- ValueFlow(z, x), ValueFlow(z, y). 6 | ; ValueAlias(x, y) :- ValueFlow(z, x), MemoryAlias(z, w),ValueFlow(w, y). 7 | ; ValueFlow(x, x) :- Assign(x, y). 8 | ; ValueFlow(x, x) :- Assign(y, x). 9 | ; MemoryAlias(x, x) :- Assign(y, x). 10 | ; MemoryAlias(x, x) :- Assign(x, y). 11 | 12 | [(value-flow y x) <-- (assign y x)] 13 | [(value-flow x y) <-- (assign x z) (memory-alias z y)] 14 | [(value-flow x y) <-- (value-flow x z) (value-flow z y)] 15 | [(memory-alias x w) <-- (dereference y x) (value-alias y z) -- (dereference z w)] 16 | [(value-alias x y) <-- (value-flow z x) (value-flow z y)] 17 | [(value-alias x y) <-- (memory-alias z w) (value-flow w y) -- (value-flow z x)] 18 | [(value-flow x x) <-- (assign x y)] 19 | [(value-flow x x) <-- (assign y x)] 20 | [(memory-alias x x) <-- (assign y x)] 21 | [(memory-alias x x) <-- (assign x y)] 22 | -------------------------------------------------------------------------------- /test/souffle/cspa.slogc: -------------------------------------------------------------------------------- 1 | ( 2 | slog-prog 3 | ((relation-decl rel__value__alias__2__1 value-alias 1 #f 2 (1)) 4 | (relation-decl rel__value__flow__2__1 value-flow 1 #f 2 (1)) 5 | (relation-decl rel__value__flow__2__1__2 value-flow 2 #t 2 (1 2)) 6 | (relation-decl 7 | rel___dollorrule7__inter__body__2__2 8 | $rule7-inter-body 9 | 1 10 | #f 11 | 2 12 | (2)) 13 | (relation-decl rel__dereference__2__1__2 dereference 2 #t 2 (1 2)) 14 | (relation-decl rel__value__flow__2__2 value-flow 1 #f 2 (2)) 15 | (relation-decl rel__assign__2__2 assign 1 #f 2 (2)) 16 | (relation-decl rel__assign__2__1__2 assign 2 #t 2 (1 2)) 17 | (relation-decl rel__value__alias__2__1__2 value-alias 2 #t 2 (1 2)) 18 | (relation-decl rel__memory__alias__2__1 memory-alias 1 #f 2 (1)) 19 | (relation-decl rel__memory__alias__2__2 memory-alias 1 #f 2 (2)) 20 | (relation-decl 21 | rel___dollorrule7__inter__body__2__1__2 22 | $rule7-inter-body 23 | 2 24 | #t 25 | 2 26 | (1 2)) 27 | (relation-decl 28 | rel___dollorrule10__inter__body__2__1__2 29 | $rule10-inter-body 30 | 2 31 | #t 32 | 2 33 | (1 2)) 34 | (relation-decl rel__dereference__2__1 dereference 1 #f 2 (1)) 35 | (relation-decl rel__memory__alias__2__1__2 memory-alias 2 #t 2 (1 2)) 36 | (relation-decl 37 | rel___dollorrule10__inter__body__2__2 38 | $rule10-inter-body 39 | 1 40 | #f 41 | 2 42 | (2))) 43 | ((scc-decl 44 | scc0 45 | 0 46 | #f 47 | ((scc-rel rel__assign__2__1__2 #f #f) 48 | (scc-rel rel__value__flow__2__1__2 #t #f)) 49 | ((copy 
rel__value__flow__2__1__2 rel__assign__2__1__2 FULL (1 1)))) 50 | (scc-decl 51 | scc1 52 | 1 53 | #f 54 | ((scc-rel rel__assign__2__1__2 #f #f) 55 | (scc-rel rel__memory__alias__2__1__2 #t #f)) 56 | ((copy rel__memory__alias__2__1__2 rel__assign__2__1__2 FULL (1 1)))) 57 | (scc-decl 58 | scc2 59 | 2 60 | #f 61 | ((scc-rel rel__assign__2__1__2 #f #f) 62 | (scc-rel rel__memory__alias__2__1__2 #t #f)) 63 | ((copy rel__memory__alias__2__1__2 rel__assign__2__1__2 FULL (0 0)))) 64 | (scc-decl 65 | scc3 66 | 3 67 | #f 68 | ((scc-rel rel__assign__2__1__2 #f #f) 69 | (scc-rel rel__value__flow__2__1__2 #t #f)) 70 | ((copy rel__value__flow__2__1__2 rel__assign__2__1__2 FULL (0 1)))) 71 | (scc-decl 72 | scc4 73 | 4 74 | #f 75 | ((scc-rel rel__dereference__2__1 #t #f) 76 | (scc-rel rel__dereference__2__1__2 #t #f)) 77 | ((acopy rel__dereference__2__1 rel__dereference__2__1__2 DELTA (0 2 1)))) 78 | (scc-decl 79 | scc5 80 | 5 81 | #f 82 | ((scc-rel rel__assign__2__2 #t #f) (scc-rel rel__assign__2__1__2 #t #f)) 83 | ((acopy rel__assign__2__2 rel__assign__2__1__2 DELTA (1 2 0)))) 84 | (scc-decl 85 | scc6 86 | 6 87 | #f 88 | ((scc-rel rel__assign__2__1__2 #f #f) 89 | (scc-rel rel__value__flow__2__1__2 #t #f)) 90 | ((copy rel__value__flow__2__1__2 rel__assign__2__1__2 FULL (0 0)))) 91 | (scc-decl 92 | scc7 93 | 7 94 | #t 95 | ((scc-rel rel__value__alias__2__1__2 #t #f) 96 | (scc-rel rel__value__alias__2__1 #t #f) 97 | (scc-rel rel__value__flow__2__1 #t #f) 98 | (scc-rel rel__value__flow__2__1__2 #t #f) 99 | (scc-rel rel___dollorrule7__inter__body__2__2 #t #t) 100 | (scc-rel rel__value__flow__2__2 #t #f) 101 | (scc-rel rel__assign__2__2 #f #f) 102 | (scc-rel rel__memory__alias__2__1 #t #f) 103 | (scc-rel rel__memory__alias__2__2 #t #f) 104 | (scc-rel rel___dollorrule7__inter__body__2__1__2 #t #t) 105 | (scc-rel rel___dollorrule10__inter__body__2__1__2 #t #t) 106 | (scc-rel rel__dereference__2__1 #f #f) 107 | (scc-rel rel__memory__alias__2__1__2 #t #f) 108 | (scc-rel rel___dollorrule10__inter__body__2__2 #t #t)) 109 | ((join 110 | rel__value__flow__2__1__2 111 | rel__value__flow__2__1 112 | DELTA 113 | rel__value__flow__2__2 114 | DELTA 115 | (4 2)) 116 | (join 117 | rel___dollorrule7__inter__body__2__1__2 118 | rel__memory__alias__2__2 119 | DELTA 120 | rel__value__flow__2__1 121 | DELTA 122 | (4 2)) 123 | (join 124 | rel__value__alias__2__1__2 125 | rel__value__flow__2__1 126 | DELTA 127 | rel__value__flow__2__1 128 | FULL 129 | (2 4)) 130 | (acopy 131 | rel___dollorrule10__inter__body__2__2 132 | rel___dollorrule10__inter__body__2__1__2 133 | DELTA 134 | (1 2 0)) 135 | (join 136 | rel__value__alias__2__1__2 137 | rel__value__flow__2__1 138 | FULL 139 | rel__value__flow__2__1 140 | DELTA 141 | (2 4)) 142 | (acopy rel__value__flow__2__2 rel__value__flow__2__1__2 DELTA (1 2 0)) 143 | (acopy rel__value__flow__2__1 rel__value__flow__2__1__2 DELTA (0 2 1)) 144 | (acopy rel__memory__alias__2__1 rel__memory__alias__2__1__2 DELTA (0 2 1)) 145 | (join 146 | rel__value__flow__2__1__2 147 | rel__value__flow__2__1 148 | DELTA 149 | rel__value__flow__2__2 150 | FULL 151 | (4 2)) 152 | (join 153 | rel__value__alias__2__1__2 154 | rel___dollorrule7__inter__body__2__2 155 | DELTA 156 | rel__value__flow__2__1 157 | DELTA 158 | (4 2)) 159 | (join 160 | rel__value__alias__2__1__2 161 | rel___dollorrule7__inter__body__2__2 162 | FULL 163 | rel__value__flow__2__1 164 | DELTA 165 | (4 2)) 166 | (join 167 | rel___dollorrule7__inter__body__2__1__2 168 | rel__memory__alias__2__2 169 | DELTA 170 | rel__value__flow__2__1 171 | FULL 
172 | (4 2)) 173 | (join 174 | rel___dollorrule7__inter__body__2__1__2 175 | rel__memory__alias__2__2 176 | FULL 177 | rel__value__flow__2__1 178 | DELTA 179 | (4 2)) 180 | (acopy rel__memory__alias__2__2 rel__memory__alias__2__1__2 DELTA (1 2 0)) 181 | (acopy rel__value__alias__2__1 rel__value__alias__2__1__2 DELTA (0 2 1)) 182 | (join 183 | rel__value__alias__2__1__2 184 | rel___dollorrule7__inter__body__2__2 185 | DELTA 186 | rel__value__flow__2__1 187 | FULL 188 | (4 2)) 189 | (join 190 | rel__value__flow__2__1__2 191 | rel__value__flow__2__1 192 | FULL 193 | rel__value__flow__2__2 194 | DELTA 195 | (4 2)) 196 | (join 197 | rel___dollorrule10__inter__body__2__1__2 198 | rel__value__alias__2__1 199 | DELTA 200 | rel__dereference__2__1 201 | FULL 202 | (4 2)) 203 | (acopy 204 | rel___dollorrule7__inter__body__2__2 205 | rel___dollorrule7__inter__body__2__1__2 206 | DELTA 207 | (1 2 0)) 208 | (join 209 | rel__value__flow__2__1__2 210 | rel__assign__2__2 211 | FULL 212 | rel__memory__alias__2__1 213 | DELTA 214 | (2 4)) 215 | (join 216 | rel__value__alias__2__1__2 217 | rel__value__flow__2__1 218 | DELTA 219 | rel__value__flow__2__1 220 | DELTA 221 | (2 4)) 222 | (join 223 | rel__memory__alias__2__1__2 224 | rel__dereference__2__1 225 | FULL 226 | rel___dollorrule10__inter__body__2__2 227 | DELTA 228 | (4 2))))) 229 | ((0 7) (1 7) (2 7) (3 7) (4 7) (5 7) (6 7))) 230 | -------------------------------------------------------------------------------- /test/souffle/path_length.dl: -------------------------------------------------------------------------------- 1 | 2 | .decl edge(from:number, to:number) 3 | .input edge(IO=file, filename="../../data/data_3.txt", deliminator="\t") 4 | 5 | 6 | .decl path(from:number, to:number, l: number) choice-domain (from, to) 7 | .output path 8 | 9 | path(from, to, 1) :- edge(from, to). 10 | path(from, to, l+1) :- edge(from, mid), path(mid, to, l). 11 | 12 | .printsize path 13 | -------------------------------------------------------------------------------- /test/souffle/sg.dl: -------------------------------------------------------------------------------- 1 | 2 | .decl edge(x: number, y: number) 3 | .input edge(IO=file, filename="../../data/data_39994.txt", deliminator="\t") 4 | 5 | .decl sg(x: number, y: number) 6 | // .decl sg_init(x: number, y: number) 7 | // sg_init(x, y) :- edge(p, x), edge(p, y), x != y. 8 | 9 | 10 | sg(x, y) :- edge(p, x), edge(p, y), x != y. 11 | sg(x, y) :- edge(a, x), sg(a, b), edge(b, y). 12 | 13 | .printsize sg 14 | // .printsize sg_init 15 | -------------------------------------------------------------------------------- /test/souffle/spanning.dl: -------------------------------------------------------------------------------- 1 | .decl edge(v: number, u:number) 2 | .input edge(IO=file, filename="../../data/data_39994.txt", deliminator="\t") 3 | 4 | .decl start_node(v: number) 5 | // start_node(1). 6 | start_node(10). 7 | // start_node(32). 8 | // start_node(45). 9 | // start_node(56). 10 | // start_node(886). 11 | // start_node(9851). 12 | // start_node(5682). 13 | // start_node(3301). 14 | // start_node(11234). 15 | // start_node(v) :- edge(v,_). 16 | 17 | .decl st(v:number, u:number) choice-domain u 18 | st(99999999, v) :- start_node(v). 19 | st(v,u) :- st(_, v), edge(v,u). 
20 | .output st 21 | -------------------------------------------------------------------------------- /test/souffle/tc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/harp-lab/gdlog/65a6ee960ced8d04bc725ccdfa68f004f8479226/test/souffle/tc -------------------------------------------------------------------------------- /test/souffle/tc.dl: -------------------------------------------------------------------------------- 1 | 2 | .decl edge(from:number, to:number) 3 | .input edge(IO=file, filename="../../../dataset/vsp_finan512_scagr7-2c_rlfddd/vsp_finan512_scagr7-2c_rlfddd.mtx", deliminator="\t") 4 | 5 | 6 | 7 | .decl path(from:number, to:number) 8 | 9 | path(from, to) :- edge(from, to). 10 | path(from, to) :- path(from, mid), path(mid, to). 11 | 12 | .printsize path 13 | 14 | // .decl path1_join(from:number, to:number) 15 | // .output path1_join 16 | // path1_join(from, to) :- edge(from, mid), edge(mid, to). 17 | // .printsize path1_join 18 | 19 | // .decl path1(from:number, to:number) 20 | // .output path1 21 | // path1(from, to) :- edge(from, to). 22 | // path1(from, to) :- edge(from, mid), edge(mid, to). 23 | // .printsize path1 24 | 25 | // .decl path2(from:number, to:number) 26 | // // path2(from, to) :- path1(from, to). 27 | // path2(from, to) :- edge(from, mid), path1(mid, to). 28 | // .printsize path2 29 | // .output path2 30 | 31 | // .decl path3(from:number, to:number) 32 | // path3(from, to) :- path2(from, to). 33 | // path3(from, to) :- edge(from, mid), path2(mid, to). 34 | // .printsize path3 35 | 36 | // .decl path3_join(from:number, to:number) 37 | // path3_join(from, to) :- edge(from, mid), path2(mid, to). 38 | // .printsize path3_join 39 | // .output path3_join 40 | -------------------------------------------------------------------------------- /test/tc.cu: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <fstream> 3 | #include <chrono> 4 | #include <cstdio> 5 | #include <cstdlib> 6 | #include <cstring> 7 | #include <locale> 8 | #include <string> 9 | 10 | #include "../include/exception.cuh" 11 | #include "../include/lie.cuh" 12 | #include "../include/timer.cuh" 13 | #include "../include/print.cuh" 14 | 15 | ////////////////////////////////////////////////////// 16 | 17 | long int get_row_size(const char *data_path) { 18 | std::ifstream f; 19 | f.open(data_path); 20 | char c; 21 | long i = 0; 22 | while (f.get(c)) 23 | if (c == '\n') 24 | ++i; 25 | f.close(); 26 | return i; 27 | } 28 | 29 | enum ColumnT{ U64, U32}; 30 | 31 | column_type *get_relation_from_file(const char *file_path, int total_rows, 32 | int total_columns, char separator, 33 | ColumnT ct) { 34 | column_type *data = 35 | (column_type *)malloc(total_rows * total_columns * sizeof(column_type)); 36 | FILE *data_file = fopen(file_path, "r"); 37 | for (int i = 0; i < total_rows; i++) { 38 | for (int j = 0; j < total_columns; j++) { 39 | if (j != (total_columns - 1)) { 40 | if (ct == U64){ 41 | fscanf(data_file, "%lld%c", &data[(i * total_columns) + j], 42 | &separator); 43 | } else { 44 | fscanf(data_file, "%ld%c", &data[(i * total_columns) + j], 45 | &separator); 46 | } 47 | } else { 48 | if (ct == U64) { 49 | fscanf(data_file, "%lld", &data[(i * total_columns) + j]); 50 | } else { 51 | fscanf(data_file, "%ld", &data[(i * total_columns) + j]); 52 | } 53 | } 54 | } 55 | } 56 | return data; 57 | } 58 | 59 | ////////////////////////////////////////////////////////////////// 60 | 61 | __device__ void reorder_path(tuple_type inner, tuple_type outer, 62 | 
tuple_type newt) { 63 | newt[0] = inner[1]; 64 | newt[1] = outer[1]; 65 | }; 66 | __device__ void reorder_path1(tuple_type inner, tuple_type outer, 67 | tuple_type newt) { 68 | newt[0] = outer[1]; 69 | newt[1] = inner[1]; 70 | }; 71 | __device__ tuple_generator_hook reorder_path_device = reorder_path; 72 | __device__ tuple_generator_hook reorder_path1_device = reorder_path1; 73 | 74 | __device__ void cp_1(tuple_type src, tuple_type dest) { 75 | dest[0] = src[1]; 76 | dest[1] = src[0]; 77 | } 78 | __device__ tuple_copy_hook cp_1_device = cp_1; 79 | 80 | void analysis_bench(const char *dataset_path, int block_size, int grid_size, bool fully_disable_buffer = false) { 81 | KernelTimer timer; 82 | int relation_columns = 2; 83 | std::chrono::high_resolution_clock::time_point time_point_begin; 84 | std::chrono::high_resolution_clock::time_point time_point_end; 85 | time_point_begin = std::chrono::high_resolution_clock::now(); 86 | double spent_time; 87 | 88 | // load the raw graph 89 | tuple_size_t graph_edge_counts = get_row_size(dataset_path); 90 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 91 | // u64 graph_edge_counts = 2100; 92 | column_type *raw_graph_data = 93 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t', U32); 94 | column_type *raw_reverse_graph_data = 95 | (column_type *)malloc(graph_edge_counts * 2 * sizeof(column_type)); 96 | 97 | // std::cout << "reversing graph ... " << std::endl; 98 | for (tuple_size_t i = 0; i < graph_edge_counts; i++) { 99 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 100 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 101 | } 102 | // std::cout << "finish reverse graph." << std::endl; 103 | 104 | timer.start_timer(); 105 | Relation *edge_2__2_1 = new Relation(); 106 | edge_2__2_1->fully_disable_merge_buffer_flag = fully_disable_buffer; 107 | // cudaMallocHost((void **)&edge_2__2_1, sizeof(Relation)); 108 | Relation *path_2__1_2 = new Relation(); 109 | path_2__1_2->fully_disable_merge_buffer_flag = fully_disable_buffer; 110 | path_2__1_2->index_flag = false; 111 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 112 | std::cout << "edge size " << graph_edge_counts << std::endl; 113 | load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 114 | graph_edge_counts, 1, 0, grid_size, block_size); 115 | load_relation(edge_2__2_1, "edge_2__2_1", 2, raw_reverse_graph_data, 116 | graph_edge_counts, 1, 0, grid_size, block_size); 117 | timer.stop_timer(); 118 | // // double kernel_spent_time = timer.get_spent_time(); 119 | // std::cout << "Build hash table time: " << timer.get_spent_time() 120 | // << std::endl; 121 | 122 | timer.start_timer(); 123 | LIE tc_scc(grid_size, block_size); 124 | tc_scc.reload_full_flag = false; 125 | tc_scc.add_relations(edge_2__2_1, true); 126 | tc_scc.add_relations(path_2__1_2, false); 127 | float join_detail[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; 128 | tuple_generator_hook reorder_path_host; 129 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path_device, 130 | sizeof(tuple_generator_hook)); 131 | tuple_copy_hook cp_1_host; 132 | cudaMemcpyFromSymbol(&cp_1_host, cp_1_device, sizeof(tuple_copy_hook)); 133 | tc_scc.add_ra(RelationalJoin(edge_2__2_1, FULL, path_2__1_2, DELTA, 134 | path_2__1_2, reorder_path_host, nullptr, 135 | LEFT, grid_size, block_size, join_detail)); 136 | 137 | tc_scc.fixpoint_loop(); 138 | 139 | timer.stop_timer(); 140 | std::cout << "Path counts " << path_2__1_2->full->tuple_counts << std::endl; 141 | // 
print_tuple_rows(path_2__2_1->full, "full"); 142 | std::cout << "TC time: " << timer.get_spent_time() << std::endl; 143 | // std::cout << "join detail: " << std::endl; 144 | // std::cout << "compute size time: " << join_detail[0] << std::endl; 145 | // std::cout << "reduce + scan time: " << join_detail[1] << std::endl; 146 | // std::cout << "fetch result time: " << join_detail[2] << std::endl; 147 | // std::cout << "sort time: " << join_detail[3] << std::endl; 148 | // std::cout << "build index time: " << join_detail[5] << std::endl; 149 | // std::cout << "merge time: " << join_detail[6] << std::endl; 150 | // std::cout << "unique time: " << join_detail[4] + join_detail[7] << std::endl; 151 | } 152 | 153 | void analysis_bench2(const char *dataset_path, int block_size, int grid_size) { 154 | KernelTimer timer; 155 | int relation_columns = 2; 156 | std::chrono::high_resolution_clock::time_point time_point_begin; 157 | std::chrono::high_resolution_clock::time_point time_point_end; 158 | time_point_begin = std::chrono::high_resolution_clock::now(); 159 | double spent_time; 160 | 161 | // load the raw graph 162 | tuple_size_t graph_edge_counts = get_row_size(dataset_path); 163 | std::cout << "Input graph rows: " << graph_edge_counts << std::endl; 164 | // u64 graph_edge_counts = 2100; 165 | column_type *raw_graph_data = 166 | get_relation_from_file(dataset_path, graph_edge_counts, 2, '\t', U32); 167 | column_type *raw_reverse_graph_data = 168 | (column_type *)malloc(graph_edge_counts * 2 * sizeof(column_type)); 169 | 170 | // std::cout << "reversing graph ... " << std::endl; 171 | for (tuple_size_t i = 0; i < graph_edge_counts; i++) { 172 | raw_reverse_graph_data[i * 2 + 1] = raw_graph_data[i * 2]; 173 | raw_reverse_graph_data[i * 2] = raw_graph_data[i * 2 + 1]; 174 | } 175 | // std::cout << "finish reverse graph." 
<< std::endl; 176 | 177 | timer.start_timer(); 178 | Relation *path_2__1_2 = new Relation(); 179 | // cudaMallocHost((void **)&path_2__1_2, sizeof(Relation)); 180 | Relation *path_2__2_1 = new Relation(); 181 | // cudaMallocHost((void **)&path_2__2_1, sizeof(Relation)); 182 | // std::cout << "edge size " << graph_edge_counts << std::endl; 183 | load_relation(path_2__1_2, "path_2__1_2", 2, raw_graph_data, 184 | graph_edge_counts, 1, 0, grid_size, block_size); 185 | load_relation(path_2__2_1, "path_2__2_1", 2, nullptr, 0, 1, 0, grid_size, 186 | block_size); 187 | timer.stop_timer(); 188 | // double kernel_spent_time = timer.get_spent_time(); 189 | // std::cout << "Build hash table time: " << timer.get_spent_time() 190 | // << std::endl; 191 | 192 | timer.start_timer(); 193 | LIE tc_scc(grid_size, block_size); 194 | tc_scc.add_relations(path_2__2_1, false); 195 | tc_scc.add_relations(path_2__1_2, false); 196 | float join_time[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // sized like join_detail above; the join records several timing entries 197 | tuple_generator_hook reorder_path_host; 198 | cudaMemcpyFromSymbol(&reorder_path_host, reorder_path1_device, 199 | sizeof(tuple_generator_hook)); 200 | tuple_copy_hook cp_1_host; 201 | cudaMemcpyFromSymbol(&cp_1_host, cp_1_device, sizeof(tuple_copy_hook)); 202 | tc_scc.add_ra(RelationalACopy(path_2__1_2, path_2__2_1, cp_1_host, nullptr, 203 | grid_size, block_size)); 204 | tc_scc.add_ra(RelationalJoin(path_2__1_2, FULL, path_2__2_1, DELTA, 205 | path_2__1_2, reorder_path_host, nullptr, LEFT, 206 | grid_size, block_size, join_time)); 207 | 208 | tc_scc.fixpoint_loop(); 209 | 210 | timer.stop_timer(); 211 | std::cout << "Path counts " << path_2__1_2->full->tuple_counts << std::endl; 212 | // print_tuple_rows(path_2__2_1->full, "full"); 213 | std::cout << "TC time: " << timer.get_spent_time() << std::endl; 214 | } 215 | 216 | int main(int argc, char *argv[]) { 217 | int device_id; 218 | int number_of_sm; 219 | cudaGetDevice(&device_id); 220 | cudaDeviceGetAttribute(&number_of_sm, cudaDevAttrMultiProcessorCount, 221 | device_id); 222 | // std::cout << "num of sm " << number_of_sm << std::endl; 223 | // std::cout << "using " << EMPTY_HASH_ENTRY << " as empty hash entry" 224 | // << std::endl; 225 | int block_size, grid_size; 226 | block_size = 512; 227 | grid_size = 32 * number_of_sm; 228 | std::locale loc(""); 229 | if (strcmp(argv[2], "1") == 0) 230 | analysis_bench(argv[1], block_size, grid_size, true); 231 | else 232 | analysis_bench(argv[1], block_size, grid_size, false); 233 | 234 | return 0; 235 | } 236 | --------------------------------------------------------------------------------