├── .gitattributes ├── .github └── workflows │ └── cmake.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── CPM.cmake └── modules │ └── clang-format.cmake ├── src ├── .clang-format ├── benchmarking │ ├── benchmark-partitioning.sh │ └── benchmark-strong-scaling.sh ├── main.cpp ├── pcsr │ ├── PCSR.cpp │ └── PCSR.h ├── pppcsr │ ├── PPPCSR.cpp │ └── PPPCSR.h ├── thread_pool │ ├── thread_pool.cpp │ └── thread_pool.h ├── thread_pool_pppcsr │ ├── thread_pool_pppcsr.cpp │ └── thread_pool_pppcsr.h └── utility │ ├── bfs.h │ ├── fastLock.h │ ├── hybridLock.h │ ├── pagerank.h │ └── task.h └── test ├── DataStructureTest.cpp ├── DataStructureTest.h ├── SchedulerTest.cpp ├── SchedulerTest.h └── tests_main.cpp /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/domargan/parallel-packed-csr/c213b281db22e016452766a23db36a09aa327ab2/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: [push] 4 | 5 | env: 6 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 7 | BUILD_TYPE: Release 8 | 9 | jobs: 10 | build: 11 | # The CMake configure and build commands are platform agnostic and should work equally 12 | # well on Windows or Mac. You can convert this to a matrix build if you need 13 | # cross-platform coverage. 
14 | # See: https://docs.github.com/en/actions/configuring-and-managing-workflows/configuring-a-workflow#configuring-a-build-matrix 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - name: Install libnuma-dev and Valgrind 21 | run: sudo apt-get install -y libnuma-dev valgrind 22 | 23 | - name: Create Build Environment 24 | # Some projects don't allow in-source building, so create a separate build directory 25 | # We'll use this as our working directory for all subsequent commands 26 | run: cmake -E make_directory ${{runner.workspace}}/build 27 | 28 | - name: Configure CMake 29 | # Use a bash shell so we can use the same syntax for environment variable 30 | # access regardless of the host operating system 31 | shell: bash 32 | working-directory: ${{runner.workspace}}/build 33 | # Note the current convention is to use the -S and -B options here to specify source 34 | # and build directories, but this is only available with CMake 3.13 and higher. 35 | # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 36 | run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE 37 | 38 | - name: Build 39 | working-directory: ${{runner.workspace}}/build 40 | shell: bash 41 | # Execute the build. You can specify a specific target with "--target " 42 | run: cmake --build . --config $BUILD_TYPE 43 | 44 | - name: Test 45 | working-directory: ${{runner.workspace}}/build 46 | shell: bash 47 | # Execute tests defined by the CMake configuration. 48 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 49 | run: TSAN_OPTIONS=detect_deadlocks=0 ctest -C $BUILD_TYPE --timeout 120 50 | 51 | - name: Test MemCheck 52 | working-directory: ${{runner.workspace}}/build 53 | shell: bash 54 | # Execute tests defined by the CMake configuration. 
55 | # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail 56 | run: TSAN_OPTIONS=detect_deadlocks=0 ctest -C $BUILD_TYPE -T memcheck -E "-tsan" --timeout 180 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | .idea 3 | cmake-build* 4 | .directory 5 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
# Top-level build script for parallel-packed-csr.
#
# Targets defined here:
#   parallel-packed-csr  - the command line binary (all sources under src/)
#   tests                - GoogleTest binary (src/ without main.cpp + test/)
#   tests-tsan           - same sources, built with -fsanitize=thread
#   tests-ubsan          - same sources, built with -fsanitize=undefined
#   clangformat          - defined by cmake/modules/clang-format.cmake
#
# target_link_options() below requires CMake 3.13. When GoogleTest is not
# installed, cmake/CPM.cmake is included and enforces 3.14 on its own.
cmake_minimum_required(VERSION 3.13)
set(CMAKE_VERBOSE_MAKEFILE ON)

project(parallel-packed-csr VERSION 0.1 LANGUAGES CXX)

# Default to Release, but only for single-config generators: multi-config
# generators (Visual Studio, Xcode, Ninja Multi-Config) ignore CMAKE_BUILD_TYPE.
get_property(ppcsr_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if (NOT ppcsr_is_multi_config AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif ()

# ON by default so that the set of built targets and registered tests is the
# same as before this option existed.
option(PPCSR_BUILD_SANITIZER_TESTS "Also build and register the TSan/UBSan test binaries" ON)

# The language level is requested through CMAKE_CXX_STANDARD only. The former
# hand-written "-std=c++14 -ansi" pair was contradictory (-ansi selects C++98)
# and was overridden by the flag CMake derives from CMAKE_CXX_STANDARD anyway.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# These replace CMake's per-configuration defaults. Note that Release is
# therefore built without -DNDEBUG, exactly as before.
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")

# Warning flags are attached to the project's own targets instead of being
# written into CMAKE_CXX_FLAGS, so user-supplied flags survive and third-party
# code fetched through CPM is not compiled with -pedantic-errors.
set(ppcsr_warning_flags -pedantic-errors -Wall -Wextra)

# Project-specific names: PROJECT_SOURCE_DIR is a variable owned by project()
# and must not be overwritten.
set(ppcsr_source_dir "${CMAKE_CURRENT_SOURCE_DIR}/src")
set(ppcsr_test_dir "${CMAKE_CURRENT_SOURCE_DIR}/test")

# The four list variables below keep their names because
# cmake/modules/clang-format.cmake reads them.
file(GLOB_RECURSE parallel-packed-csr_SOURCES CONFIGURE_DEPENDS "${ppcsr_source_dir}/*.cpp")
file(GLOB_RECURSE parallel-packed-csr_HEADERS CONFIGURE_DEPENDS "${ppcsr_source_dir}/*.h")
file(GLOB_RECURSE parallel-packed-csr_TEST_SOURCES CONFIGURE_DEPENDS "${ppcsr_test_dir}/*.cpp")
file(GLOB_RECURSE parallel-packed-csr_TEST_HEADERS CONFIGURE_DEPENDS "${ppcsr_test_dir}/*.h")

include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/clang-format.cmake")

# Every directory that contains a header becomes an include directory.
set(parallel-packed-csr_INCLUDE_DIRS "")
foreach (header_file IN LISTS parallel-packed-csr_HEADERS)
    get_filename_component(header_dir "${header_file}" DIRECTORY)
    list(APPEND parallel-packed-csr_INCLUDE_DIRS "${header_dir}")
endforeach ()
list(REMOVE_DUPLICATES parallel-packed-csr_INCLUDE_DIRS)

# With THREADS_PREFER_PTHREAD_FLAG, Threads::Threads supplies -pthread at both
# compile and link time, which replaces the flag formerly hard-coded in
# CMAKE_CXX_FLAGS.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

# ---------------------------------------------------------------------------
# Main executable
# ---------------------------------------------------------------------------
add_executable(parallel-packed-csr ${parallel-packed-csr_SOURCES})
target_include_directories(parallel-packed-csr PRIVATE ${parallel-packed-csr_INCLUDE_DIRS})
target_compile_options(parallel-packed-csr PRIVATE ${ppcsr_warning_flags})
target_link_libraries(parallel-packed-csr PRIVATE Threads::Threads numa)

# ---------------------------------------------------------------------------
# Test executables
# ---------------------------------------------------------------------------
# The test binaries reuse the application sources minus main.cpp. A copy of the
# list is filtered so that parallel-packed-csr_SOURCES itself stays complete.
set(ppcsr_library_sources ${parallel-packed-csr_SOURCES})
list(REMOVE_ITEM ppcsr_library_sources "${ppcsr_source_dir}/main.cpp")

find_package(OpenMP REQUIRED)

# Prefer an installed GoogleTest; otherwise download release 1.8.1 via CPM.
find_package(GTest)
if (GTest_FOUND OR GTEST_FOUND)
    # Imported target: carries the include directory and the gtest library.
    set(ppcsr_gtest_libraries GTest::GTest)
else ()
    include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/CPM.cmake")
    CPMAddPackage(
            NAME googletest
            GITHUB_REPOSITORY google/googletest
            GIT_TAG release-1.8.1
            VERSION 1.8.1
            OPTIONS
            "INSTALL_GTEST OFF"
            "gtest_force_shared_crt ON"
    )
    set(ppcsr_gtest_libraries gtest gtest_main gmock)
endif ()

set(ppcsr_test_targets tests)
if (PPCSR_BUILD_SANITIZER_TESTS)
    list(APPEND ppcsr_test_targets tests-tsan tests-ubsan)
endif ()

# All test binaries share sources, include paths and libraries; only the
# sanitizer flags added further down differ.
foreach (test_target IN LISTS ppcsr_test_targets)
    add_executable(${test_target} ${ppcsr_library_sources} ${parallel-packed-csr_TEST_SOURCES})
    target_include_directories(${test_target} PRIVATE ${parallel-packed-csr_INCLUDE_DIRS} "${ppcsr_test_dir}")
    target_compile_options(${test_target} PRIVATE ${ppcsr_warning_flags})
    target_link_libraries(${test_target} PRIVATE
            ${ppcsr_gtest_libraries}
            Threads::Threads
            numa
            OpenMP::OpenMP_CXX)
endforeach ()

if (PPCSR_BUILD_SANITIZER_TESTS)
    target_compile_options(tests-tsan PRIVATE -fsanitize=thread -g -O1)
    target_link_options(tests-tsan PRIVATE -fsanitize=thread -g -O1)

    target_compile_options(tests-ubsan PRIVATE -fsanitize=undefined -g -O1)
    target_link_options(tests-ubsan PRIVATE -fsanitize=undefined -g -O1)
endif ()

# ---------------------------------------------------------------------------
# CTest registration
# ---------------------------------------------------------------------------
include(CTest)
# gtest_discover_tests() is provided by the GoogleTest module. Include it
# explicitly instead of relying on find_package(GTest) having loaded it.
include(GoogleTest)
gtest_discover_tests(tests)
if (PPCSR_BUILD_SANITIZER_TESTS)
    # The suffixes are what the CI workflow filters on (ctest -E "-tsan").
    gtest_discover_tests(tests-tsan TEST_SUFFIX -tsan)
    gtest_discover_tests(tests-ubsan TEST_SUFFIX -ubsan)
endif ()

-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 eleni 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parallel Packed CSR 2 | A parallel implementation of the packed CSR data structure, based on its initial single-threaded design [[1]](#1) (https://github.com/wheatman/Packed-Compressed-Sparse-Row/) and further parallel extension [[2]](#2). 3 | Accepts a core graph, which it loads first, and an update file, which it uses to update the core graph (insert or delete edges). 4 | Performs edge insertions and deletions in parallel and uses NUMA-aware placement of data and worker threads. 5 | 6 | # Prerequisites 7 | * CMake 3.13 or newer is required (3.14 or newer when GoogleTest is not installed, because the bundled CPM.cmake is then used to download it). 8 | 9 | # Build 10 | Create a build directory and run cmake & make there: 11 | ``` 12 | $ mkdir build && cd build 13 | $ cmake .. 14 | $ make 15 | ``` 16 | # Running 17 | Run the `parallel-packed-csr` binary from your build directory. 18 | 19 | ## Command line options 20 | * `-threads=`: specifies number of threads to use for updates, default=8 21 | * `-size=`: specifies number of edges that will be read from the update file, default=1000000 22 | * `-lock_free`: uses the lock-free version of binary search in the data structure (by default, locks are taken during binary search) 23 | * `-partitions_per_domain=`: specifies the number of graph partitions per NUMA domain 24 | * `-insert`: inserts the edges from the update file to the core graph 25 | * `-delete`: deletes the edges from the update file from the core graph 26 | * `-core_graph=`: specifies the filename of the core graph 27 | * `-update_file=`: specifies the filename of the update file 28 | * Available partitioning strategies (if multiple strategies are given, the last one is used): 29 | * `-ppcsr`: No partitioning 30 | * `-pppcsr`: Partitioning (1 partition per NUMA domain) 31 | * `-pppcsrnuma`: Partitioning with explicit NUMA optimizations (default) 32 | 33 | # Authors 34 | * Eleni Alevra 35 | * Christian Menges 
36 | * Dom Margan 37 | 38 | # References 39 | [1] 40 | Wheatman, B., & Xu, H. (2018). 41 | Packed Compressed Sparse Row: A Dynamic Graph 42 | Representation. 43 | 2018 IEEE High Performance Extreme Computing Conference, HPEC 2018. 44 | 45 | [2] 46 | Alevra, E., & Pietzuch, P. (2020). 47 | A Parallel Data Structure for Streaming Graphs. 48 | Master’s thesis, Imperial College London, 2020. 49 | -------------------------------------------------------------------------------- /cmake/CPM.cmake: -------------------------------------------------------------------------------- 1 | # CPM.cmake - CMake's missing package manager 2 | # =========================================== 3 | # See https://github.com/TheLartians/CPM.cmake for usage and update instructions. 4 | # 5 | # MIT License 6 | # ----------- 7 | #[[ 8 | Copyright (c) 2019 Lars Melchior 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in all 18 | copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | SOFTWARE. 
]]

cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

set(CURRENT_CPM_VERSION 0.27.3)

# Include guard. CPM_DIRECTORY is a cache entry written further down, so it is
# set whenever some copy of CPM.cmake has already been loaded into this build.
if(CPM_DIRECTORY)
  if(NOT CPM_DIRECTORY STREQUAL CMAKE_CURRENT_LIST_DIR)
    # A different copy of CPM.cmake was loaded first; keep using that one and
    # only warn when it is older than this file.
    if (CPM_VERSION VERSION_LESS CURRENT_CPM_VERSION)
      message(AUTHOR_WARNING "${CPM_INDENT} \
A dependency is using a more recent CPM version (${CURRENT_CPM_VERSION}) than the current project (${CPM_VERSION}). \
It is recommended to upgrade CPM to the most recent version. \
See https://github.com/TheLartians/CPM.cmake for more information."
      )
    endif()
    return()
  endif()

  # Same file included again: skip re-initialisation if the global property
  # CPM_INITIALIZED has already been set during this configure run.
  get_property(CPM_INITIALIZED GLOBAL "" PROPERTY CPM_INITIALIZED SET)
  if (CPM_INITIALIZED)
    return()
  endif()
endif()

set_property(GLOBAL PROPERTY CPM_INITIALIZED true)

# User-facing switches. Each default is read from the environment variable of
# the same name at configure time.
option(CPM_USE_LOCAL_PACKAGES "Always try to use `find_package` to get dependencies" $ENV{CPM_USE_LOCAL_PACKAGES})
option(CPM_LOCAL_PACKAGES_ONLY "Only use `find_package` to get dependencies" $ENV{CPM_LOCAL_PACKAGES_ONLY})
option(CPM_DOWNLOAD_ALL "Always download dependencies from source" $ENV{CPM_DOWNLOAD_ALL})
option(CPM_DONT_UPDATE_MODULE_PATH "Don't update the module path to allow using find_package" $ENV{CPM_DONT_UPDATE_MODULE_PATH})
option(CPM_DONT_CREATE_PACKAGE_LOCK "Don't create a package lock file in the binary path" $ENV{CPM_DONT_CREATE_PACKAGE_LOCK})
option(CPM_INCLUDE_ALL_IN_PACKAGE_LOCK "Add all packages added through CPM.cmake to the package lock" $ENV{CPM_INCLUDE_ALL_IN_PACKAGE_LOCK})

# Internal state shared across directories through the cache.
set(CPM_VERSION ${CURRENT_CPM_VERSION} CACHE INTERNAL "")
set(CPM_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} CACHE INTERNAL "")
set(CPM_FILE ${CMAKE_CURRENT_LIST_FILE} CACHE INTERNAL "")
set(CPM_PACKAGES "" CACHE INTERNAL "")
set(CPM_DRY_RUN OFF CACHE INTERNAL "Don't download or configure dependencies (for testing)")

if(DEFINED ENV{CPM_SOURCE_CACHE})
  set(CPM_SOURCE_CACHE_DEFAULT $ENV{CPM_SOURCE_CACHE})
else()
  set(CPM_SOURCE_CACHE_DEFAULT OFF)
endif()

set(CPM_SOURCE_CACHE ${CPM_SOURCE_CACHE_DEFAULT} CACHE PATH "Directory to download CPM dependencies")

if (NOT CPM_DONT_UPDATE_MODULE_PATH)
  set(CPM_MODULE_PATH "${CMAKE_BINARY_DIR}/CPM_modules" CACHE INTERNAL "")
  # remove old modules
  file(REMOVE_RECURSE ${CPM_MODULE_PATH})
  file(MAKE_DIRECTORY ${CPM_MODULE_PATH})
  # locally added CPM modules should override global packages
  set(CMAKE_MODULE_PATH "${CPM_MODULE_PATH};${CMAKE_MODULE_PATH}")
endif()

if (NOT CPM_DONT_CREATE_PACKAGE_LOCK)
  set(CPM_PACKAGE_LOCK_FILE "${CMAKE_BINARY_DIR}/cpm-package-lock.cmake" CACHE INTERNAL "")
  file(WRITE ${CPM_PACKAGE_LOCK_FILE} "# CPM Package Lock\n# This file should be committed to version control\n\n")
endif()

include(FetchContent)
include(CMakeParseArguments)

# Initialize logging prefix
if(NOT CPM_INDENT)
  set(CPM_INDENT "CPM:")
endif()

# Try to locate package NAME through find_package().
#   NAME     package name handed to find_package()
#   VERSION  requested version; may be empty
#   ARGN     extra find_package() arguments as one space-separated string
# Sets CPM_PACKAGE_FOUND (YES/NO) in the caller's scope and registers the
# package with CPM when it was found.
#
# Local deviation from upstream: the function uses its own NAME parameter
# instead of reading CPM_ARGS_NAME from the caller's scope. Every call in this
# file passes ${CPM_ARGS_NAME} as NAME, so the result is unchanged for them.
function(cpm_find_package NAME VERSION)
  string(REPLACE " " ";" EXTRA_ARGS "${ARGN}")
  find_package(${NAME} ${VERSION} ${EXTRA_ARGS} QUIET)
  if(${NAME}_FOUND)
    message(STATUS "${CPM_INDENT} using local package ${NAME}@${VERSION}")
    CPMRegisterPackage(${NAME} "${VERSION}")
    set(CPM_PACKAGE_FOUND YES PARENT_SCOPE)
  else()
    set(CPM_PACKAGE_FOUND NO PARENT_SCOPE)
  endif()
endfunction()

# Create a custom FindXXX.cmake module for a CPM package
# This prevents `find_package(NAME)` from finding the system library
#   Name  package name; the file written is Find<Name>.cmake in CPM_MODULE_PATH
#   ARGN  CMake code stored in the module (the CPMAddPackage call to replay)
function(CPMCreateModuleFile Name)
  if (NOT CPM_DONT_UPDATE_MODULE_PATH)
    # erase any previous modules
    file(WRITE ${CPM_MODULE_PATH}/Find${Name}.cmake "include(${CPM_FILE})\n${ARGN}\nset(${Name}_FOUND TRUE)")
  endif()
endfunction()

# Find a package locally or fallback to CPMAddPackage
# Arguments are the same keyword list that CPMAddPackage accepts; only NAME,
# VERSION and FIND_PACKAGE_ARGUMENTS are parsed here, the full list is forwarded
# unchanged to CPMAddPackage when the package has to be downloaded.
# NOTE(review): GIT_TAG and OPTIONS are not among the keywords parsed in this
# function, so CPM_ARGS_GIT_TAG / CPM_ARGS_OPTIONS are only defined here if they
# are inherited from the calling scope.
function(CPMFindPackage)
  set(oneValueArgs
    NAME
    VERSION
    FIND_PACKAGE_ARGUMENTS
  )

  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "" ${ARGN})

  if (NOT DEFINED CPM_ARGS_VERSION)
    if (DEFINED CPM_ARGS_GIT_TAG)
      cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION)
    endif()
  endif()

  # CPM_DOWNLOAD_ALL bypasses find_package() entirely.
  if (CPM_DOWNLOAD_ALL)
    CPMAddPackage(${ARGN})
    cpm_export_variables(${CPM_ARGS_NAME})
    return()
  endif()

  CPMCheckIfPackageAlreadyAdded(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" "${CPM_ARGS_OPTIONS}")
  if (CPM_PACKAGE_ALREADY_ADDED)
    cpm_export_variables(${CPM_ARGS_NAME})
    return()
  endif()

  cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})

  # Not available locally: download it.
  if(NOT CPM_PACKAGE_FOUND)
    CPMAddPackage(${ARGN})
    cpm_export_variables(${CPM_ARGS_NAME})
  endif()

endfunction()

# checks if a package has been added before
# Sets CPM_PACKAGE_ALREADY_ADDED (YES/NO) in the caller's scope. When the
# package is already registered it also warns if the registered version is
# older than the requested one or if a requested option differs from the value
# currently set, and exports <name>_SOURCE_DIR, <name>_BINARY_DIR and
# <name>_ADDED (NO) to the caller.
function(CPMCheckIfPackageAlreadyAdded CPM_ARGS_NAME CPM_ARGS_VERSION CPM_ARGS_OPTIONS)
  if ("${CPM_ARGS_NAME}" IN_LIST CPM_PACKAGES)
    CPMGetPackageVersion(${CPM_ARGS_NAME} CPM_PACKAGE_VERSION)
    if("${CPM_PACKAGE_VERSION}" VERSION_LESS "${CPM_ARGS_VERSION}")
      message(WARNING "${CPM_INDENT} requires a newer version of ${CPM_ARGS_NAME} (${CPM_ARGS_VERSION}) than currently included (${CPM_PACKAGE_VERSION}).")
    endif()
    if (CPM_ARGS_OPTIONS)
      foreach(OPTION ${CPM_ARGS_OPTIONS})
        # cpm_parse_option sets OPTION_KEY and OPTION_VALUE in this scope
        cpm_parse_option(${OPTION})
        if(NOT "${${OPTION_KEY}}" STREQUAL "${OPTION_VALUE}")
          message(WARNING "${CPM_INDENT} ignoring package option for ${CPM_ARGS_NAME}: ${OPTION_KEY} = ${OPTION_VALUE} (${${OPTION_KEY}})")
        endif()
      endforeach()
    endif()
    cpm_get_fetch_properties(${CPM_ARGS_NAME})
    SET(${CPM_ARGS_NAME}_ADDED NO)
    SET(CPM_PACKAGE_ALREADY_ADDED YES PARENT_SCOPE)
    cpm_export_variables(${CPM_ARGS_NAME})
  else()
    SET(CPM_PACKAGE_ALREADY_ADDED NO PARENT_SCOPE)
  endif()
endfunction()

# Download and add a package from source
# Keyword arguments (see oneValueArgs / multiValueArgs below); anything not
# listed there is kept in CPM_ARGS_UNPARSED_ARGUMENTS and forwarded to
# FetchContent_Declare. On return <NAME>_SOURCE_DIR, <NAME>_BINARY_DIR and
# <NAME>_ADDED are set in the caller's scope.
function(CPMAddPackage)

  set(oneValueArgs
    NAME
    FORCE
    VERSION
    GIT_TAG
    DOWNLOAD_ONLY
    GITHUB_REPOSITORY
    GITLAB_REPOSITORY
    GIT_REPOSITORY
    SOURCE_DIR
    DOWNLOAD_COMMAND
    FIND_PACKAGE_ARGUMENTS
    NO_CACHE
    GIT_SHALLOW
  )

  set(multiValueArgs
    OPTIONS
  )

  cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}")

  # Set default values for arguments

  # No explicit VERSION: derive one from GIT_TAG if that was given.
  if (NOT DEFINED CPM_ARGS_VERSION)
    if (DEFINED CPM_ARGS_GIT_TAG)
      cpm_get_version_from_git_tag("${CPM_ARGS_GIT_TAG}" CPM_ARGS_VERSION)
    endif()
  endif()

  if(CPM_ARGS_DOWNLOAD_ONLY)
    set(DOWNLOAD_ONLY ${CPM_ARGS_DOWNLOAD_ONLY})
  else()
    set(DOWNLOAD_ONLY NO)
  endif()

  # GITHUB_REPOSITORY / GITLAB_REPOSITORY are shorthands for GIT_REPOSITORY.
  if (DEFINED CPM_ARGS_GITHUB_REPOSITORY)
    set(CPM_ARGS_GIT_REPOSITORY "https://github.com/${CPM_ARGS_GITHUB_REPOSITORY}.git")
  endif()

  if (DEFINED CPM_ARGS_GITLAB_REPOSITORY)
    set(CPM_ARGS_GIT_REPOSITORY "https://gitlab.com/${CPM_ARGS_GITLAB_REPOSITORY}.git")
  endif()

  if (DEFINED CPM_ARGS_GIT_REPOSITORY)
    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_REPOSITORY ${CPM_ARGS_GIT_REPOSITORY})
    # without an explicit tag, fall back to "v<VERSION>"
    if (NOT DEFINED CPM_ARGS_GIT_TAG)
      set(CPM_ARGS_GIT_TAG v${CPM_ARGS_VERSION})
    endif()
  endif()

  if (DEFINED CPM_ARGS_GIT_TAG)
    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_TAG ${CPM_ARGS_GIT_TAG})
    # If GIT_SHALLOW is explicitly specified, honor the value.
    if (DEFINED CPM_ARGS_GIT_SHALLOW)
      list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW ${CPM_ARGS_GIT_SHALLOW})
    endif()
  endif()

  # Check if package has been added before
  CPMCheckIfPackageAlreadyAdded(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" "${CPM_ARGS_OPTIONS}")
  if (CPM_PACKAGE_ALREADY_ADDED)
    cpm_export_variables(${CPM_ARGS_NAME})
    return()
  endif()

  # Check for manual overrides
  # (a non-empty CPM_<NAME>_SOURCE variable replaces the download with that
  # directory; FORCE prevents the recursive call from taking this branch again)
  if (NOT CPM_ARGS_FORCE AND NOT "${CPM_${CPM_ARGS_NAME}_SOURCE}" STREQUAL "")
    set(PACKAGE_SOURCE ${CPM_${CPM_ARGS_NAME}_SOURCE})
    set(CPM_${CPM_ARGS_NAME}_SOURCE "")
    CPMAddPackage(
      NAME ${CPM_ARGS_NAME}
      SOURCE_DIR ${PACKAGE_SOURCE}
      FORCE True
    )
    cpm_export_variables(${CPM_ARGS_NAME})
    return()
  endif()

  # Check for available declaration
  # (set by CPMDeclarePackage; the stored arguments replace the ones given here)
  if (NOT CPM_ARGS_FORCE AND NOT "${CPM_DECLARATION_${CPM_ARGS_NAME}}" STREQUAL "")
    set(declaration ${CPM_DECLARATION_${CPM_ARGS_NAME}})
    set(CPM_DECLARATION_${CPM_ARGS_NAME} "")
    CPMAddPackage(${declaration})
    cpm_export_variables(${CPM_ARGS_NAME})
    # checking again to ensure version and option compatibility
    CPMCheckIfPackageAlreadyAdded(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" "${CPM_ARGS_OPTIONS}")
    return()
  endif()

  if(CPM_USE_LOCAL_PACKAGES OR CPM_LOCAL_PACKAGES_ONLY)
    cpm_find_package(${CPM_ARGS_NAME} "${CPM_ARGS_VERSION}" ${CPM_ARGS_FIND_PACKAGE_ARGUMENTS})

    if(CPM_PACKAGE_FOUND)
      cpm_export_variables(${CPM_ARGS_NAME})
      return()
    endif()

    if(CPM_LOCAL_PACKAGES_ONLY)
      message(SEND_ERROR "CPM: ${CPM_ARGS_NAME} not found via find_package(${CPM_ARGS_NAME} ${CPM_ARGS_VERSION})")
    endif()
  endif()

  CPMRegisterPackage("${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}")

  # Each OPTIONS entry ("KEY VALUE" or just "KEY") becomes an internal cache
  # variable before the package is configured.
  if (CPM_ARGS_OPTIONS)
    foreach(OPTION ${CPM_ARGS_OPTIONS})
      cpm_parse_option(${OPTION})
      set(${OPTION_KEY} ${OPTION_VALUE} CACHE INTERNAL "")
    endforeach()
  endif()

  # PACKAGE_INFO is only used for the status message in cpm_declare_fetch.
  if (DEFINED CPM_ARGS_GIT_TAG)
    set(PACKAGE_INFO "${CPM_ARGS_GIT_TAG}")
  elseif (DEFINED CPM_ARGS_SOURCE_DIR)
    set(PACKAGE_INFO "${CPM_ARGS_SOURCE_DIR}")
  else()
    set(PACKAGE_INFO "${CPM_ARGS_VERSION}")
  endif()

  if (DEFINED CPM_ARGS_DOWNLOAD_COMMAND)
    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND ${CPM_ARGS_DOWNLOAD_COMMAND})
  elseif (DEFINED CPM_ARGS_SOURCE_DIR)
    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${CPM_ARGS_SOURCE_DIR})
  elseif (CPM_SOURCE_CACHE AND NOT CPM_ARGS_NO_CACHE)
    # Source cache: the download location is
    # <CPM_SOURCE_CACHE>/<lower-case name>/<SHA1 of the sorted fetch arguments>.
    string(TOLOWER ${CPM_ARGS_NAME} lower_case_name)
    set(origin_parameters ${CPM_ARGS_UNPARSED_ARGUMENTS})
    list(SORT origin_parameters)
    string(SHA1 origin_hash "${origin_parameters}")
    set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
    list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS SOURCE_DIR ${download_directory})
    if (EXISTS ${download_directory})
      # disable the download command to allow offline builds
      list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS DOWNLOAD_COMMAND "${CMAKE_COMMAND}")
      set(PACKAGE_INFO "${download_directory}")
    else()
      # Enable shallow clone when GIT_TAG is not a commit hash.
      # Our guess may not be accurate, but it should guarantee no commit hash get mis-detected.
      if (NOT DEFINED CPM_ARGS_GIT_SHALLOW)
        cpm_is_git_tag_commit_hash("${CPM_ARGS_GIT_TAG}" IS_HASH)
        if (NOT ${IS_HASH})
          list(APPEND CPM_ARGS_UNPARSED_ARGUMENTS GIT_SHALLOW TRUE)
        endif()
      endif()

      # remove timestamps so CMake will re-download the dependency
      file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/_deps/${lower_case_name}-subbuild)
      set(PACKAGE_INFO "${PACKAGE_INFO} -> ${download_directory}")
    endif()
  endif()

  CPMCreateModuleFile(${CPM_ARGS_NAME} "CPMAddPackage(${ARGN})")

  # CPM_PACKAGE_LOCK_ENABLED is set by CPMUsePackageLock.
  if (CPM_PACKAGE_LOCK_ENABLED)
    if ((CPM_ARGS_VERSION AND NOT CPM_ARGS_SOURCE_DIR) OR CPM_INCLUDE_ALL_IN_PACKAGE_LOCK)
      cpm_add_to_package_lock(${CPM_ARGS_NAME} "${ARGN}")
    elseif(CPM_ARGS_SOURCE_DIR)
      cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "local directory")
    else()
      cpm_add_comment_to_package_lock(${CPM_ARGS_NAME} "${ARGN}")
    endif()
  endif()

  cpm_declare_fetch("${CPM_ARGS_NAME}" "${CPM_ARGS_VERSION}" "${PACKAGE_INFO}" "${CPM_ARGS_UNPARSED_ARGUMENTS}")
  cpm_fetch_package("${CPM_ARGS_NAME}" "${DOWNLOAD_ONLY}")
  cpm_get_fetch_properties("${CPM_ARGS_NAME}")

  SET(${CPM_ARGS_NAME}_ADDED YES)
  cpm_export_variables("${CPM_ARGS_NAME}")
endfunction()

# Fetch a previously declared package
# (a macro, so CPMAddPackage's exported variables land in the caller's scope)
macro(CPMGetPackage Name)
  if (DEFINED "CPM_DECLARATION_${Name}")
    CPMAddPackage(
      NAME ${Name}
    )
  else()
    message(SEND_ERROR "Cannot retrieve package ${Name}: no declaration available")
  endif()
endmacro()

# export variables available to the caller to the parent scope
# expects ${CPM_ARGS_NAME} to be set
# Copies <name>_SOURCE_DIR, <name>_BINARY_DIR and <name>_ADDED one scope up.
# Being a macro, "parent" is the parent of the function that invokes it.
macro(cpm_export_variables name)
  SET(${name}_SOURCE_DIR "${${name}_SOURCE_DIR}" PARENT_SCOPE)
  SET(${name}_BINARY_DIR "${${name}_BINARY_DIR}" PARENT_SCOPE)
  SET(${name}_ADDED "${${name}_ADDED}" PARENT_SCOPE)
endmacro()

# declares a package, so that any call to CPMAddPackage for the
# package name will use these arguments instead.
# Previous declarations will not be overridden.
macro(CPMDeclarePackage Name)
  if (NOT DEFINED "CPM_DECLARATION_${Name}")
    set("CPM_DECLARATION_${Name}" "${ARGN}")
  endif()
endmacro()

# Appends a CPMDeclarePackage(<Name> "<ARGN>") line for the package to the
# generated lock file, unless lock-file creation is disabled.
function(cpm_add_to_package_lock Name)
  if (NOT CPM_DONT_CREATE_PACKAGE_LOCK)
    file(APPEND ${CPM_PACKAGE_LOCK_FILE} "# ${Name}\nCPMDeclarePackage(${Name} \"${ARGN}\")\n")
  endif()
endfunction()

# Same as cpm_add_to_package_lock, but the declaration is written commented out
# and marked "(unversioned)".
function(cpm_add_comment_to_package_lock Name)
  if (NOT CPM_DONT_CREATE_PACKAGE_LOCK)
    file(APPEND ${CPM_PACKAGE_LOCK_FILE} "# ${Name} (unversioned)\n# CPMDeclarePackage(${Name} \"${ARGN}\")\n")
  endif()
endfunction()

# includes the package lock file if it exists and creates a target
# `cpm-update-package-lock` to update it
# (the target copies the lock file generated in the build directory over `file`)
macro(CPMUsePackageLock file)
  if (NOT CPM_DONT_CREATE_PACKAGE_LOCK)
    get_filename_component(CPM_ABSOLUTE_PACKAGE_LOCK_PATH ${file} ABSOLUTE)
    if(EXISTS ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
      include(${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
    endif()
    if (NOT TARGET cpm-update-package-lock)
      add_custom_target(cpm-update-package-lock COMMAND ${CMAKE_COMMAND} -E copy ${CPM_PACKAGE_LOCK_FILE} ${CPM_ABSOLUTE_PACKAGE_LOCK_PATH})
    endif()
    set(CPM_PACKAGE_LOCK_ENABLED true)
  endif()
endmacro()

# registers a package that has been added to CPM
# Appends PACKAGE to the cached CPM_PACKAGES list and stores VERSION in the
# cache entry CPM_PACKAGE_<PACKAGE>_VERSION.
function(CPMRegisterPackage PACKAGE VERSION)
  list(APPEND CPM_PACKAGES ${PACKAGE})
  set(CPM_PACKAGES ${CPM_PACKAGES} CACHE INTERNAL "")
  set("CPM_PACKAGE_${PACKAGE}_VERSION" ${VERSION} CACHE INTERNAL "")
endfunction()

# retrieve the current version of the package to ${OUTPUT}
function(CPMGetPackageVersion PACKAGE OUTPUT)
  set(${OUTPUT} "${CPM_PACKAGE_${PACKAGE}_VERSION}" PARENT_SCOPE)
endfunction()

# declares a package in FetchContent_Declare
#   PACKAGE  package name
#   VERSION  version, used for the status message only
#   INFO     human-readable origin, used for the status message only
#   ARGN     arguments forwarded verbatim to FetchContent_Declare
function (cpm_declare_fetch PACKAGE VERSION INFO)
  message(STATUS "${CPM_INDENT} adding package ${PACKAGE}@${VERSION} (${INFO})")

  if (${CPM_DRY_RUN})
    message(STATUS "${CPM_INDENT} package not declared (dry run)")
    return()
  endif()

  FetchContent_Declare(${PACKAGE}
    ${ARGN}
  )
endfunction()

# returns properties for a package previously defined by cpm_declare_fetch
# Sets <PACKAGE>_SOURCE_DIR and <PACKAGE>_BINARY_DIR in the caller's scope.
# FetchContent publishes its variables under the lower-cased package name, so
# they are read from there and re-exported under the name as given.
function (cpm_get_fetch_properties PACKAGE)
  if (${CPM_DRY_RUN})
    return()
  endif()
  FetchContent_GetProperties(${PACKAGE})
  string(TOLOWER "${PACKAGE}" lpackage)
  set(${PACKAGE}_SOURCE_DIR "${${lpackage}_SOURCE_DIR}" PARENT_SCOPE)
  set(${PACKAGE}_BINARY_DIR "${${lpackage}_BINARY_DIR}" PARENT_SCOPE)
endfunction()

# downloads a previously declared package via FetchContent
#   PACKAGE        package name as passed to cpm_declare_fetch
#   DOWNLOAD_ONLY  if true, only populate the sources; otherwise also add the
#                  package to the build via FetchContent_MakeAvailable
function (cpm_fetch_package PACKAGE DOWNLOAD_ONLY)
  if (${CPM_DRY_RUN})
    message(STATUS "${CPM_INDENT} package ${PACKAGE} not fetched (dry run)")
    return()
  endif()

  if(DOWNLOAD_ONLY)
    FetchContent_GetProperties(${PACKAGE})
    # FetchContent_GetProperties sets <lower-case name>_POPULATED. Testing
    # ${PACKAGE}_POPULATED, as before, never matched for names containing
    # upper-case letters, so such packages were always populated again.
    string(TOLOWER "${PACKAGE}" lower_case_name)
    if(NOT ${lower_case_name}_POPULATED)
      FetchContent_Populate(${PACKAGE})
    endif()
  else()
    # extend the log prefix while the dependency configures, then restore it
    set(CPM_OLD_INDENT "${CPM_INDENT}")
    set(CPM_INDENT "${CPM_INDENT} ${PACKAGE}:")
    FetchContent_MakeAvailable(${PACKAGE})
    set(CPM_INDENT "${CPM_OLD_INDENT}")
  endif()
endfunction()

# splits a package option
# OPTION has the form "KEY VALUE" or just "KEY". Sets OPTION_KEY and
# OPTION_VALUE in the caller's scope; a missing value is treated as "ON".
function(cpm_parse_option OPTION)
  string(REGEX MATCH "^[^ ]+" OPTION_KEY "${OPTION}")
  string(LENGTH "${OPTION}" OPTION_LENGTH)
  string(LENGTH "${OPTION_KEY}" OPTION_KEY_LENGTH)
  if (OPTION_KEY_LENGTH STREQUAL OPTION_LENGTH)
    # no value for key provided, assume user wants to set option to "ON"
    set(OPTION_VALUE "ON")
  else()
    # skip the key and the single separating space
    math(EXPR OPTION_KEY_LENGTH "${OPTION_KEY_LENGTH}+1")
    string(SUBSTRING "${OPTION}" "${OPTION_KEY_LENGTH}" "-1" OPTION_VALUE)
  endif()
  set(OPTION_KEY "${OPTION_KEY}" PARENT_SCOPE)
  set(OPTION_VALUE "${OPTION_VALUE}" PARENT_SCOPE)
endfunction()

# guesses the package version from a git tag
# Stores the guess in the caller's variable named by RESULT: 0 for a
# 40-character tag (assumed to be a hash), otherwise the digits-and-dots run
# matched after an optional leading "v".
function(cpm_get_version_from_git_tag GIT_TAG RESULT)
  string(LENGTH "${GIT_TAG}" length)
  if (length EQUAL 40)
    # GIT_TAG is probably a git hash
    set(${RESULT} 0 PARENT_SCOPE)
  else()
    string(REGEX MATCH "v?([0123456789.]*).*" _ "${GIT_TAG}")
    set(${RESULT} ${CMAKE_MATCH_1} PARENT_SCOPE)
  endif()
endfunction()

# guesses if the git tag is a commit hash or an actual tag or a branch name.
# Stores 1 in the caller's variable named by RESULT when GIT_TAG is 7..40
# hexadecimal characters, 0 otherwise.
function(cpm_is_git_tag_commit_hash GIT_TAG RESULT)
  string(LENGTH "${GIT_TAG}" length)
  # full hash has 40 characters, and short hash has at least 7 characters.
  if (length LESS 7 OR length GREATER 40)
    set(${RESULT} 0 PARENT_SCOPE)
  else()
    # Quoted so the tag text itself is matched; unquoted, a tag that happens to
    # be the name of a defined variable would be dereferenced a second time.
    if ("${GIT_TAG}" MATCHES "^[a-fA-F0-9]+$")
      set(${RESULT} 1 PARENT_SCOPE)
    else()
      set(${RESULT} 0 PARENT_SCOPE)
    endif()
  endif()
endfunction()

-------------------------------------------------------------------------------- /cmake/modules/clang-format.cmake: --------------------------------------------------------------------------------
# Defines the `clangformat` target, which reformats all project and test
# sources in place. Expects the parallel-packed-csr_* file lists to be set by
# the including CMakeLists.txt. The target is only created when a clang-format
# executable was found; previously a missing tool produced a target whose
# command was the literal "CLANG_FORMAT-NOTFOUND".
find_program(CLANG_FORMAT NAMES clang-format)
if (CLANG_FORMAT)
    add_custom_target(
            clangformat
            COMMAND ${CLANG_FORMAT} -i ${parallel-packed-csr_HEADERS} ${parallel-packed-csr_SOURCES}
            ${parallel-packed-csr_TEST_HEADERS} ${parallel-packed-csr_TEST_SOURCES}
            VERBATIM)
else ()
    message(STATUS "clang-format not found: the clangformat target is not available")
endif ()

-------------------------------------------------------------------------------- /src/.clang-format: -------------------------------------------------------------------------------- 1 | # received from: https://github.com/AutoPas/AutoPas/blob/master/.clang-format 2 | 3 | # base is Google style guide 4 | BasedOnStyle: Google 5 | # we want more space per line 6 | ColumnLimit: 120 
7 | # ensure consistent pointer alignment 8 | DerivePointerAlignment: false 9 | PointerAlignment: Right 10 | -------------------------------------------------------------------------------- /src/benchmarking/benchmark-partitioning.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Written by Christian Menges and Domagoj Margan, August 2020 3 | 4 | ###################################### 5 | 6 | # Pass the benchmark config file path as a first script argument 7 | BENCHMARK_CONFIG_FILE="$1" 8 | 9 | # The config file should contain and define the following variables: 10 | 11 | # Machine and dataset info for plotting: 12 | # MACHINE_NAME -> Name of the testbed machine 13 | # DATASET_NAME -> Dataset alias 14 | 15 | # Program and data input file paths: 16 | # PPCSR_EXEC -> program binary file 17 | # PPCSR_CORE_GRAPH_FILE -> core graph edgelist file 18 | # PPCSR_INSERTIONS_FILE -> insertions update file 19 | # PPCSR_DELETIONS_FILE -> deletions update file 20 | 21 | # Experiment parameters: 22 | # REPETITIONS -> number of times to repeat the benchmark; integer 23 | # CORES -> number of cores to utilise in the benchmark; integer 24 | # PARTITIONS_PER_DOMAIN -> number of partitions per NUMA domain; array of integers 25 | # SIZE -> number of edges that will be read from the update file; integer 26 | 27 | source "$BENCHMARK_CONFIG_FILE" 28 | if [ ! -f "$PPCSR_EXEC" ]; then 29 | echo -e "Executable not found.\n" 30 | exit 1 31 | fi 32 | 33 | if [ ! -f "$PPCSR_CORE_GRAPH_FILE" ]; then 34 | echo -e "Core graph not found.\n" 35 | exit 1 36 | fi 37 | 38 | if [ ! -f "$PPCSR_INSERTIONS_FILE" ] || 39 | [ ! 
-f "$PPCSR_DELETIONS_FILE" ]; then 40 | echo -e "Update files not found.\n" 41 | exit 1 42 | fi 43 | 44 | # Define output files 45 | TIME=$(date +%Y%m%d_%H%M%S) 46 | PPCSR_BASE_NAME="${MACHINE_NAME}_${TIME}_ppcsr_partitioning" 47 | PPCSR_BENCHMARK_OUTPUTS_DIR="${PPCSR_BASE_NAME}_bench_outputs" 48 | PPCSR_PROGRAM_OUTPUTS_DIR="${PPCSR_BENCHMARK_OUTPUTS_DIR}/program_outputs" 49 | PPCSR_BENCHMARK_LOG="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_script_log.txt" 50 | PPCSR_CSV_DATA="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_all_results.csv" 51 | PPCSR_PLOT_DATA="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_plot_data.dat" 52 | PPCSR_PDF_PLOT_FILE="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_plot" 53 | 54 | mkdir $PPCSR_BENCHMARK_OUTPUTS_DIR $PPCSR_PROGRAM_OUTPUTS_DIR 55 | 56 | # Write everything to log file 57 | : > $PPCSR_BENCHMARK_LOG 58 | exec 2> >(tee -a $PPCSR_BENCHMARK_LOG >&2) > >(tee -a $PPCSR_BENCHMARK_LOG) 59 | 60 | ###################################### 61 | 62 | echo "######################################" 63 | echo "Starting benchmark: partitioning" 64 | 65 | echo "Testbed machine: $MACHINE_NAME" 66 | echo "Dataset: $DATASET_NAME" 67 | echo "Core graph file: $PPCSR_CORE_GRAPH_FILE" 68 | echo "Edge insertions file: $PPCSR_INSERTIONS_FILE" 69 | echo "Edge deletions file: $PPCSR_DELETIONS_FILE" 70 | echo "Repetitions: $REPETITIONS" 71 | echo "#cores: ${CORES}" 72 | echo "#partitions per NUMA domain: ${PARTITIONS_PER_DOMAIN[*]}" 73 | echo "Update batch size: $SIZE" 74 | echo -e "######################################\n" 75 | 76 | ###################################### 77 | 78 | echo -e "[START]\t Starting computations...\n" 79 | 80 | # Write headers to CSV log 81 | header="#PARTITIONS" 82 | function writeHeader() { 83 | for ((r = 0; r < REPETITIONS; r++)); do 84 | header="${header} $1${r}" 85 | done 86 | header="${header} $1_Avg $1_Stddev" 87 | } 88 | 89 | writeHeader "INS_PPPCSR" 90 | writeHeader "DEL_PPPCSR" 91 | writeHeader 
"INS_PPPCSR_NUMA" 92 | writeHeader "DEL_PPPCSR_NUMA" 93 | 94 | echo "$header" >>$PPCSR_CSV_DATA 95 | echo "partitions ins del ins-NUMA del-NUMA" >>$PPCSR_PLOT_DATA 96 | 97 | # Run the partitioning and write measures to the CSV log 98 | for p in ${PARTITIONS_PER_DOMAIN[@]}; do 99 | csv="" 100 | dat="" 101 | for v in -pppcsr -pppcsrnuma; do 102 | insert="" 103 | for ((r = 1; r <= REPETITIONS; r++)); do 104 | echo -e "[START]\t ${v:1} edge insertions: Executing repetition #$r on $CORES cores for $p partitions per NUMA domain..." 105 | output=$($PPCSR_EXEC -threads=$CORES $v -size=$SIZE -core_graph=$PPCSR_CORE_GRAPH_FILE -update_file=$PPCSR_INSERTIONS_FILE -partitions_per_domain=$p 2>&1 | tee "${PPCSR_PROGRAM_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_insertions_${v:1}_${CORES}cores_${p}par_${r}.txt" | sed '/Elapsed/!d' | sed -n '0~2p' | sed 's/Elapsed wall clock time: //g') 106 | echo -e "[END] \t ${v:1} edge insertions: Finished repetition #$r on $CORES cores for $p partitions per NUMA domain.\n" 107 | insert="${insert} ${output}" 108 | done 109 | 110 | if [ "$REPETITIONS" -gt 1 ]; then 111 | read avg_insert stddev_insert <<<$(echo "$insert" | awk '{ A=0; V=0; for(N=1; N<=NF; N++) A+=$N ; A/=NF ; for(N=1; N<=NF; N++) V+=(($N-A)*($N-A))/(NF-1); print A,sqrt(V) }') 112 | else 113 | avg_insert=$insert 114 | stddev_insert=0 115 | fi 116 | insert="${insert} ${avg_insert} ${stddev_insert}" 117 | 118 | delete="" 119 | for ((r = 1; r <= REPETITIONS; r++)); do 120 | echo -e "[START]\t ${v:1} edge deletions: Executing repetition #$r on $CORES cores for $p partitions per NUMA domain..." 
121 | output=$($PPCSR_EXEC -delete -threads=$CORES $v -size=$SIZE -core_graph=$PPCSR_CORE_GRAPH_FILE -update_file=$PPCSR_DELETIONS_FILE -partitions_per_domain=$p 2>&1 | tee "${PPCSR_PROGRAM_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_deletions_${v:1}_${CORES}cores_${p}par_${r}.txt" | sed '/Elapsed/!d' | sed -n '0~2p' | sed 's/Elapsed wall clock time: //g') 122 | echo -e "[END] \t ${v:1} edge deletions: Finished repetition #$r on $CORES cores for $p partitions per NUMA domain.\n" 123 | delete="${delete} ${output}" 124 | done 125 | 126 | if [ "$REPETITIONS" -gt 1 ]; then 127 | read avg_delete stddev_delete <<<$(echo "$delete" | awk '{ A=0; V=0; for(N=1; N<=NF; N++) A+=$N ; A/=NF ; for(N=1; N<=NF; N++) V+=(($N-A)*($N-A))/(NF-1); print A,sqrt(V) }') 128 | else 129 | avg_delete=$delete 130 | stddev_delete=0 131 | fi 132 | delete="${delete} ${avg_delete} ${stddev_delete}" 133 | 134 | csv="${csv}${insert} ${delete}" 135 | dat="${dat}${avg_insert} ${avg_delete} " 136 | done 137 | 138 | echo "$csv" | sed -e "s/^/$p/" >>$PPCSR_CSV_DATA 139 | echo $p $dat >>$PPCSR_PLOT_DATA 140 | done 141 | 142 | echo -e "[END] \t Computations finished.\n" 143 | 144 | ###################################### 145 | 146 | # Create the plot 147 | 148 | echo -e "[START]\t Starting data plotting...\n" 149 | 150 | PPCSR_PLOT_FILE=$(mktemp gnuplot.pXXX) 151 | PPCSR_PLOT_DATA_TRANSP=$(mktemp gnuplot.datXXX) 152 | 153 | awk ' 154 | { 155 | for (i=1; i<=NF; i++) { 156 | a[NR,i] = $i 157 | } 158 | } 159 | NF>p { p = NF } 160 | END { 161 | for(j=1; j<=p; j++) { 162 | str=a[1,j] 163 | for(i=2; i<=NR; i++){ 164 | str=str" "a[i,j]; 165 | } 166 | print str 167 | } 168 | 169 | }' $PPCSR_PLOT_DATA >$PPCSR_PLOT_DATA_TRANSP 170 | 171 | XLABEL="#Partitions per NUMA domain" 172 | YLABEL="CPU Time (ms)" 173 | 174 | cat <<EOF >$PPCSR_PLOT_FILE 175 | set term pdf font ", 12" 176 | set output "${PPCSR_PDF_PLOT_FILE}.pdf" 177 | 178 | set title font ", 10" 179 | set title "Machine: $MACHINE_NAME \t Threads: $CORES \t Dataset: $DATASET_NAME 
\t #Updates: $SIZE" 180 | set xlabel "${XLABEL}" 181 | set ylabel "${YLABEL}" offset 1.5 182 | set size ratio 0.5 183 | 184 | set key right top 185 | set key font ", 10" 186 | 187 | set style data histograms 188 | set style histogram cluster gap 1 189 | set style fill solid 0.3 190 | set boxwidth 0.9 191 | set auto x 192 | set xtic scale 0 193 | set yrange [0:] 194 | 195 | N = system("awk 'NR==1{print NF}' $PPCSR_PLOT_DATA_TRANSP") 196 | 197 | plot for [COL=2:N] "$PPCSR_PLOT_DATA_TRANSP" using COL:xtic(1) title columnheader 198 | EOF 199 | 200 | gnuplot $PPCSR_PLOT_FILE 201 | rm $PPCSR_PLOT_FILE 202 | rm $PPCSR_PLOT_DATA_TRANSP 203 | pdfcrop --margins "0 0 0 0" --clip ${PPCSR_PDF_PLOT_FILE}.pdf ${PPCSR_PDF_PLOT_FILE}.pdf &>/dev/null 204 | 205 | echo -e "[END] \t Plotting finished.\n" 206 | 207 | echo "Exiting benchmark." 208 | 209 | exit 0 210 | 211 | -------------------------------------------------------------------------------- /src/benchmarking/benchmark-strong-scaling.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Written by Christian Menges and Domagoj Margan, August 2020 3 | 4 | ###################################### 5 | 6 | # Pass the benchmark config file path as a first script argument 7 | BENCHMARK_CONFIG_FILE="$1" 8 | 9 | # The config file should contain and define the following variables: 10 | 11 | # Machine and dataset info for plotting: 12 | # MACHINE_NAME -> Name of the testbed machine 13 | # DATASET_NAME -> Dataset alias 14 | 15 | # Program and data input file paths: 16 | # PPCSR_EXEC -> program binary file 17 | # PPCSR_CORE_GRAPH_FILE -> core graph edgelist file 18 | # PPCSR_INSERTIONS_FILE -> insertions update file 19 | # PPCSR_DELETIONS_FILE -> deletions update file 20 | 21 | # Experiment parameters: 22 | # REPETITIONS -> number of times to repeat the benchmark; integer 23 | # CORES -> number of cores to utilise in the scaling benchmark; array of integers 24 | # NUMA_BOUNDS -> number of 
cores that mark end of NUMA domain boundaries; array of integers 25 | # PARTITIONS_PER_DOMAIN -> number of partitions per NUMA domain; array of integers 26 | # SIZE -> number of edges that will be read from the update file; integer 27 | 28 | source "$BENCHMARK_CONFIG_FILE" 29 | if [ ! -f "$PPCSR_EXEC" ]; then 30 | echo -e "Executable not found.\n" 31 | exit 1 32 | fi 33 | 34 | if [ ! -f "$PPCSR_CORE_GRAPH_FILE" ]; then 35 | echo -e "Core graph not found.\n" 36 | exit 1 37 | fi 38 | 39 | if [ ! -f "$PPCSR_INSERTIONS_FILE" ] || 40 | [ ! -f "$PPCSR_DELETIONS_FILE" ]; then 41 | echo -e "Update files not found.\n" 42 | exit 1 43 | fi 44 | 45 | # Define output files 46 | TIME=$(date +%Y%m%d_%H%M%S) 47 | PPCSR_BASE_NAME="${MACHINE_NAME}_${TIME}_ppcsr_scalability" 48 | PPCSR_BENCHMARK_OUTPUTS_DIR="${PPCSR_BASE_NAME}_bench_outputs" 49 | PPCSR_PROGRAM_OUTPUTS_DIR="${PPCSR_BENCHMARK_OUTPUTS_DIR}/program_outputs" 50 | PPCSR_BENCHMARK_LOG="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_script_log.txt" 51 | PPCSR_CSV_DATA="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_all_results.csv" 52 | PPCSR_PLOT_DATA="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_plot_data.dat" 53 | PPCSR_PDF_PLOT_FILE="${PPCSR_BENCHMARK_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_plot" 54 | 55 | mkdir $PPCSR_BENCHMARK_OUTPUTS_DIR $PPCSR_PROGRAM_OUTPUTS_DIR 56 | 57 | # Write everything to log file 58 | : > $PPCSR_BENCHMARK_LOG 59 | exec 2> >(tee -a $PPCSR_BENCHMARK_LOG >&2) > >(tee -a $PPCSR_BENCHMARK_LOG) 60 | 61 | 62 | ###################################### 63 | 64 | echo "######################################" 65 | echo "Starting benchmark: strong scaling" 66 | 67 | echo "Testbed machine: $MACHINE_NAME" 68 | echo "Dataset: $DATASET_NAME" 69 | echo "Core graph file: $PPCSR_CORE_GRAPH_FILE" 70 | echo "Edge insertions file: $PPCSR_INSERTIONS_FILE" 71 | echo "Edge deletions file: $PPCSR_DELETIONS_FILE" 72 | echo "Repetitions: $REPETITIONS" 73 | echo "#cores: ${CORES[*]}" 74 | echo "NUMA domain boundaries: 
${NUMA_BOUNDS[*]}" 75 | echo "#partitions per NUMA domain: ${PARTITIONS_PER_DOMAIN[*]}" 76 | echo "Update batch size: $SIZE" 77 | echo -e "######################################\n" 78 | 79 | ###################################### 80 | 81 | echo -e "[START]\t Starting computations...\n" 82 | 83 | # Write headers to CSV log 84 | header="#CORES" 85 | function writeHeader() { 86 | for ((r = 0; r < REPETITIONS; r++)); do 87 | header="${header} $1${r}" 88 | done 89 | header="${header} $1_Avg $1_Stddev" 90 | } 91 | 92 | writeHeader "INS_PPCSR" 93 | writeHeader "DEL_PPCSR" 94 | 95 | for p in ${PARTITIONS_PER_DOMAIN[@]}; do 96 | writeHeader "INS_PPPCSR_${p}PAR" 97 | writeHeader "DEL_PPPCSR_${p}PAR" 98 | writeHeader "INS_PPPCSR_NUMA_${p}PAR" 99 | writeHeader "DEL_PPPCSR_NUMA_${p}PAR" 100 | done 101 | 102 | echo "$header" >>$PPCSR_CSV_DATA 103 | 104 | # Run the scaling experiment and write measures to the CSV log 105 | for core in ${CORES[@]}; do 106 | csv="" 107 | dat="" 108 | for v in -ppcsr -pppcsr -pppcsrnuma; do 109 | for p in ${PARTITIONS_PER_DOMAIN[@]}; do 110 | if [ "$v" = "-ppcsr" ]; then 111 | p=1 112 | fi 113 | insert="" 114 | for ((r = 1; r <= REPETITIONS; r++)); do 115 | echo -e "[START]\t ${v:1} edge insertions: Executing repetition #$r on $core cores for $p partitions per NUMA domain......" 
116 | output=$($PPCSR_EXEC -threads=$core $v -size=$SIZE -core_graph=$PPCSR_CORE_GRAPH_FILE -update_file=$PPCSR_INSERTIONS_FILE -partitions_per_domain=$p 2>&1 | tee "${PPCSR_PROGRAM_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_insertions_${v:1}_${core}cores_${p}par_${r}.txt" | sed '/Elapsed/!d' | sed -n '0~2p' | sed 's/Elapsed wall clock time: //g') 117 | echo -e "[END] \t ${v:1} edge insertions: Finished repetition #$r on $core cores.\n" 118 | insert="${insert} ${output}" 119 | done 120 | 121 | if [ "$REPETITIONS" -gt 1 ]; then 122 | read avg_insert stddev_insert <<<$(echo "$insert" | awk '{ A=0; V=0; for(N=1; N<=NF; N++) A+=$N ; A/=NF ; for(N=1; N<=NF; N++) V+=(($N-A)*($N-A))/(NF-1); print A,sqrt(V) }') 123 | else 124 | avg_insert=$insert 125 | stddev_insert=0 126 | fi 127 | insert="${insert} ${avg_insert} ${stddev_insert}" 128 | 129 | delete="" 130 | for ((r = 1; r <= REPETITIONS; r++)); do 131 | echo -e "[START]\t ${v:1} edge deletions: Executing repetition #$r on $core cores for $p partitions per NUMA domain......" 
132 | output=$($PPCSR_EXEC -delete -threads=$core $v -size=$SIZE -core_graph=$PPCSR_CORE_GRAPH_FILE -update_file=$PPCSR_DELETIONS_FILE -partitions_per_domain=$p 2>&1 | tee "${PPCSR_PROGRAM_OUTPUTS_DIR}/${PPCSR_BASE_NAME}_deletions_${v:1}_${core}cores_${p}par_${r}.txt" | sed '/Elapsed/!d' | sed -n '0~2p' | sed 's/Elapsed wall clock time: //g') 133 | echo -e "[END] \t ${v:1} edge deletions: Finished repetition #$r on $core cores.\n" 134 | delete="${delete} ${output}" 135 | done 136 | 137 | if [ "$REPETITIONS" -gt 1 ]; then 138 | read avg_delete stddev_delete <<<$(echo "$delete" | awk '{ A=0; V=0; for(N=1; N<=NF; N++) A+=$N ; A/=NF ; for(N=1; N<=NF; N++) V+=(($N-A)*($N-A))/(NF-1); print A,sqrt(V) }') 139 | else 140 | avg_delete=$delete 141 | stddev_delete=0 142 | fi 143 | delete="${delete} ${avg_delete} ${stddev_delete}" 144 | 145 | csv="${csv}${insert} ${delete}" 146 | dat="${dat}${avg_insert} ${stddev_insert} ${avg_delete} ${stddev_delete} " 147 | 148 | if [ "$v" = "-ppcsr" ]; then 149 | break 150 | fi 151 | done 152 | done 153 | 154 | echo "$csv" | sed -e "s/^/$core/" >>$PPCSR_CSV_DATA 155 | echo $core $dat >>$PPCSR_PLOT_DATA 156 | done 157 | 158 | echo -e "[END] \t Computations finished.\n" 159 | 160 | ###################################### 161 | 162 | # Create the plot 163 | 164 | echo -e "[START]\t Starting data plotting...\n" 165 | 166 | PPCSR_PLOT_FILE=$(mktemp gnuplot.pXXX) 167 | 168 | XLABEL="#cores" 169 | YLABEL="CPU time (ms)" 170 | 171 | cat <<EOF >$PPCSR_PLOT_FILE 172 | set term pdf font ", 12" 173 | set output "${PPCSR_PDF_PLOT_FILE}.pdf" 174 | set title "Machine: $MACHINE_NAME \t Dataset: $DATASET_NAME \t #Updates: $SIZE" 175 | set xlabel "${XLABEL}" 176 | set ylabel "${YLABEL}" offset 1.5 177 | set size ratio 0.5 178 | #set size 0.8,0.8 179 | EOF 180 | 181 | echo -n 'set xtics (' >>$PPCSR_PLOT_FILE 182 | for i in ${CORES[@]}; do 183 | echo -n " $i," >>$PPCSR_PLOT_FILE 184 | done 185 | echo ')' >>$PPCSR_PLOT_FILE 186 | 187 | for i in ${NUMA_BOUNDS[@]}; do 
188 | echo "set arrow from $i, graph 0 to $i, graph 1 nohead dt 3" >>$PPCSR_PLOT_FILE 189 | done 190 | 191 | cat <<EOF >>$PPCSR_PLOT_FILE 192 | #set ytics nomirror 193 | set key right top 194 | set key font ",12" 195 | 196 | set style line 1 lt 1 lc rgb "blue" lw 1 pt 5 ps 0.5 197 | set style line 2 lt 1 dt 4 lc rgb "blue" lw 1 pt 4 ps 0.5 198 | set style line 3 lt 1 lc rgb "red" lw 1 pt 7 ps 0.5 199 | set style line 4 lt 1 dt 4 lc rgb "red" lw 1 pt 6 ps 0.5 200 | set style line 5 lt 1 lc rgb "green" lw 1 pt 9 ps 0.5 201 | set style line 6 lt 1 dt 4 lc rgb "green" lw 1 pt 8 ps 0.5 202 | 203 | set xrange [${CORES[0]}:${CORES[-1]}] 204 | set yrange [0:] 205 | plot \ 206 | "$PPCSR_PLOT_DATA" using 1:2 title 'insertions' with linespoint ls 1, \ 207 | "$PPCSR_PLOT_DATA" using 1:2:3 title '' with yerrorbars ls 1, \ 208 | "$PPCSR_PLOT_DATA" using 1:4 title 'deletions' with linespoint ls 2, \ 209 | "$PPCSR_PLOT_DATA" using 1:4:5 title '' with yerrorbars ls 2, \ 210 | "$PPCSR_PLOT_DATA" using 1:6 title 'insertions par' with linespoint ls 3, \ 211 | "$PPCSR_PLOT_DATA" using 1:6:7 title '' with yerrorbars ls 3, \ 212 | "$PPCSR_PLOT_DATA" using 1:8 title 'deletions par' with linespoint ls 4, \ 213 | "$PPCSR_PLOT_DATA" using 1:8:9 title '' with yerrorbars ls 4, \ 214 | "$PPCSR_PLOT_DATA" using 1:10 title 'insertions numa' with linespoint ls 5, \ 215 | "$PPCSR_PLOT_DATA" using 1:10:11 title '' with yerrorbars ls 5, \ 216 | "$PPCSR_PLOT_DATA" using 1:12 title 'deletions numa' with linespoint ls 6, \ 217 | "$PPCSR_PLOT_DATA" using 1:12:13 title '' with yerrorbars ls 6 218 | EOF 219 | 220 | gnuplot $PPCSR_PLOT_FILE 221 | rm $PPCSR_PLOT_FILE 222 | pdfcrop --margins "0 0 0 0" --clip ${PPCSR_PDF_PLOT_FILE}.pdf ${PPCSR_PDF_PLOT_FILE}.pdf &>/dev/null 223 | 224 | echo -e "[END] \t Plotting finished.\n" 225 | 226 | echo "Exiting benchmark." 
227 | 228 | exit 0 229 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Eleni Alevra 3 | * modified by Christian Menges 4 | */ 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "thread_pool/thread_pool.h" 22 | #include "thread_pool_pppcsr/thread_pool_pppcsr.h" 23 | 24 | using namespace std; 25 | 26 | enum class Operation { READ, ADD, DELETE }; 27 | 28 | // Reads edge list with separator 29 | pair>, int> read_input(string filename, Operation defaultOp) { 30 | ifstream f; 31 | string line; 32 | f.open(filename); 33 | if (!f.good()) { 34 | std::cerr << "Invalid file" << std::endl; 35 | exit(EXIT_FAILURE); 36 | } 37 | vector> edges; 38 | int num_nodes = 0; 39 | std::size_t pos, pos2; 40 | while (getline(f, line)) { 41 | int src = stoi(line, &pos); 42 | int target = stoi(line.substr(pos + 1), &pos2); 43 | 44 | num_nodes = std::max(num_nodes, std::max(src, target)); 45 | 46 | Operation op = defaultOp; 47 | if (pos + 1 + pos2 + 1 < line.length()) { 48 | switch (line[pos + 1 + pos2 + 1]) { 49 | case '1': 50 | op = Operation::ADD; 51 | break; 52 | case '0': 53 | op = Operation::DELETE; 54 | break; 55 | default: 56 | cerr << "Invalid operation"; 57 | } 58 | } 59 | edges.emplace_back(op, src, target); 60 | } 61 | return make_pair(edges, num_nodes); 62 | } 63 | 64 | // Does insertions 65 | template 66 | void update_existing_graph(const vector> &input, ThreadPool_t *thread_pool, int threads, 67 | int size) { 68 | for (int i = 0; i < size; i++) { 69 | switch (get<0>(input[i])) { 70 | case Operation::ADD: 71 | thread_pool->submit_add(i % threads, get<1>(input[i]), get<2>(input[i])); 72 | break; 73 | case Operation::DELETE: 74 | thread_pool->submit_delete(i % threads, 
get<1>(input[i]), get<2>(input[i])); 75 | break; 76 | case Operation::READ: 77 | cerr << "Not implemented\n"; 78 | break; 79 | } 80 | } 81 | thread_pool->start(threads); 82 | thread_pool->stop(); 83 | } 84 | 85 | template 86 | void execute(int threads, int size, const vector> &core_graph, 87 | const vector> &updates, std::unique_ptr &thread_pool) { 88 | // Load core graph 89 | update_existing_graph(core_graph, thread_pool.get(), threads, core_graph.size()); 90 | // Do updates 91 | update_existing_graph(updates, thread_pool.get(), threads, size); 92 | 93 | // DEBUGGING CODE 94 | // Check that all edges are there and in sorted order 95 | // for (int i = 0; i < core_graph.size(); i++) { 96 | // if (!thread_pool->pcsr->edge_exists(std::get<1>(core_graph[i]),std::get<2>(core_graph[i]))) { 97 | // cout << "Not there " << std::get<1>(core_graph[i]) << " " << 98 | // std::get<2>(core_graph[i]) << endl; 99 | // } 100 | // } 101 | // for (int i = 0; i < size; i++) { 102 | // if (!thread_pool->pcsr->edge_exists(std::get<1>(updates[i]), std::get<2>(updates[i]))) { 103 | // cout << "Update not there " << std::get<1>(updates[i]) << " " << 104 | // std::get<2>(updates[i]) << endl; 105 | // } 106 | // } 107 | } 108 | 109 | enum class Version { PPCSR, PPPCSR, PPPCSRNUMA }; 110 | 111 | int main(int argc, char *argv[]) { 112 | int threads = 8; 113 | int size = 1000000; 114 | int num_nodes = 0; 115 | bool lock_search = true; 116 | bool insert = true; 117 | Version v = Version::PPPCSRNUMA; 118 | int partitions_per_domain = 1; 119 | vector> core_graph; 120 | vector> updates; 121 | for (int i = 1; i < argc; i++) { 122 | string s = string(argv[i]); 123 | if (s.rfind("-threads=", 0) == 0) { 124 | threads = stoi(s.substr(string("-threads=").length(), s.length())); 125 | } else if (s.rfind("-size=", 0) == 0) { 126 | size = stoi(s.substr(string("-size=").length(), s.length())); 127 | } else if (s.rfind("-lock_free", 0) == 0) { 128 | lock_search = false; 129 | } else if (s.rfind("-insert", 0) 
== 0) { 130 | insert = true; 131 | } else if (s.rfind("-delete", 0) == 0) { 132 | insert = false; 133 | } else if (s.rfind("-pppcsrnuma", 0) == 0) { 134 | v = Version::PPPCSRNUMA; 135 | } else if (s.rfind("-pppcsr", 0) == 0) { 136 | v = Version::PPPCSR; 137 | } else if (s.rfind("-ppcsr", 0) == 0) { 138 | v = Version::PPCSR; 139 | } else if (s.rfind("-partitions_per_domain=", 0) == 0) { 140 | partitions_per_domain = stoi(s.substr(string("-partitions_per_domain=").length(), s.length())); 141 | } else if (s.rfind("-core_graph=", 0) == 0) { 142 | string core_graph_filename = s.substr(string("-core_graph=").length(), s.length()); 143 | int temp = 0; 144 | std::tie(core_graph, temp) = read_input(core_graph_filename, Operation::ADD); 145 | num_nodes = std::max(num_nodes, temp); 146 | } else if (s.rfind("-update_file=", 0) == 0) { 147 | string update_filename = s.substr(string("-update_file=").length(), s.length()); 148 | cout << update_filename << endl; 149 | int temp = 0; 150 | Operation defaultOp = Operation::ADD; 151 | if (!insert) { 152 | defaultOp = Operation::DELETE; 153 | } 154 | std::tie(updates, temp) = read_input(update_filename, defaultOp); 155 | num_nodes = std::max(num_nodes, temp); 156 | size = std::min((size_t)size, updates.size()); 157 | } 158 | } 159 | if (core_graph.empty()) { 160 | cout << "Core graph file not specified" << endl; 161 | exit(EXIT_FAILURE); 162 | } 163 | if (updates.empty()) { 164 | cout << "Updates file not specified" << endl; 165 | exit(EXIT_FAILURE); 166 | } 167 | cout << "Core graph size: " << core_graph.size() << endl; 168 | // sort(core_graph.begin(), core_graph.end()); 169 | switch (v) { 170 | case Version::PPCSR: { 171 | auto thread_pool = make_unique(threads, lock_search, num_nodes + 1, partitions_per_domain); 172 | execute(threads, size, core_graph, updates, thread_pool); 173 | break; 174 | } 175 | case Version::PPPCSR: { 176 | auto thread_pool = 177 | make_unique(threads, lock_search, num_nodes + 1, partitions_per_domain, 
false); 178 | execute(threads, size, core_graph, updates, thread_pool); 179 | break; 180 | } 181 | default: { 182 | auto thread_pool = 183 | make_unique(threads, lock_search, num_nodes + 1, partitions_per_domain, true); 184 | execute(threads, size, core_graph, updates, thread_pool); 185 | } 186 | } 187 | 188 | return 0; 189 | } 190 | -------------------------------------------------------------------------------- /src/pcsr/PCSR.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * This file was cloned from https://github.com/wheatman/Packed-Compressed-Sparse-Row/. The 3 | * parts of the code that Eleni Alevra has added for the parallel version are marked by comments. 4 | * modified by Christian Menges 5 | */ 6 | #include "PCSR.h" 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | 21 | // find index of first 1-bit (least significant bit) 22 | static inline int bsf_word(int word) { 23 | int result; 24 | __asm__ volatile("bsf %1, %0" : "=r"(result) : "r"(word)); 25 | return result; 26 | } 27 | 28 | template 29 | static inline T bsr_word(T word) { 30 | T result; 31 | __asm__ volatile("bsr %1, %0" : "=r"(result) : "r"(word)); 32 | return result; 33 | } 34 | 35 | typedef struct _pair_double { 36 | double x; 37 | double y; 38 | } pair_double; 39 | 40 | void PCSR::nodes_unlock_shared(bool unlock, int start_node, int end_node) { 41 | if (unlock) { 42 | for (int i = start_node; i <= end_node; i++) { 43 | edges.node_locks[i]->unlock_shared(); 44 | } 45 | } 46 | } 47 | 48 | template 49 | void checkAllocation(T *ptr) { 50 | if (ptr == NULL) { 51 | cout << "Allocation failed. Abort\n"; 52 | exit(EXIT_FAILURE); 53 | } 54 | } 55 | 56 | // same as find_leaf, but does it for any level in the tree 57 | // index: index in array 58 | // len: length of sub-level. 
59 | int find_node(int index, int len) { return (index / len) * len; } 60 | 61 | // null overrides sentinel 62 | // e.g. in rebalance, we check if an edge is null 63 | // first before copying it into temp, then fix the sentinels. 64 | bool is_sentinel(const edge_t &e) { return e.dest == UINT32_MAX || e.value == UINT32_MAX; } 65 | 66 | // bool is_null(edge_t e) { return e.value == 0; } 67 | 68 | void PCSR::resizeEdgeArray(size_t newSize) { 69 | edges.N = newSize; 70 | edges.logN = (1 << bsr_word(bsr_word(edges.N) * 2 + 1)); 71 | edges.H = bsr_word(edges.N / edges.logN); 72 | std::cout << "Edges: " << edges.N << " logN: " << edges.logN << " #count: " << edges.N / edges.logN << std::endl; 73 | } 74 | 75 | void PCSR::clear() { 76 | if (is_numa_available) { 77 | numa_free(edges.items, edges.N * sizeof(*(edges.items))); 78 | } else { 79 | free(edges.items); 80 | } 81 | resizeEdgeArray(2 << bsr_word(0)); 82 | } 83 | 84 | vector> PCSR::get_edges() { 85 | const auto n = get_n(); 86 | vector> output; 87 | 88 | for (uint64_t i = 0; i < n; i++) { 89 | auto start = nodes[i].beginning; 90 | auto end = nodes[i].end; 91 | for (auto j = start + 1; j < end; j++) { 92 | if (!is_null(edges.items[j].value)) { 93 | output.push_back(make_tuple(i, edges.items[j].dest, edges.items[j].value)); 94 | } 95 | } 96 | } 97 | return output; 98 | } 99 | 100 | uint64_t PCSR::get_n() const { return nodes.size(); } 101 | 102 | uint64_t PCSR::get_size() { 103 | uint64_t size = nodes.capacity() * sizeof(node_t); 104 | size += edges.N * sizeof(edge_t); 105 | return size; 106 | } 107 | 108 | void PCSR::print_array() { 109 | for (uint64_t i = 0; i < edges.N; i++) { 110 | if (is_null(edges.items[i].value)) { 111 | cout << i << "-x "; 112 | } else if (is_sentinel(edges.items[i])) { 113 | uint32_t value = edges.items[i].value; 114 | if (value == UINT32_MAX) { 115 | value = 0; 116 | } 117 | printf("\n%d-s(%u):(%d, %d) ", i, value, nodes[value].beginning, nodes[value].end); 118 | } else { 119 | printf("%d-(%d, 
%u) ", i, edges.items[i].dest, edges.items[i].value); 120 | } 121 | } 122 | printf("\n\n"); 123 | } 124 | 125 | // get density of a node 126 | double get_density(edge_list_t *list, int index, int len) { 127 | int full = 0; 128 | for (auto i = index; i < index + len; i++) { 129 | full += (!is_null(list->items[i].value)); 130 | } 131 | const auto full_d = static_cast(full); 132 | return full_d / len; 133 | } 134 | 135 | double get_full(edge_list_t *list, int index, int len) { 136 | int full = 0; 137 | for (int i = 0; i < index + len; i++) { 138 | full += (!is_null(list->items[i].value)); 139 | } 140 | return static_cast(full); 141 | } 142 | 143 | // height of this node in the tree 144 | int get_depth(edge_list_t *list, int len) { return bsr_word(list->N / len); } 145 | 146 | // get parent of this node in the tree 147 | pair get_parent(edge_list_t *list, int index, int len) { 148 | int parent_len = len * 2; 149 | int depth = get_depth(list, len); 150 | 151 | return make_pair(parent_len, depth); 152 | } 153 | 154 | // when adjusting the list size, make sure you're still in the 155 | // density bound 156 | pair_double density_bound(edge_list_t *list, int depth) { 157 | pair_double pair; 158 | 159 | // between 1/4 and 1/2 160 | // pair.x = 1.0/2.0 - (( .25*depth)/list->H); 161 | // between 1/8 and 1/4 162 | pair.x = 1.0 / 4.0 - ((0.125 * depth) / list->H); 163 | pair.y = 3.0 / 4.0 + ((.25 * depth) / list->H); 164 | return pair; 165 | } 166 | 167 | // fix pointer from node to moved sentinel 168 | void PCSR::fix_sentinel(const edge_t &sentinel, int in) { 169 | if (!is_sentinel(sentinel)) { 170 | return; 171 | } 172 | auto node_index = sentinel.value; 173 | 174 | if (node_index == UINT32_MAX) { 175 | node_index = 0; 176 | } else { 177 | nodes[node_index - 1].end = in; 178 | } 179 | nodes[node_index].beginning = in; 180 | if (node_index == nodes.size() - 1) { 181 | nodes[node_index].end = edges.N - 1; 182 | } 183 | } 184 | 185 | // Evenly redistribute elements in the ofm, 
given a range to look into 186 | // index: starting position in ofm structure 187 | // len: area to redistribute 188 | // Out of place version 189 | // void PCSR::redistribute(int index, int len) { 190 | // edge_t *space = (edge_t *)malloc(len * sizeof(*(edges.items))); 191 | // int j = 0; 192 | // 193 | // // move all items in ofm in the range into 194 | // // a temp array 195 | // for (int i = index; i < index + len; i++) { 196 | // space[j] = edges.items[i]; 197 | // // counting non-null edges 198 | // j += (!is_null(edges.items[i].value)); 199 | // // setting section to null 200 | // edges.items[i].src = -1; 201 | // edges.items[i].value = 0; 202 | // edges.items[i].dest = 0; 203 | // } 204 | // 205 | // // evenly redistribute for a uniform density 206 | // double index_d = index; 207 | // double step = ((double)len) / j; 208 | // for (int i = 0; i < j; i++) { 209 | // int in = static_cast(index_d); 210 | // 211 | // edges.items[in] = space[i]; 212 | // if (is_sentinel(space[i])) { 213 | // // fixing pointer of node that goes to this sentinel 214 | // fix_sentinel(space[i], in); 215 | // } 216 | // index_d += step; 217 | // } 218 | // free(space); 219 | //} 220 | 221 | // Inplace version 222 | void PCSR::redistribute(int index, int len) { 223 | size_t j = 0; 224 | const size_t end = index + len; 225 | 226 | for (size_t i = index; i < end; i++) { 227 | edges.items[index + j] = edges.items[i]; 228 | // counting non-null edges 229 | j += (!is_null(edges.items[index + j].value)); 230 | } 231 | for (size_t i = index + j; i < end; i++) { 232 | edges.items[i].src = -1; 233 | edges.items[i].value = 0; 234 | edges.items[i].dest = 0; 235 | } 236 | // evenly redistribute for a uniform density 237 | const double step = static_cast(len) / j; 238 | double index_d = index + static_cast(j - 1) * step; 239 | 240 | // Ignore element at position index since it is already in the correct position 241 | for (auto i = index + j - 1; i > index; i--) { 242 | const size_t in = 
static_cast(index_d); 243 | 244 | std::swap(edges.items[in], edges.items[i]); 245 | fix_sentinel(edges.items[in], in); 246 | index_d -= step; 247 | } 248 | fix_sentinel(edges.items[index], index); 249 | } 250 | 251 | void PCSR::double_list() { 252 | const int prev_locks_size = edges.N / edges.logN; 253 | resizeEdgeArray(edges.N * 2); 254 | const int new_locks_size = edges.N / edges.logN; 255 | 256 | // Added by Eleni Alevra - START 257 | if (is_numa_available) { 258 | edges.node_locks = (HybridLock **)numa_realloc(edges.node_locks, prev_locks_size * sizeof(HybridLock *), 259 | new_locks_size * sizeof(HybridLock *)); 260 | checkAllocation(edges.node_locks); 261 | } else { 262 | edges.node_locks = (HybridLock **)realloc(edges.node_locks, new_locks_size * sizeof(HybridLock *)); 263 | } 264 | for (int i = prev_locks_size; i < new_locks_size; i++) { 265 | edges.node_locks[i] = new HybridLock(); 266 | } 267 | // Added by Eleni Alevra - END 268 | 269 | if (is_numa_available) { 270 | edges.items = 271 | (edge_t *)numa_realloc(edges.items, (edges.N / 2) * sizeof(*(edges.items)), edges.N * sizeof(*(edges.items))); 272 | } else { 273 | edges.items = (edge_t *)realloc(edges.items, edges.N * sizeof(*(edges.items))); 274 | } 275 | 276 | for (int i = edges.N / 2; i < edges.N; i++) { 277 | edges.items[i].value = 0; // setting second half to null 278 | edges.items[i].dest = 0; // setting second half to null 279 | } 280 | 281 | redistribute(0, edges.N); 282 | } 283 | 284 | void PCSR::half_list() { 285 | const int prev_locks_size = edges.N / edges.logN; 286 | resizeEdgeArray(edges.N / 2); 287 | const int new_locks_size = edges.N / edges.logN; 288 | 289 | int j = 0; 290 | for (int i = 0; i < edges.N * 2; i++) { 291 | if (!is_null(edges.items[i].value)) { 292 | edges.items[j++] = edges.items[i]; 293 | } 294 | } 295 | // set remaining elements to null 296 | for (; j < edges.N; j++) { 297 | edges.items[j].value = 0; 298 | edges.items[j].dest = 0; 299 | } 300 | 301 | for (int i = 
new_locks_size; i < prev_locks_size; i++) { 302 | edges.node_locks[i]->unlock(); 303 | delete edges.node_locks[i]; 304 | } 305 | 306 | if (is_numa_available) { 307 | edges.node_locks = (HybridLock **)numa_realloc(edges.node_locks, prev_locks_size * sizeof(HybridLock *), 308 | new_locks_size * sizeof(HybridLock *)); 309 | checkAllocation(edges.node_locks); 310 | edges.items = 311 | (edge_t *)numa_realloc(edges.items, edges.N * 2 * sizeof(*(edges.items)), edges.N * sizeof(*(edges.items))); 312 | 313 | checkAllocation(edges.items); 314 | } else { 315 | edges.node_locks = (HybridLock **)realloc(edges.node_locks, new_locks_size * sizeof(HybridLock *)); 316 | edges.items = (edge_t *)realloc(edges.items, edges.N * sizeof(*(edges.items))); 317 | } 318 | 319 | redistribute(0, edges.N); 320 | } 321 | 322 | // index is the beginning of the sequence that you want to slide right. 323 | // notice that slide right does not not null the current spot. 324 | // this is ok because we will be putting something in the current index 325 | // after sliding everything to the right. 
326 | int PCSR::slide_right(int index, uint32_t src) { 327 | int rval = 0; 328 | edge_t el = edges.items[index]; 329 | edges.items[index].src = -1; 330 | edges.items[index].dest = 0; 331 | edges.items[index].value = 0; 332 | index++; 333 | while (index < edges.N && !is_null(edges.items[index].value)) { 334 | edge_t temp = edges.items[index]; 335 | edges.items[index] = el; 336 | if (!is_null(el.value)) { 337 | // fixing pointer of node that goes to this sentinel 338 | fix_sentinel(el, index); 339 | } 340 | el = temp; 341 | index++; 342 | } 343 | if (!is_null(el.value)) { 344 | // fixing pointer of node that goes to this sentinel 345 | fix_sentinel(el, index); 346 | } 347 | if (index == edges.N) { 348 | index--; 349 | slide_left(index, src); 350 | rval = -1; 351 | printf("slide off the end on the right, should be rare\n"); 352 | } 353 | edges.items[index] = el; 354 | return rval; 355 | } 356 | 357 | // only called in slide right if it was going to go off the edge 358 | // since it can't be full this doesn't need to worry about going off the other 359 | // end 360 | void PCSR::slide_left(int index, uint32_t src) { 361 | edge_t el = edges.items[index]; 362 | edges.items[index].src = -1; 363 | edges.items[index].dest = 0; 364 | edges.items[index].value = 0; 365 | 366 | index--; 367 | while (index >= 0 && !is_null(edges.items[index].value)) { 368 | edge_t temp = edges.items[index]; 369 | edges.items[index] = el; 370 | if (!is_null(el.value)) { 371 | // fixing pointer of node that goes to this sentinel 372 | fix_sentinel(el, index); 373 | } 374 | el = temp; 375 | index--; 376 | } 377 | 378 | if (index == -1) { 379 | double_list(); 380 | 381 | slide_right(0, src); 382 | index = 0; 383 | } 384 | if (!is_null(el.value)) { 385 | // fixing pointer of node that goes to this sentinel 386 | fix_sentinel(el, index); 387 | } 388 | 389 | edges.items[index] = el; 390 | } 391 | 392 | // given index, return the starting index of the leaf it is in 393 | int find_leaf(edge_list_t *list, 
int index) { return (index / list->logN) * list->logN; } 394 | 395 | // true if e1, e2 are equals 396 | bool edge_equals(const edge_t &e1, const edge_t &e2) { return e1.dest == e2.dest && e1.value == e2.value; } 397 | 398 | // return index of the edge elem 399 | // takes in edge list and place to start looking 400 | uint32_t find_elem_pointer(edge_list_t *list, uint32_t index, edge_t elem) { 401 | edge_t item = list->items[index]; 402 | while (!edge_equals(item, elem)) { 403 | item = list->items[++index]; 404 | } 405 | return index; 406 | } 407 | 408 | // return index of the edge elem 409 | // takes in edge list and place to start looking 410 | // looks in reverse 411 | uint32_t find_elem_pointer_reverse(edge_list_t *list, uint32_t index, edge_t elem) { 412 | edge_t item = list->items[index]; 413 | while (!edge_equals(item, elem)) { 414 | item = list->items[--index]; 415 | } 416 | return index; 417 | } 418 | 419 | // important: make sure start, end don't include sentinels 420 | // returns the index of the smallest element bigger than you in the range 421 | // [start, end) if no such element is found, returns end (because insert shifts 422 | // everything to the right) 423 | // also returns the version number of the node we will insert to 424 | // this is to check if it has changed when we lock to do the insertion 425 | // This function was modified for Eleni Alevra's implementation to return the version number and to do 426 | // unlocking when unlock is set 427 | pair PCSR::binary_search(edge_t *elem, uint32_t start, uint32_t end, bool unlock) { 428 | int ins_v = -1; 429 | uint32_t start_node = find_leaf(&edges, start) / edges.logN; 430 | uint32_t end_node = find_leaf(&edges, end) / edges.logN; 431 | 432 | while (start + 1 < end) { 433 | // TODO: fix potential overflow for large data sets (use std::midpoint) 434 | const uint32_t mid = (start + end) / 2; 435 | // elems++; 436 | edge_t item = edges.items[mid]; 437 | uint32_t change = 1; 438 | uint32_t check = mid; 
439 | 440 | bool flag = true; 441 | while (is_null(item.value) && flag) { 442 | flag = false; 443 | check = mid + change; 444 | if (check < end) { 445 | flag = true; 446 | if (check <= end) { 447 | // elems++; 448 | item = edges.items[check]; 449 | if (!is_null(item.value) || check == end) { 450 | break; 451 | } 452 | } 453 | } 454 | check = mid - change; 455 | if (check >= start) { 456 | flag = true; 457 | // elems++; 458 | item = edges.items[check]; 459 | } 460 | change++; 461 | } 462 | 463 | ins_v = edges.node_locks[find_leaf(&edges, check) / edges.logN]->load(); 464 | int ins2 = edges.node_locks[find_leaf(&edges, mid) / edges.logN]->load(); 465 | if (is_null(item.value) || start == check || end == check) { 466 | nodes_unlock_shared(unlock, start_node, end_node); 467 | if (!is_null(item.value) && start == check && elem->dest <= item.dest) { 468 | return make_pair(check, ins_v); 469 | } else { 470 | return make_pair(mid, ins2); 471 | } 472 | } 473 | 474 | // if we found it, return 475 | ins_v = edges.node_locks[find_leaf(&edges, check) / edges.logN]->load(); 476 | if (elem->dest == item.dest) { 477 | nodes_unlock_shared(unlock, start_node, end_node); 478 | return make_pair(check, ins_v); 479 | } else if (elem->dest < item.dest) { 480 | end = check; // if the searched for item is less than current item, set end 481 | } else { 482 | start = check; 483 | // otherwise, searched for item is more than current and we set start 484 | } 485 | } 486 | if (end < start) { 487 | start = end; 488 | } 489 | // handling the case where there is one element left 490 | // if you are leq, return start (index where elt is) 491 | // otherwise, return end (no element greater than you in the range) 492 | // printf("start = %d, end = %d, n = %d\n", start,end, list->N); 493 | ins_v = edges.node_locks[find_leaf(&edges, start) / edges.logN]->load(); 494 | if (elem->dest <= edges.items[start].dest && !is_null(edges.items[start].value)) { 495 | nodes_unlock_shared(unlock, start_node, 
end_node); 496 | return make_pair(start, ins_v); 497 | } 498 | ins_v = edges.node_locks[find_leaf(&edges, end) / edges.logN]->load(); 499 | nodes_unlock_shared(unlock, start_node, end_node); 500 | // Could also be null but it's end 501 | return make_pair(end, ins_v); 502 | } 503 | 504 | uint32_t PCSR::find_value(uint32_t src, uint32_t dest) { 505 | edge_t e; 506 | e.value = 0; 507 | e.dest = dest; 508 | auto bs = binary_search(&e, nodes[src].beginning + 1, nodes[src].end, false); 509 | auto loc = bs.first; 510 | if (!is_null(edges.items[loc].value) && edges.items[loc].dest == dest) { 511 | return edges.items[loc].value; 512 | } else { 513 | return 0; 514 | } 515 | } 516 | 517 | // insert elem at index returns index that the element went to (which 518 | // may not be the same one that you put it at) 519 | void PCSR::insert(uint32_t index, edge_t elem, uint32_t src, insertion_info_t *info) { 520 | auto node_index = find_leaf(&edges, index); 521 | auto level = edges.H; 522 | auto len = edges.logN; 523 | 524 | // always deposit on the left 525 | if (!is_null(edges.items[index].value)) { 526 | // if the edge already exists in the graph, update its value 527 | // do not make another edge 528 | // return index of the edge that already exists 529 | if (!is_sentinel(elem) && edges.items[index].dest == elem.dest) { 530 | edges.items[index].value = elem.value; 531 | return; 532 | } 533 | if (index == edges.N - 1) { 534 | // when adding to the end double then add edge 535 | double_list(); 536 | node_t node = nodes[src]; 537 | // If we are at this point we already have a global lock on the data structure so there is no need to 538 | // do any extra locking for binary search 539 | auto loc_to_add = binary_search(&elem, node.beginning + 1, node.end, false).first; 540 | return insert(loc_to_add, elem, src, nullptr); 541 | } else { 542 | if (slide_right(index, src) == -1) { 543 | index -= 1; 544 | slide_left(index, src); 545 | } 546 | } 547 | } 548 | edges.items[index].src = 
elem.src; 549 | edges.items[index].value = elem.value; 550 | edges.items[index].dest = elem.dest; 551 | 552 | auto density = get_density(&edges, node_index, len); 553 | 554 | // spill over into next level up, node is completely full. 555 | if (density == 1) { 556 | node_index = find_node(node_index, len * 2); 557 | redistribute(node_index, len * 2); 558 | } else { 559 | redistribute(node_index, len); 560 | } 561 | 562 | // get density of the leaf you are in 563 | pair_double density_b = density_bound(&edges, level); 564 | density = get_density(&edges, node_index, len); 565 | 566 | // while density too high, go up the implicit tree 567 | // go up to the biggest node above the density bound 568 | if (info != nullptr) { 569 | // We have information about how much to redistribute from when we acquired locks 570 | if (info->double_list) { 571 | double_list(); 572 | return; 573 | } else { 574 | len = info->max_len; 575 | node_index = info->node_index_final; 576 | } 577 | } else { 578 | while (density >= density_b.y) { 579 | len *= 2; 580 | if (len <= edges.N) { 581 | level--; 582 | node_index = find_node(node_index, len); 583 | density_b = density_bound(&edges, level); 584 | density = get_density(&edges, node_index, len); 585 | } else { 586 | // if you reach the root, double the list 587 | double_list(); 588 | return; 589 | } 590 | } 591 | } 592 | if (len > edges.logN) { 593 | redistribute(node_index, len); 594 | } 595 | } 596 | 597 | void PCSR::remove(uint32_t index, const edge_t &elem, uint32_t src) { 598 | auto node_index = find_leaf(&edges, index); 599 | auto level = edges.H; 600 | auto len = edges.logN; 601 | 602 | if (is_null(edges.items[index].value) || is_sentinel(elem) || edges.items[index].dest != elem.dest) { 603 | return; 604 | } else { 605 | edges.items[index].value = 0; 606 | edges.items[index].dest = 0; 607 | } 608 | 609 | redistribute(node_index, len); 610 | // get density of the leaf you are in 611 | pair_double density_b = density_bound(&edges, level); 
612 | auto density = get_density(&edges, node_index, len); 613 | 614 | // while density too low, go up the implicit tree 615 | // go up to the biggest node above the density bound 616 | while (density < density_b.x) { 617 | len *= 2; 618 | if (len <= edges.N) { 619 | level--; 620 | node_index = find_node(node_index, len); 621 | density_b = density_bound(&edges, level); 622 | density = get_density(&edges, node_index, len); 623 | } else { 624 | // if you reach the root, halve the list 625 | half_list(); 626 | return; 627 | } 628 | } 629 | redistribute(node_index, len); 630 | } 631 | 632 | // find index of edge 633 | uint32_t find_index(edge_list_t *list, edge_t *elem_pointer) { 634 | edge_t *array_start = list->items; 635 | uint32_t index = (elem_pointer - array_start); 636 | return index; 637 | } 638 | 639 | std::vector PCSR::sparse_matrix_vector_multiplication(std::vector const &v) { 640 | std::vector result(nodes.size(), 0); 641 | 642 | auto num_vertices = nodes.size(); 643 | 644 | for (size_t i = 0; i < num_vertices; i++) { 645 | // +1 to avoid sentinel 646 | 647 | for (uint32_t j = nodes[i].beginning + 1; j < nodes[i].end; j++) { 648 | result[i] += edges.items[j].value * v[edges.items[j].dest]; 649 | } 650 | } 651 | return result; 652 | } 653 | 654 | // Prints neighbours of vertex src 655 | void PCSR::print_graph(int src) { 656 | int num_vertices = nodes.size(); 657 | for (int i = 0; i < num_vertices; i++) { 658 | // +1 to avoid sentinel 659 | // int matrix_index = 0; 660 | if (i != src) continue; 661 | 662 | for (uint32_t j = nodes[i].beginning + 1; j < nodes[i].end; j++) { 663 | if (!is_null(edges.items[j].value)) { 664 | printf("%d ", edges.items[j].dest); 665 | // while (matrix_index < edges.items[j].dest) { 666 | // printf("000 "); 667 | // matrix_index++; 668 | // } 669 | // printf("%03d ", edges.items[j].value); 670 | // matrix_index++; 671 | } 672 | } 673 | // for (uint32_t j = matrix_index; j < num_vertices; j++) { 674 | // printf("000 "); 675 | // } 
676 | printf("\n"); 677 | } 678 | } 679 | 680 | // add a node to the graph 681 | void PCSR::add_node() { 682 | adding_sentinels = true; 683 | node_t node; 684 | auto len = nodes.size(); 685 | edge_t sentinel; 686 | sentinel.src = len; 687 | sentinel.dest = UINT32_MAX; // placeholder 688 | sentinel.value = len; // back pointer 689 | 690 | if (len > 0) { 691 | node.beginning = nodes[len - 1].end; 692 | node.end = node.beginning + 1; 693 | } else { 694 | node.beginning = 0; 695 | node.end = 1; 696 | sentinel.value = UINT32_MAX; 697 | } 698 | node.num_neighbors = 0; 699 | 700 | nodes.push_back(node); 701 | insert(node.beginning, sentinel, nodes.size() - 1, nullptr); 702 | adding_sentinels = false; 703 | } 704 | 705 | // This function was re-written for Eleni Alevra's implementation 706 | void PCSR::add_edge(uint32_t src, uint32_t dest, uint32_t value) { add_edge_parallel(src, dest, value, 0); } 707 | 708 | // Added by me 709 | void PCSR::remove_edge(uint32_t src, uint32_t dest) { 710 | edge_t e; 711 | e.src = src; 712 | e.dest = dest; 713 | e.value = 1; 714 | 715 | edges.global_lock->lock_shared(); 716 | 717 | auto beginning = nodes[src].beginning; 718 | auto end = nodes[src].end; 719 | auto first_node = get_node_id(find_leaf(&edges, beginning + 1)); 720 | auto last_node = get_node_id(find_leaf(&edges, end)); 721 | uint32_t loc_to_rem; 722 | int ins_node_v; 723 | if (lock_bsearch) { 724 | for (auto i = first_node; i <= last_node; i++) { 725 | edges.node_locks[i]->lock_shared(); 726 | } 727 | if (nodes[src].beginning != beginning || nodes[src].end != end) { 728 | release_locks_no_inc(make_pair(first_node, last_node)); 729 | edges.global_lock->unlock_shared(); 730 | remove_edge(src, dest); 731 | return; 732 | } 733 | loc_to_rem = binary_search(&e, beginning + 1, end, false).first; 734 | // Keep the version number of the PCSR node we will remove from so that if by the time we lock it has changed we 735 | // can re-start. 
We can't keep the PCSR node locked after binary search in case we have to acquire some locks to 736 | // its left first. 737 | ins_node_v = edges.node_locks[get_node_id(find_leaf(&edges, loc_to_rem))]->load(); 738 | for (auto i = first_node; i <= last_node; i++) { 739 | edges.node_locks[i]->unlock_shared(); 740 | } 741 | } else { 742 | auto bs = binary_search(&e, nodes[src].beginning + 1, nodes[src].end, false); 743 | loc_to_rem = bs.first; 744 | ins_node_v = bs.second; 745 | } 746 | 747 | nodes[src].num_neighbors--; 748 | 749 | auto acquired_locks = acquire_remove_locks(loc_to_rem, e, src, ins_node_v, -1); 750 | if (acquired_locks.first == EDGE_NOT_FOUND) { 751 | cout << "not found " << src << " " << dest << endl; 752 | edges.global_lock->unlock_shared(); 753 | return; 754 | } 755 | if (acquired_locks.first == NEED_GLOBAL_WRITE) { 756 | // we need to halve the array 757 | // release all node locks 758 | release_locks_no_inc({0, edges.N / edges.logN - 1}); 759 | edges.global_lock->unlock_shared(); 760 | const std::lock_guard lck(*edges.global_lock); 761 | loc_to_rem = binary_search(&e, nodes[src].beginning + 1, nodes[src].end, false).first; 762 | remove(loc_to_rem, e, src); 763 | } else if (acquired_locks.first == NEED_RETRY) { 764 | // we need to re-start because when we acquired the locks things had changed 765 | nodes[src].num_neighbors++; 766 | edges.global_lock->unlock_shared(); 767 | remove_edge(src, dest); 768 | } else { 769 | remove(loc_to_rem, e, src); 770 | release_locks(acquired_locks); 771 | edges.global_lock->unlock_shared(); 772 | } 773 | } 774 | 775 | PCSR::PCSR(uint32_t init_n, uint32_t src_n, bool lock_search, int domain) 776 | : nodes(src_n), is_numa_available{numa_available() >= 0 && domain >= 0}, domain(domain) { 777 | resizeEdgeArray(2 << bsr_word(std::max(init_n + src_n, 1024u))); 778 | edges.global_lock = make_shared(); 779 | 780 | lock_bsearch = lock_search; 781 | if (is_numa_available) { 782 | edges.node_locks = (HybridLock 
**)numa_alloc_onnode((edges.N / edges.logN) * sizeof(HybridLock *), domain); 783 | checkAllocation(edges.node_locks); 784 | 785 | edges.items = (edge_t *)numa_alloc_onnode(edges.N * sizeof(*(edges.items)), domain); 786 | checkAllocation(edges.items); 787 | } else { 788 | edges.node_locks = (HybridLock **)malloc((edges.N / edges.logN) * sizeof(HybridLock *)); 789 | edges.items = (edge_t *)malloc(edges.N * sizeof(*(edges.items))); 790 | } 791 | 792 | for (uint32_t i = 0; i < edges.N / edges.logN; i++) { 793 | edges.node_locks[i] = new HybridLock(); 794 | } 795 | 796 | double index_d = 0.0; 797 | const double step = ((double)edges.N) / src_n; 798 | int in = 0; 799 | 800 | for (uint32_t i = 0; i < src_n; i++) { 801 | if (i == 0) { 802 | nodes[i].beginning = 0; 803 | } else { 804 | nodes[i].beginning = nodes[i - 1].end; 805 | } 806 | index_d += step; 807 | in = static_cast(index_d); 808 | nodes[i].end = in; 809 | nodes[i].num_neighbors = 0; 810 | } 811 | if (src_n != 0) { 812 | nodes[nodes.size() - 1].end = edges.N - 1; 813 | } 814 | 815 | index_d = 0.0; 816 | in = 0; 817 | int current = 0; 818 | 819 | // evenly distribute for a uniform density 820 | for (int i = 0; i < edges.N; i++) { 821 | if (i == in && current < src_n) { 822 | edges.items[i].src = current; 823 | edges.items[i].dest = UINT32_MAX; // placeholder 824 | if (i == 0) { 825 | edges.items[i].value = UINT32_MAX; 826 | } else { 827 | edges.items[i].value = current; // back pointer 828 | } 829 | current++; 830 | index_d += step; 831 | in = static_cast(index_d); 832 | } else { 833 | edges.items[i].src = -1; 834 | edges.items[i].dest = 0; 835 | edges.items[i].value = 0; 836 | } 837 | } 838 | } 839 | 840 | PCSR::~PCSR() { 841 | for (uint32_t i = 0; i < (edges.N / edges.logN); i++) { 842 | delete edges.node_locks[i]; 843 | } 844 | if (is_numa_available) { 845 | numa_free(edges.node_locks, (edges.N / edges.logN) * sizeof(HybridLock *)); 846 | numa_free(edges.items, edges.N * sizeof(*(edges.items))); 847 | } else { 
848 | free(edges.node_locks); 849 | free(edges.items); 850 | } 851 | } 852 | 853 | /** 854 | * The following functions were all added for Eleni Alevra's implementation. 855 | */ 856 | 857 | // Used for debugging 858 | // Returns true if edge {src, dest} exists 859 | // Added by Eleni Alevra 860 | bool PCSR::edge_exists(uint32_t src, uint32_t dest) { 861 | node_t node = nodes[src]; 862 | 863 | edge_t e; 864 | e.dest = dest; 865 | e.value = 1; 866 | auto loc_to_rem = binary_search(&e, node.beginning + 1, node.end, false).first; 867 | e = edges.items[loc_to_rem]; 868 | return !(is_null(e.value)) && !is_sentinel(e) && e.dest == dest; 869 | } 870 | 871 | // Used for debugging 872 | // Returns true if every neighbourhood is sorted 873 | // Added by Eleni Alevra 874 | bool PCSR::is_sorted() const { 875 | for (int i = 0; i < nodes.size(); i++) { 876 | int prev = 0; 877 | for (int j = nodes[i].beginning + 1; j < nodes[i].end; j++) { 878 | if (!is_null(edges.items[j].value)) { 879 | if (edges.items[j].dest < prev) { 880 | cout << prev << " " << i << " " << edges.items[j].dest << endl; 881 | return false; 882 | } 883 | prev = edges.items[j].dest; 884 | } 885 | } 886 | } 887 | return true; 888 | } 889 | 890 | // Reads the neighbourhood of vertex src 891 | // Added by Eleni Alevra 892 | void PCSR::read_neighbourhood(int src) { 893 | if (src < get_n()) { 894 | int k = 0; 895 | for (int i = nodes[src].beginning + 1; i < nodes[src].end; i++) { 896 | k = edges.items[i].dest; 897 | } 898 | } 899 | } 900 | 901 | vector PCSR::get_neighbourhood(int src) const { 902 | std::vector neighbours; 903 | if (src < get_n()) { 904 | neighbours.reserve(nodes[src].num_neighbors); 905 | for (int i = nodes[src].beginning + 1; i < nodes[src].end; i++) { 906 | if (edges.items[i].value != 0) { 907 | neighbours.push_back(edges.items[i].dest); 908 | } 909 | } 910 | } 911 | return neighbours; 912 | } 913 | 914 | // Get id of PCSR node (starting from 0) 915 | // e.g. 
if every PCSR node has 8 elements, index number 5 is in PCSR node 0, index number 8 is in PCSR node 1 etc. 916 | // Added by Eleni Alevra 917 | uint32_t PCSR::get_node_id(uint32_t node_index) const { return node_index / edges.logN; } 918 | 919 | // Release acquired locks and increment the version counters to notify any other thread that will acquire them 920 | // that a change has happened 921 | // Added by Eleni Alevra 922 | void PCSR::release_locks(pair acquired_locks) { 923 | for (int i = acquired_locks.first; i <= acquired_locks.second; i++) { 924 | ++(*edges.node_locks[i]); 925 | edges.node_locks[i]->unlock(); 926 | } 927 | } 928 | 929 | // Release acquired locks without incrementing version counters (we didn't make any changes to these PCSR nodes) 930 | // Added by Eleni Alevra 931 | void PCSR::release_locks_no_inc(pair acquired_locks) { 932 | for (int i = acquired_locks.first; i <= acquired_locks.second; i++) { 933 | edges.node_locks[i]->unlock(); 934 | } 935 | } 936 | 937 | // Acquire locks required to insert an edge 938 | // Returns id of first and last node locked and a struct with information about redistribute to avoid repeating checks 939 | // index: where the new edge should be inserted 940 | // elem: the edge to insert 941 | // src: source vertex 942 | // ins_node_v: the version number of the PCSR node we want to insert to, at the time when binary search happened. 943 | // We use this to verify nothing has changed when we lock it. 
944 | // left_node_bound: the leftmost PCSR node to lock from, initially this will be the node where we want to insert but 945 | // during redistribute we might have to lock some extra PCSR nodes to the left so to avoid deadlocks we release the 946 | // locks we already have and re-start acquiring from the new leftmost PCSR node 947 | // tries: how many times we have re-tried locking, to make sure we don't re-try too many times 948 | // Added by Eleni Alevra 949 | pair, insertion_info_t *> PCSR::acquire_insert_locks(uint32_t index, edge_t elem, uint32_t src, 950 | int ins_node_v, uint32_t left_node_bound, 951 | int tries) { 952 | if (tries > 3) { 953 | // very rarely happens (about 100 times in 14M insertions) 954 | return make_pair(make_pair(NEED_GLOBAL_WRITE, NEED_GLOBAL_WRITE), nullptr); 955 | } 956 | int node_index = find_leaf(&edges, index); 957 | int init_node_index = node_index; 958 | int level = edges.H; 959 | int len = edges.logN; 960 | uint32_t min_node = get_node_id(node_index); 961 | uint32_t max_node = min_node; 962 | uint32_t node_id = get_node_id(node_index); 963 | if (left_node_bound != -1) { 964 | uint32_t leftmost_node = left_node_bound; 965 | for (int i = leftmost_node; i <= node_id; i++) { 966 | edges.node_locks[i]->lock(); 967 | } 968 | // if (node_id < (edges.N / edges.logN) - 1) { 969 | // edges.node_locks[node_id + 1]->lock(); 970 | // max_node = node_id + 1; 971 | // } 972 | // edges.node_locks[node_id + 1]->lock(); 973 | // max_node = node_id + 1; 974 | min_node = min(min_node, leftmost_node); 975 | } else { 976 | if (node_id > 0 && !lock_bsearch) { 977 | edges.node_locks[node_id - 1]->lock(); 978 | min_node = node_id - 1; 979 | } 980 | edges.node_locks[node_id]->lock(); 981 | // if (node_id < (edges.N / edges.logN) - 1) { 982 | // edges.node_locks[node_id + 1]->lock(); 983 | // max_node = node_id + 1; 984 | // } 985 | } 986 | if (ins_node_v != edges.node_locks[node_id]->load()) { 987 | for (int i = min_node; i <= max_node; i++) { 988 | 
edges.node_locks[i]->unlock(); 989 | } 990 | return make_pair(make_pair(NEED_RETRY, NEED_RETRY), nullptr); 991 | } 992 | if (index == edges.N - 1 && !(is_null(edges.items[index].value))) { 993 | for (int i = min_node; i <= max_node; i++) { 994 | edges.node_locks[i]->unlock(); 995 | } 996 | return make_pair(make_pair(NEED_GLOBAL_WRITE, NEED_GLOBAL_WRITE), nullptr); 997 | } 998 | if (!lock_bsearch) { 999 | // We didn't lock during binary search so we might have gotten back a wrong index, need to check and if it's wrong 1000 | // re-try 1001 | auto ins_edge = edges.items[index]; 1002 | if (!got_correct_insertion_index(ins_edge, src, index, elem, node_index, node_id, max_node)) { 1003 | for (int i = min_node; i <= max_node; i++) { 1004 | edges.node_locks[i]->unlock(); 1005 | } 1006 | return make_pair(make_pair(NEED_RETRY, NEED_RETRY), nullptr); 1007 | } 1008 | } 1009 | 1010 | // check which locks we still need to acquire for redistribute 1011 | 1012 | if (get_density(&edges, node_index, len) + (1.0 / len) == 1) { 1013 | uint32_t new_node_idx = find_node(node_index, 2 * len); 1014 | uint32_t new_node_id = get_node_id(new_node_idx); 1015 | if (new_node_idx == node_index && new_node_id > max_node) { 1016 | edges.node_locks[new_node_id]->lock(); 1017 | max_node = new_node_id; 1018 | } else if (new_node_id < min_node) { 1019 | release_locks_no_inc(make_pair(min_node, max_node)); 1020 | return acquire_insert_locks(index, elem, src, ins_node_v, new_node_id, tries + 1); 1021 | } 1022 | node_index = new_node_idx; 1023 | } 1024 | 1025 | pair_double density_b = density_bound(&edges, level); 1026 | double density = get_density(&edges, node_index, len) + (1.0 / len); 1027 | 1028 | while (density >= density_b.y) { 1029 | len *= 2; 1030 | if (len <= edges.N) { 1031 | level--; 1032 | uint32_t new_node_index = find_node(node_index, len); 1033 | if (new_node_index < node_index) { 1034 | uint32_t new_node_id = get_node_id(new_node_index); 1035 | if (new_node_id < min_node) { 1036 | 
release_locks_no_inc(make_pair(min_node, max_node)); 1037 | return acquire_insert_locks(index, elem, src, ins_node_v, get_node_id(new_node_index), tries + 1); 1038 | } 1039 | min_node = min(min_node, new_node_id); 1040 | node_index = new_node_index; 1041 | } else { 1042 | uint32_t end = get_node_id(find_leaf(&edges, new_node_index + len)); 1043 | node_index = new_node_index; 1044 | for (uint32_t i = max_node + 1; i < end; i++) { 1045 | max_node = max(max_node, i); 1046 | edges.node_locks[i]->lock(); 1047 | // got_locks++; 1048 | } 1049 | } 1050 | density_b = density_bound(&edges, level); 1051 | density = get_density(&edges, node_index, len) + (1.0 / len); 1052 | } else { 1053 | for (int i = min_node; i <= max_node; i++) { 1054 | edges.node_locks[i]->unlock(); 1055 | } 1056 | insertion_info_t *info = (insertion_info_t *)malloc(sizeof(insertion_info_t)); 1057 | 1058 | info->double_list = true; 1059 | return make_pair(make_pair(NEED_GLOBAL_WRITE, NEED_GLOBAL_WRITE), info); 1060 | } 1061 | } 1062 | uint32_t new_node_index = find_node(node_index, len); 1063 | if (new_node_index < node_index) { 1064 | uint32_t node_id = get_node_id(new_node_index); 1065 | if (node_id < min_node) { 1066 | release_locks_no_inc(make_pair(min_node, max_node)); 1067 | return acquire_insert_locks(index, elem, src, ins_node_v, get_node_id(new_node_index), tries + 1); 1068 | } 1069 | min_node = min(min_node, get_node_id(new_node_index)); 1070 | } else { 1071 | uint32_t end = get_node_id(find_leaf(&edges, new_node_index + len)); 1072 | for (uint32_t i = max_node + 1; i < end; i++) { 1073 | max_node = max(max_node, i); 1074 | // got_locks++; 1075 | edges.node_locks[i]->lock(); 1076 | } 1077 | } 1078 | node_index = new_node_index; 1079 | 1080 | // lock PCSR nodes needed for slide_right / slide_left 1081 | insertion_info_t *info = (insertion_info_t *)malloc(sizeof(insertion_info_t)); 1082 | info->double_list = false; 1083 | info->max_len = len; 1084 | info->node_index_final = node_index; 1085 | len 
= edges.logN; 1086 | node_index = find_leaf(&edges, index); 1087 | 1088 | if (!(is_null(edges.items[index].value))) { 1089 | auto curr_node = get_node_id(node_index); 1090 | int curr_ind = index + 1; 1091 | uint32_t curr_node_idx = node_index; 1092 | if (curr_ind < edges.N && curr_ind >= curr_node_idx + len) { 1093 | curr_node_idx = curr_ind; 1094 | curr_node++; 1095 | if (curr_node > max_node) { 1096 | edges.node_locks[curr_node]->lock(); 1097 | max_node = curr_node; 1098 | } 1099 | } 1100 | while (curr_ind < edges.N && !(is_null(edges.items[curr_ind].value))) { 1101 | if (++curr_ind < edges.N && curr_ind >= curr_node_idx + len) { 1102 | curr_node++; 1103 | if (curr_node > max_node) { 1104 | edges.node_locks[curr_node]->lock(); 1105 | max_node = curr_node; 1106 | } 1107 | curr_node_idx = curr_ind; 1108 | } 1109 | } 1110 | if (curr_ind == edges.N) { 1111 | curr_ind = index; 1112 | curr_node = get_node_id(node_index); 1113 | curr_node_idx = node_index; 1114 | while (curr_ind >= 0 && !(is_null(edges.items[curr_ind].value))) { 1115 | if (--curr_ind >= 0 && curr_ind < curr_node_idx) { 1116 | curr_node_idx = find_leaf(&edges, curr_ind); 1117 | curr_node--; 1118 | if (curr_node < min_node) { 1119 | min_node = curr_node; 1120 | release_locks_no_inc(make_pair(min_node, max_node)); 1121 | return acquire_insert_locks(index, elem, src, ins_node_v, curr_node, tries + 1); 1122 | } 1123 | } 1124 | } 1125 | if (curr_ind == -1) { 1126 | for (auto i = min_node; i <= max_node; i++) { 1127 | edges.node_locks[i]->unlock(); 1128 | } 1129 | return make_pair(make_pair(NEED_GLOBAL_WRITE, NEED_GLOBAL_WRITE), nullptr); 1130 | } 1131 | } 1132 | } 1133 | return make_pair(make_pair(min_node, max_node), info); 1134 | } 1135 | 1136 | // Acquire locks required to remove an edge 1137 | // Returns id of first and last node locked 1138 | // index: where the edge to remove is 1139 | // elem: the edge to remove 1140 | // src: source vertex 1141 | // ins_node_v: the version number of the PCSR node we 
want to remove from, at the time when binary search happened 1142 | // we use this to verify nothing has changed when we lock it 1143 | // left_node_bound: the leftmost PCSR node to lock from, initially this will be the node where the edge is but 1144 | // during redistribute we might have to lock some extra PCSR nodes to the left so to avoid deadlocks we release the 1145 | // locks we already have and re-start acquiring from the new leftmost PCSR node 1146 | // Added by Eleni Alevra 1147 | pair PCSR::acquire_remove_locks(uint32_t index, edge_t elem, uint32_t src, int ins_node_v, 1148 | uint32_t left_node_bound) { 1149 | int node_index = find_leaf(&edges, index); 1150 | // printf("node_index = %d\n", node_index); 1151 | int level = edges.H; 1152 | int len = edges.logN; 1153 | int node_id = get_node_id(node_index); 1154 | uint32_t min_node = node_id; 1155 | uint32_t max_node = node_id; 1156 | 1157 | // If we have a leftmost PCSR start locking from it 1158 | if (left_node_bound != -1) { 1159 | for (uint32_t i = left_node_bound; i <= node_id; i++) { 1160 | edges.node_locks[i]->lock(); 1161 | // got_locks++; 1162 | } 1163 | min_node = left_node_bound; 1164 | } else { 1165 | edges.node_locks[node_id]->lock(); 1166 | // got_locks++; 1167 | } 1168 | if (!got_correct_insertion_index(edges.items[index], src, index, elem, node_index, node_id, max_node)) { 1169 | release_locks_no_inc(make_pair(min_node, max_node)); 1170 | // retries++; 1171 | return make_pair(NEED_RETRY, NEED_RETRY); 1172 | } 1173 | // We now have the lock for the PCSR node the edge is but things might have moved since binary search so we compare 1174 | // its version number to the one during binary search to see if any changes have happened. If they have we re-start. 
1175 | if (edges.node_locks[node_id]->load() != ins_node_v) { 1176 | release_locks_no_inc(make_pair(min_node, max_node)); 1177 | return make_pair(NEED_RETRY, NEED_RETRY); 1178 | } 1179 | if (is_null(edges.items[index].value)) { 1180 | // Edge not found 1181 | release_locks_no_inc(make_pair(min_node, max_node)); 1182 | return make_pair(EDGE_NOT_FOUND, EDGE_NOT_FOUND); 1183 | } else { 1184 | if (is_sentinel(elem) || edges.items[index].dest != elem.dest) { 1185 | // Edge not found 1186 | release_locks_no_inc(make_pair(min_node, max_node)); 1187 | return make_pair(EDGE_NOT_FOUND, EDGE_NOT_FOUND); 1188 | } 1189 | } 1190 | 1191 | // get density of the leaf you are in 1192 | pair_double density_b = density_bound(&edges, level); 1193 | double density = get_density(&edges, node_index, len) - (1.0 / len); 1194 | 1195 | // while density too low, go up the implicit tree 1196 | // go up to the biggest node below the density bound 1197 | while (density < density_b.x) { 1198 | len *= 2; 1199 | if (len <= edges.N) { 1200 | level--; 1201 | uint32_t new_node_idx = find_node(node_index, len); 1202 | int new_node_id = get_node_id(new_node_idx); 1203 | if (new_node_idx < node_index && new_node_id < min_node) { 1204 | release_locks_no_inc(make_pair(min_node, max_node)); 1205 | return acquire_remove_locks(index, elem, src, ins_node_v, new_node_id); 1206 | } 1207 | for (uint32_t i = max_node + 1; i < get_node_id(new_node_idx + len); i++) { 1208 | edges.node_locks[i]->lock(); 1209 | // got_locks++; 1210 | max_node = i; 1211 | } 1212 | node_index = new_node_idx; 1213 | density_b = density_bound(&edges, level); 1214 | density = get_density(&edges, node_index, len) - (1.0 / len); 1215 | } else { 1216 | return make_pair(NEED_GLOBAL_WRITE, NEED_GLOBAL_WRITE); 1217 | } 1218 | } 1219 | 1220 | auto new_node_idx = find_node(node_index, len); 1221 | auto new_node_id = get_node_id(new_node_idx); 1222 | if (new_node_idx < node_index && new_node_id < min_node) { 1223 | 
release_locks_no_inc(make_pair(min_node, max_node)); 1224 | return acquire_remove_locks(index, elem, src, ins_node_v, new_node_id); 1225 | } 1226 | for (auto i = max_node + 1; i < get_node_id(new_node_idx + len); i++) { 1227 | edges.node_locks[i]->lock(); 1228 | // got_locks++; 1229 | max_node = i; 1230 | } 1231 | return make_pair(min_node, max_node); 1232 | } 1233 | 1234 | // Returns total number of edges in the array 1235 | // Added by Eleni Alevra 1236 | int PCSR::count_total_edges() { 1237 | int t = 0; 1238 | for (size_t i = 0; i < nodes.size(); i++) { 1239 | for (auto j = nodes[i].beginning + 1; j < nodes[i].end; j++) { 1240 | if (!(is_null(edges.items[j].value))) { 1241 | t++; 1242 | } 1243 | } 1244 | } 1245 | return t; 1246 | } 1247 | 1248 | // Used for parallel re-distributing 1249 | // Stores the elements in the range [index, index + len) in array space and returns the redistribution step 1250 | // and the number of elements 1251 | // Added by Eleni Alevra 1252 | pair PCSR::redistr_store(edge_t *space, int index, int len) { 1253 | int j = 0; 1254 | for (auto i = index; i < index + len; i++) { 1255 | space[j] = edges.items[i]; 1256 | j += (!(is_null(edges.items[i].value))); 1257 | edges.items[i].value = 0; 1258 | edges.items[i].dest = 0; 1259 | } 1260 | return make_pair(((double)len) / j, j); 1261 | } 1262 | 1263 | // Added by Eleni Alevra 1264 | PCSR::PCSR(uint32_t init_n, vector *cvs, bool lock_search, int domain) 1265 | : is_numa_available{numa_available() >= 0 && domain >= 0}, domain(domain) { 1266 | resizeEdgeArray(2 << bsr_word(init_n)); 1267 | edges.global_lock = make_shared(); 1268 | 1269 | this->redistr_mutex = new mutex; 1270 | this->redistr_cv = new condition_variable; 1271 | this->redistr_cvs = cvs; 1272 | lock_bsearch = lock_search; 1273 | 1274 | if (is_numa_available) { 1275 | edges.node_locks = (HybridLock **)numa_alloc_onnode((edges.N / edges.logN) * sizeof(HybridLock *), domain); 1276 | checkAllocation(edges.node_locks); 1277 | edges.items 
= (edge_t *)numa_alloc_onnode(edges.N * sizeof(*(edges.items)), domain); 1278 | checkAllocation(edges.items); 1279 | } else { 1280 | edges.node_locks = (HybridLock **)malloc((edges.N / edges.logN) * sizeof(HybridLock *)); 1281 | edges.items = (edge_t *)malloc(edges.N * sizeof(*(edges.items))); 1282 | } 1283 | for (uint64_t i = 0; i < edges.N; i++) { 1284 | edges.items[i].src = -1; 1285 | edges.items[i].value = 0; 1286 | edges.items[i].dest = 0; 1287 | } 1288 | 1289 | for (uint32_t i = 0; i < edges.N / edges.logN; i++) { 1290 | edges.node_locks[i] = new HybridLock(); 1291 | } 1292 | 1293 | for (uint32_t i = 0; i < init_n; i++) { 1294 | add_node(); 1295 | } 1296 | } 1297 | 1298 | // Returns total number of edges in range [index, index + len) 1299 | // Added by Eleni Alevra 1300 | int PCSR::count_elems(int index, int len) { 1301 | int j = 0; 1302 | for (auto i = index; i < index + len; i++) { 1303 | j += !(is_null(edges.items[i].value)) && !is_sentinel(edges.items[i]); 1304 | } 1305 | return j; 1306 | } 1307 | 1308 | // Returns true if the given edge should be inserted in index 1309 | // Added by Eleni Alevra 1310 | bool PCSR::got_correct_insertion_index(edge_t ins_edge, uint32_t src, uint32_t index, edge_t elem, int node_index, 1311 | int node_id, uint32_t &max_node) { 1312 | // Check that we are in the right neighbourhood 1313 | if (!(is_null(ins_edge.value)) && 1314 | ((is_sentinel(ins_edge) && src != nodes.size() - 1 && ins_edge.src != src + 1) || 1315 | (is_sentinel(ins_edge) && src == nodes.size() - 1 && ins_edge.value != UINT32_MAX) || 1316 | (!is_sentinel(ins_edge) && ins_edge.src != src))) { 1317 | return false; 1318 | } 1319 | // Check that the current edge is larger than the one we want to insert 1320 | if (!(is_null(ins_edge.value)) && !is_sentinel(ins_edge) && ins_edge.dest < elem.dest) { 1321 | return false; 1322 | } 1323 | if (is_null(ins_edge.value)) { 1324 | // The current position is empty so we need to find the next element to the right to make sure 
it's bigger than the 1325 | // one we want to insert 1326 | int ind = index + 1; 1327 | auto curr_n = node_index; 1328 | if (ind < edges.N && ind >= curr_n + edges.logN) { 1329 | curr_n += edges.logN; 1330 | edges.node_locks[++max_node]->lock(); 1331 | } 1332 | while (ind < edges.N && is_null(edges.items[ind].value)) { 1333 | ind++; 1334 | if (ind < edges.N && ind >= curr_n + edges.logN) { 1335 | curr_n += edges.logN; 1336 | edges.node_locks[++max_node]->lock(); 1337 | } 1338 | } 1339 | 1340 | if (ind < edges.N) { 1341 | edge_t item = edges.items[ind]; 1342 | // if it's in the same neighbourhood and smaller we're in the wrong position 1343 | if (!is_null(item.value) && !is_sentinel(item) && item.src == src && item.dest < elem.dest) { 1344 | return false; 1345 | } 1346 | // if it's a sentinel node for the wrong vertex the index is wrong 1347 | if (!(is_null(item.value)) && is_sentinel(item) && 1348 | ((src != nodes.size() - 1 && item.value != src + 1) || 1349 | (src == nodes.size() - 1 && item.value == UINT32_MAX))) { 1350 | return false; 1351 | } 1352 | } 1353 | } 1354 | // Go to the left to find the next element to the left and make sure it's less than the one we are inserting 1355 | auto ind = index - 1; 1356 | edge_t item; 1357 | item.value = 0; 1358 | item.dest = 0; 1359 | item.src = -1; 1360 | while (ind >= 0 && is_null(edges.items[ind].value)) { 1361 | ind--; 1362 | } 1363 | item = edges.items[ind]; 1364 | if (!is_null(item.value) && !is_sentinel(item) && item.src == src && item.dest >= elem.dest) { 1365 | return false; 1366 | } 1367 | if (!is_null(item.value) && is_sentinel(item) && 1368 | ((src == 0 && item.value != UINT32_MAX) || (src != 0 && item.value != src))) { 1369 | return false; 1370 | } 1371 | return true; 1372 | } 1373 | 1374 | void PCSR::add_edge_parallel(uint32_t src, uint32_t dest, uint32_t value, int retries) { 1375 | if (value != 0 && src < get_n()) { 1376 | edge_t e; 1377 | e.src = src; 1378 | e.dest = dest; 1379 | e.value = value; 1380 | if 
(retries > 3) { 1381 | const std::lock_guard lck(*edges.global_lock); 1382 | nodes[src].num_neighbors++; 1383 | int pos = binary_search(&e, nodes[src].beginning + 1, nodes[src].end, false).first; 1384 | insert(pos, e, src, nullptr); 1385 | return; 1386 | } 1387 | 1388 | edges.global_lock->lock_shared(); 1389 | auto beginning = nodes[src].beginning; 1390 | auto end = nodes[src].end; 1391 | uint32_t first_node = get_node_id(find_leaf(&edges, beginning + 1)); 1392 | nodes[src].num_neighbors++; 1393 | pair bs; 1394 | uint32_t loc_to_add; 1395 | if (lock_bsearch) { 1396 | uint32_t last_node = get_node_id(find_leaf(&edges, end)); 1397 | // Lock for binary search 1398 | for (uint32_t i = first_node; i <= last_node; i++) { 1399 | edges.node_locks[i]->lock_shared(); 1400 | } 1401 | // If after we have locked there have been more edges added, re-start to include them in the search 1402 | if (nodes[src].beginning != beginning || nodes[src].end != end) { 1403 | for (auto i = first_node; i <= last_node; i++) { 1404 | edges.node_locks[i]->unlock_shared(); 1405 | } 1406 | edges.global_lock->unlock_shared(); 1407 | nodes[src].num_neighbors--; 1408 | add_edge_parallel(src, dest, value, retries + 1); 1409 | return; 1410 | } 1411 | bs = binary_search(&e, beginning + 1, end, true); 1412 | loc_to_add = bs.first; 1413 | } else { 1414 | // get back index where the new edge should go and the version number of its PCSR node when we read its value 1415 | bs = binary_search(&e, beginning + 1, end, false); 1416 | loc_to_add = bs.first; 1417 | uint32_t index_node = get_node_id(find_leaf(&edges, loc_to_add)); 1418 | if (index_node < first_node) { 1419 | nodes[src].num_neighbors--; 1420 | edges.global_lock->unlock_shared(); 1421 | add_edge_parallel(src, dest, value, retries + 1); 1422 | return; 1423 | } 1424 | } 1425 | pair, insertion_info_t *> acquired_locks = 1426 | acquire_insert_locks(loc_to_add, e, src, bs.second, -1, 0); 1427 | if (acquired_locks.first.first == NEED_RETRY) { 1428 | 
nodes[src].num_neighbors--; 1429 | edges.global_lock->unlock_shared(); 1430 | add_edge_parallel(src, dest, value, retries + 1); 1431 | return; 1432 | } 1433 | if (acquired_locks.first.first == NEED_GLOBAL_WRITE) { 1434 | edges.global_lock->unlock_shared(); 1435 | const std::lock_guard lck(*edges.global_lock); 1436 | loc_to_add = binary_search(&e, nodes[src].beginning + 1, nodes[src].end, false).first; 1437 | insert(loc_to_add, e, src, acquired_locks.second); 1438 | } else { 1439 | insert(loc_to_add, e, src, acquired_locks.second); 1440 | release_locks(acquired_locks.first); 1441 | edges.global_lock->unlock_shared(); 1442 | } 1443 | free(acquired_locks.second); 1444 | } 1445 | } 1446 | 1447 | void PCSR::insert_nodes_and_edges_front(std::vector new_nodes, std::vector new_edges) { 1448 | (void)new_nodes; 1449 | (void)new_edges; 1450 | } 1451 | 1452 | void PCSR::insert_nodes_and_edges_back(std::vector new_nodes, std::vector new_edges) { 1453 | (void)new_nodes; 1454 | (void)new_edges; 1455 | } 1456 | 1457 | std::pair, std::vector> PCSR::remove_nodes_and_edges_front(int num_nodes) { 1458 | (void)num_nodes; 1459 | std::vector exported_nodes; 1460 | std::vector exported_edges; 1461 | return make_pair(exported_nodes, exported_edges); 1462 | } 1463 | std::pair, std::vector> PCSR::remove_nodes_and_edges_back(int num_nodes) { 1464 | (void)num_nodes; 1465 | std::vector exported_nodes; 1466 | std::vector exported_edges; 1467 | return make_pair(exported_nodes, exported_edges); 1468 | } -------------------------------------------------------------------------------- /src/pcsr/PCSR.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Created by Eleni Alevra on 02/06/2020. 
3 | * modified by Christian Menges 4 | */ 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include "hybridLock.h" 12 | 13 | using namespace std; 14 | #ifndef PCSR2_PCSR_H 15 | #define PCSR2_PCSR_H 16 | 17 | /** Types */ 18 | typedef struct _node { 19 | // beginning and end of the associated region in the edge list 20 | uint32_t beginning; // deleted = max int 21 | uint32_t end; // end pointer is exclusive 22 | uint32_t num_neighbors; // number of edges with this node as source 23 | } node_t; 24 | 25 | // each node has an associated sentinel (max_int, offset) that gets back to its 26 | // offset into the node array 27 | // UINT32_MAX 28 | // 29 | // if value == UINT32_MAX, read it as null. 30 | typedef struct _edge { 31 | uint32_t src; 32 | uint32_t dest; // destination of this edge in the graph, MAX_INT if this is a 33 | // sentinel 34 | uint32_t value; // edge value of zero means it a null since we don't store 0 edges 35 | } edge_t; 36 | 37 | typedef struct edge_list { 38 | uint64_t N; 39 | int H; 40 | int logN; 41 | shared_ptr global_lock; 42 | HybridLock **node_locks; // locks for every PCSR leaf node 43 | edge_t *items; 44 | } edge_list_t; 45 | 46 | // When we acquire locks to insert we have to make checks to see up to which position we will redistribute 47 | // during the insert 48 | // To avoid repeating this check during the actual insertion we pass this struct to it so it can immediately know 49 | // up to where it needs to redistribute 50 | typedef struct insertion_info { 51 | uint32_t first_empty; // first empty spot for slide 52 | int max_len; // len to redistribute up to 53 | int node_index_final; // final node index for redistr 54 | bool double_list; // double_list during redistr 55 | } insertion_info_t; 56 | 57 | template 58 | constexpr bool is_null(T val) { 59 | return val == 0; 60 | } 61 | 62 | enum SpecialCases { NEED_GLOBAL_WRITE = -1, NEED_RETRY = -2, EDGE_NOT_FOUND = -3 }; 63 | 64 | class PCSR { 65 | public: 66 | // data members 67 | 
edge_list_t edges; 68 | 69 | PCSR(uint32_t init_n, uint32_t, bool lock_search, int domain = 0); 70 | PCSR(uint32_t init_n, vector *cvs, bool search_lock, int domain = 0); 71 | ~PCSR(); 72 | /** Public API */ 73 | bool edge_exists(uint32_t src, uint32_t dest); 74 | void add_node(); 75 | void add_edge(uint32_t src, uint32_t dest, uint32_t value); 76 | void remove_edge(uint32_t src, uint32_t dest); 77 | void read_neighbourhood(int src); 78 | vector get_neighbourhood(int src) const; 79 | 80 | /** 81 | * Returns the node count 82 | * @return node count 83 | */ 84 | uint64_t get_n() const; 85 | 86 | /** 87 | * inserts nodes and edges at the front ot the data structure 88 | * @param nodes 89 | * @param new_items 90 | */ 91 | void insert_nodes_and_edges_front(std::vector nodes, std::vector new_edges); 92 | 93 | /** 94 | * inserts nodes and edges at the end ot the data structure 95 | * @param nodes 96 | * @param new_items 97 | */ 98 | void insert_nodes_and_edges_back(std::vector nodes, std::vector new_edges); 99 | 100 | /** 101 | * removes nodes and edges at the front ot the data structure 102 | * @param num_nodes #nodes to remove 103 | * @return removed nodes and edges 104 | */ 105 | std::pair, std::vector> remove_nodes_and_edges_front(int num_nodes); 106 | 107 | /** 108 | * removes nodes and edges at the end ot the data structure 109 | * @param num_nodes 110 | * @return removed nodes and edges 111 | */ 112 | std::pair, std::vector> remove_nodes_and_edges_back(int num_nodes); 113 | 114 | /** 115 | * Returns a ref. to the node with the given id 116 | * @return ref. to node 117 | */ 118 | node_t &getNode(int id) { return nodes[id]; } 119 | 120 | /** 121 | * Returns a const ref. to the node with the given id 122 | * @return const ref. 
to node 123 | */ 124 | const node_t &getNode(int id) const { return nodes[id]; } 125 | 126 | private: 127 | // data members 128 | std::vector nodes; 129 | bool lock_bsearch = false; // true if we lock during binary search 130 | 131 | // members used when parallel redistributing is enabled 132 | bool adding_sentinels = false; // true if we are in the middle of inserting a sentinel node 133 | mutex *redistr_mutex; // for synchronisation with the redistributing worker threads 134 | condition_variable *redistr_cv; // for synchronisation with the redistributing worker threads 135 | vector *redistr_locks; // for synchronisation with the redistributing worker threads 136 | vector *redistr_cvs; // for synchronisation with the redistributing worker threads 137 | 138 | void redistribute(int index, int len); 139 | bool got_correct_insertion_index(edge_t ins_edge, uint32_t src, uint32_t index, edge_t elem, int node_index, 140 | int node_id, uint32_t &max_node); 141 | pair, insertion_info_t *> acquire_insert_locks(uint32_t index, edge_t elem, uint32_t src, 142 | int ins_node_v, uint32_t left_node_bound, int tries); 143 | pair acquire_remove_locks(uint32_t index, edge_t elem, uint32_t src, int ins_node_v, 144 | uint32_t left_node_bound); 145 | void release_locks(pair acquired_locks); 146 | void release_locks_no_inc(pair acquired_locks); 147 | uint32_t find_value(uint32_t src, uint32_t dest); 148 | vector sparse_matrix_vector_multiplication(std::vector const &v); 149 | void double_list(); 150 | void half_list(); 151 | int slide_right(int index, uint32_t src); 152 | void slide_left(int index, uint32_t src); 153 | void add_edge_parallel(uint32_t src, uint32_t dest, uint32_t value, int retries); 154 | void insert(uint32_t index, edge_t elem, uint32_t src, insertion_info_t *info); 155 | void remove(uint32_t index, const edge_t &elem, uint32_t src); 156 | uint32_t get_node_id(uint32_t node_index) const; 157 | void print_array(); 158 | void print_graph(int); 159 | pair 
redistr_store(edge_t *space, int index, int len); 160 | void fix_sentinel(const edge_t &sentinel, int in); 161 | pair binary_search(edge_t *elem, uint32_t start, uint32_t end, bool unlock); 162 | void resizeEdgeArray(size_t newSize); 163 | 164 | /** 165 | * Returns total number of edges in range [index, index + len) 166 | * @param index start index 167 | * @param len range length 168 | * @return #edges in range 169 | */ 170 | int count_elems(int index, int len); 171 | /** 172 | * Returns true if every neighbourhood is sorted 173 | * @return sorted 174 | */ 175 | bool is_sorted() const; 176 | /** 177 | * Returns the total number of stored edges 178 | * @return #edges 179 | */ 180 | int count_total_edges(); 181 | /** 182 | * Return the memory footprint of this data structure in byte 183 | * @return memory footprint in byte 184 | */ 185 | uint64_t get_size(); 186 | 187 | /** 188 | * Returns all stored edges 189 | * @return [{node_id, dest_id, edge_value}] 190 | */ 191 | vector> get_edges(); 192 | 193 | /** 194 | * Deletes all edges. 
The data structure is invalid afterwards 195 | */ 196 | void clear(); 197 | 198 | void nodes_unlock_shared(bool unlock, int start_node, int end_node); 199 | 200 | const bool is_numa_available; 201 | int domain; 202 | }; 203 | 204 | #endif // PCSR2_PCSR_H 205 | -------------------------------------------------------------------------------- /src/pppcsr/PPPCSR.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file PPPCSR.cpp 3 | * @author Christian Menges 4 | */ 5 | 6 | #include "PPPCSR.h" 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | PPPCSR::PPPCSR(uint32_t init_n, uint32_t src_n, bool lock_search, int numDomain, int partitionsPerDomain, bool use_numa) 14 | : partitionsPerDomain(partitionsPerDomain) { 15 | std::size_t numDomains = numDomain; 16 | 17 | partitions.reserve(numDomains * partitionsPerDomain); 18 | distribution.reserve(numDomains * partitionsPerDomain); 19 | distribution.push_back(0); 20 | size_t partitionSize = std::ceil(init_n / (numDomains * partitionsPerDomain)); 21 | 22 | for (std::size_t i = 0; i < numDomains; i++) { 23 | for (std::size_t p = 0; p < partitionsPerDomain; p++) { 24 | if (i > 0 || p > 0) { 25 | distribution.push_back(distribution.back() + partitionSize); 26 | } 27 | if (i == numDomains - 1 && p == partitionsPerDomain - 1) { 28 | partitionSize = init_n - ((i * partitionsPerDomain) + p) * partitionSize; 29 | } 30 | partitions.emplace_back(partitionSize, partitionSize, lock_search, (use_numa) ? 
i : -1); 31 | } 32 | } 33 | cout << "Number of partitions: " << partitions.size() << std::endl; 34 | } 35 | 36 | bool PPPCSR::edge_exists(uint32_t src, uint32_t dest) { 37 | return partitions[get_partiton(src)].edge_exists(src - distribution[get_partiton(src)], dest); 38 | } 39 | 40 | vector PPPCSR::get_neighbourhood(int src) const { 41 | return partitions[get_partiton(src)].get_neighbourhood(src - distribution[get_partiton(src)]); 42 | } 43 | 44 | void PPPCSR::add_node() { partitions.back().add_node(); } 45 | 46 | void PPPCSR::add_edge(uint32_t src, uint32_t dest, uint32_t value) { 47 | partitions[get_partiton(src)].add_edge(src - distribution[get_partiton(src)], dest, value); 48 | } 49 | 50 | void PPPCSR::remove_edge(uint32_t src, uint32_t dest) { 51 | partitions[get_partiton(src)].remove_edge(src - distribution[get_partiton(src)], dest); 52 | } 53 | 54 | void PPPCSR::read_neighbourhood(int src) { 55 | partitions[get_partiton(src)].read_neighbourhood(src - distribution[get_partiton(src)]); 56 | } 57 | 58 | std::size_t PPPCSR::get_partiton(size_t vertex_id) const { 59 | for (std::size_t i = 1; i < distribution.size(); i++) { 60 | if (distribution[i] > vertex_id) { 61 | return i - 1; 62 | } 63 | } 64 | // Return last partition 65 | return distribution.size() - 1; 66 | } 67 | 68 | uint64_t PPPCSR::get_n() { 69 | uint64_t n = 0; 70 | for (int i = 0; i < partitions.size(); i++) { 71 | n += partitions[i].get_n(); 72 | } 73 | return n; 74 | } 75 | 76 | node_t &PPPCSR::getNode(int id) { return partitions[get_partiton(id)].getNode(id - distribution[get_partiton(id)]); } 77 | 78 | const node_t &PPPCSR::getNode(int id) const { 79 | return partitions[get_partiton(id)].getNode(id - distribution[get_partiton(id)]); 80 | } 81 | -------------------------------------------------------------------------------- /src/pppcsr/PPPCSR.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file PPPCSR.h 3 | * @author Christian Menges 4 | */ 5 | 
6 | #include "../pcsr/PCSR.h" 7 | 8 | #ifndef PPPCSR_H 9 | #define PPPCSR_H 10 | 11 | class PPPCSR { 12 | public: 13 | // data members 14 | edge_list_t edges; 15 | 16 | PPPCSR(uint32_t init_n, uint32_t, bool lock_search, int numDomain, int partitionsPerDomain, bool use_numa); 17 | // PPPCSR(uint32_t init_n, vector *cvs, bool search_lock); 18 | // ~PPPCSR(); 19 | /** Public API */ 20 | bool edge_exists(uint32_t src, uint32_t dest); 21 | void add_node(); 22 | void add_edge(uint32_t src, uint32_t dest, uint32_t value); 23 | void remove_edge(uint32_t src, uint32_t dest); 24 | void read_neighbourhood(int src); 25 | 26 | std::size_t get_partiton(size_t vertex_id) const; 27 | 28 | vector get_neighbourhood(int src) const; 29 | 30 | /** 31 | * Returns the node count 32 | * @return node count 33 | */ 34 | uint64_t get_n(); 35 | 36 | /** 37 | * Returns a ref. to the node with the given id 38 | * @return ref. to node 39 | */ 40 | node_t &getNode(int id); 41 | 42 | /** 43 | * Returns a const ref. to the node with the given id 44 | * @return const ref. to node 45 | */ 46 | const node_t &getNode(int id) const; 47 | 48 | void registerThread(int par) { partitions[par].edges.global_lock->registerThread(); } 49 | 50 | void unregisterThread(int par) { partitions[par].edges.global_lock->unregisterThread(); } 51 | 52 | private: 53 | /// different partitions 54 | std::vector partitions; 55 | 56 | /// start index vertices in the partitions 57 | std::vector distribution; 58 | 59 | int partitionsPerDomain; 60 | }; 61 | 62 | #endif // PPPCSR_H 63 | -------------------------------------------------------------------------------- /src/thread_pool/thread_pool.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Eleni Alevra on 29/03/2020. 
3 | * modified by Christian Menges 4 | */ 5 | 6 | #include "thread_pool.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | using namespace std; 15 | 16 | /** 17 | * Initializes a pool of threads. Every thread has its own task queue. 18 | */ 19 | ThreadPool::ThreadPool(const int NUM_OF_THREADS, bool lock_search, uint32_t init_num_nodes, int partitions_per_domain) 20 | : finished(false) { 21 | tasks.resize(NUM_OF_THREADS); 22 | pcsr = new PCSR(init_num_nodes, init_num_nodes, lock_search, -1); 23 | } 24 | 25 | // Function executed by worker threads 26 | // Does insertions, deletions and reads on the PCSR 27 | // Finishes when finished is set to true and there are no outstanding tasks 28 | template 29 | void ThreadPool::execute(int thread_id) { 30 | cout << "Thread " << thread_id << " has " << tasks[thread_id].size() << " tasks" << endl; 31 | 32 | int registered = -1; 33 | 34 | while (!tasks[thread_id].empty() || (!isMasterThread && !finished)) { 35 | if (!tasks[thread_id].empty()) { 36 | task t = tasks[thread_id].front(); 37 | tasks[thread_id].pop(); 38 | 39 | if (registered == -1) { 40 | pcsr->edges.global_lock->registerThread(); 41 | registered = 0; 42 | } 43 | if (t.add) { 44 | pcsr->add_edge(t.src, t.target, 1); 45 | } else if (!t.read) { 46 | pcsr->remove_edge(t.src, t.target); 47 | } else { 48 | pcsr->read_neighbourhood(t.src); 49 | } 50 | 51 | } else { 52 | if (registered != -1) { 53 | pcsr->edges.global_lock->unregisterThread(); 54 | registered = -1; 55 | } 56 | } 57 | } 58 | if (registered != -1) { 59 | pcsr->edges.global_lock->unregisterThread(); 60 | } 61 | } 62 | 63 | // Submit an update for edge {src, target} to thread with number thread_id 64 | void ThreadPool::submit_add(int thread_id, int src, int target) { 65 | tasks[thread_id].push(task{true, false, src, target}); 66 | } 67 | 68 | // Submit a delete edge task for edge {src, target} to thread with number thread_id 69 | void ThreadPool::submit_delete(int 
thread_id, int src, int target) { 70 | tasks[thread_id].push(task{false, false, src, target}); 71 | } 72 | 73 | // Submit a read neighbourhood task for vertex src to thread with number thread_id 74 | void ThreadPool::submit_read(int thread_id, int src) { tasks[thread_id].push(task{false, true, src, src}); } 75 | 76 | // starts a new number of threads 77 | // number of threads is passed to the constructor 78 | void ThreadPool::start(int threads) { 79 | s = chrono::steady_clock::now(); 80 | finished = false; 81 | 82 | for (int i = 1; i < threads; i++) { 83 | thread_pool.push_back(thread(&ThreadPool::execute, this, i)); 84 | // Pin thread to core 85 | // cpu_set_t cpuset; 86 | // CPU_ZERO(&cpuset); 87 | // CPU_SET((i * 4), &cpuset); 88 | // if (i >= 4) { 89 | // CPU_SET(1 + (i * 4), &cpuset); 90 | // } else { 91 | // CPU_SET(i * 4, &cpuset); 92 | // } 93 | // int rc = pthread_setaffinity_np(thread_pool.back().native_handle(), 94 | // sizeof(cpu_set_t), &cpuset); 95 | // if (rc != 0) { 96 | // cout << "error pinning thread" << endl; 97 | // } 98 | } 99 | execute(0); 100 | } 101 | 102 | // Stops currently running worker threads without redistributing worker threads 103 | // start() can still be used after this is called to start a new set of threads operating on the same pcsr 104 | void ThreadPool::stop() { 105 | finished = true; 106 | for (auto &&t : thread_pool) { 107 | if (t.joinable()) t.join(); 108 | cout << "Done" << endl; 109 | } 110 | end = chrono::steady_clock::now(); 111 | cout << "Elapsed wall clock time: " << chrono::duration_cast(end - s).count() << endl; 112 | thread_pool.clear(); 113 | } 114 | -------------------------------------------------------------------------------- /src/thread_pool/thread_pool.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Eleni Alevra on 02/06/2020. 
3 | * modified by Christian Menges 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../pcsr/PCSR.h" 11 | #include "task.h" 12 | 13 | using namespace std; 14 | #ifndef PCSR2_THREAD_POOL_H 15 | #define PCSR2_THREAD_POOL_H 16 | 17 | class ThreadPool { 18 | public: 19 | PCSR *pcsr; 20 | 21 | explicit ThreadPool(const int NUM_OF_THREADS, bool lock_search, uint32_t init_num_nodes, int partitions_per_domain); 22 | ~ThreadPool() = default; 23 | 24 | /** Public API */ 25 | void submit_add(int thread_id, int src, int dest); // submit task to thread {thread_id} to insert edge {src, dest} 26 | void submit_delete(int thread_id, int src, int dest); // submit task to thread {thread_id} to delete edge {src, dest} 27 | void submit_read(int, int); // submit task to thread {thread_id} to read the neighbourhood of vertex {src} 28 | void start(int threads); // start the threads 29 | void stop(); // stop the threads 30 | 31 | private: 32 | vector thread_pool; 33 | vector> tasks; 34 | chrono::steady_clock::time_point s; 35 | chrono::steady_clock::time_point end; 36 | std::atomic_bool finished; 37 | 38 | template 39 | void execute(int); 40 | }; 41 | 42 | #endif // PCSR2_THREAD_POOL_H 43 | -------------------------------------------------------------------------------- /src/thread_pool_pppcsr/thread_pool_pppcsr.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file thread_pool_pppcsr.cpp 3 | * @author Christian Menges 4 | */ 5 | 6 | #include "thread_pool_pppcsr.h" 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | /** 18 | * Initializes a pool of threads. Every thread has its own task queue. 
19 | */ 20 | ThreadPoolPPPCSR::ThreadPoolPPPCSR(const int NUM_OF_THREADS, bool lock_search, uint32_t init_num_nodes, 21 | int partitions_per_domain, bool use_numa) 22 | : tasks(NUM_OF_THREADS), 23 | finished(false), 24 | available_nodes(std::min(numa_max_node() + 1, NUM_OF_THREADS)), 25 | indeces(available_nodes, 0), 26 | partitions_per_domain(partitions_per_domain), 27 | threadToDomain(NUM_OF_THREADS), 28 | firstThreadDomain(available_nodes, 0), 29 | numThreadsDomain(available_nodes) { 30 | pcsr = new PPPCSR(init_num_nodes, init_num_nodes, lock_search, available_nodes, partitions_per_domain, use_numa); 31 | 32 | int d = available_nodes; 33 | int minNumThreads = NUM_OF_THREADS / d; 34 | int threshold = NUM_OF_THREADS % d; 35 | int counter = 0; 36 | int currentDomain = 0; 37 | 38 | for (int i = 0; i < NUM_OF_THREADS; i++) { 39 | threadToDomain[i] = currentDomain; 40 | counter++; 41 | if (counter == minNumThreads + (currentDomain < threshold)) { 42 | numThreadsDomain[currentDomain] = counter; 43 | firstThreadDomain[currentDomain] = i - counter + 1; 44 | counter = 0; 45 | currentDomain++; 46 | } 47 | } 48 | } 49 | 50 | // Function executed by worker threads 51 | // Does insertions, deletions and reads on the PCSR 52 | // Finishes when finished is set to true and there are no outstanding tasks 53 | template 54 | void ThreadPoolPPPCSR::execute(const int thread_id) { 55 | cout << "Thread " << thread_id << " has " << tasks[thread_id].size() << " tasks, runs on domain " 56 | << threadToDomain[thread_id] << endl; 57 | if (numa_available() >= 0) { 58 | numa_run_on_node(threadToDomain[thread_id]); 59 | } 60 | int registered = -1; 61 | 62 | while (!tasks[thread_id].empty() || (!isMasterThread && !finished)) { 63 | if (!tasks[thread_id].empty()) { 64 | task t = tasks[thread_id].front(); 65 | tasks[thread_id].pop(); 66 | 67 | int currentPar = pcsr->get_partiton(t.src); 68 | 69 | if (registered != currentPar) { 70 | if (registered != -1) { 71 | pcsr->unregisterThread(registered); 
72 | } 73 | pcsr->registerThread(currentPar); 74 | registered = currentPar; 75 | } 76 | if (t.add) { 77 | pcsr->add_edge(t.src, t.target, 1); 78 | } else if (!t.read) { 79 | pcsr->remove_edge(t.src, t.target); 80 | } else { 81 | pcsr->read_neighbourhood(t.src); 82 | } 83 | } else { 84 | if (registered != -1) { 85 | pcsr->unregisterThread(registered); 86 | registered = -1; 87 | } 88 | } 89 | } 90 | if (registered != -1) { 91 | pcsr->unregisterThread(registered); 92 | } 93 | } 94 | 95 | // Submit an update for edge {src, target} to thread with number thread_id 96 | void ThreadPoolPPPCSR::submit_add(int thread_id, int src, int target) { 97 | (void)thread_id; 98 | auto par = pcsr->get_partiton(src) / partitions_per_domain; 99 | auto index = (indeces[par]++) % numThreadsDomain[par]; 100 | tasks[firstThreadDomain[par] + index].push(task{true, false, src, target}); 101 | } 102 | 103 | // Submit a delete edge task for edge {src, target} to thread with number thread_id 104 | void ThreadPoolPPPCSR::submit_delete(int thread_id, int src, int target) { 105 | (void)thread_id; 106 | auto par = pcsr->get_partiton(src) / partitions_per_domain; 107 | auto index = (indeces[par]++) % numThreadsDomain[par]; 108 | tasks[firstThreadDomain[par] + index].push(task{false, false, src, target}); 109 | } 110 | 111 | // Submit a read neighbourhood task for vertex src to thread with number thread_id 112 | void ThreadPoolPPPCSR::submit_read(int thread_id, int src) { 113 | (void)thread_id; 114 | auto par = pcsr->get_partiton(src) / partitions_per_domain; 115 | auto index = (indeces[par]++) % numThreadsDomain[par]; 116 | tasks[firstThreadDomain[par] + index].push(task{false, true, src, src}); 117 | } 118 | 119 | // starts a new number of threads 120 | // number of threads is passed to the constructor 121 | void ThreadPoolPPPCSR::start(int threads) { 122 | s = chrono::steady_clock::now(); 123 | finished = false; 124 | 125 | for (int i = 1; i < threads; i++) { 126 | 
thread_pool.push_back(thread(&ThreadPoolPPPCSR::execute, this, i)); 127 | // Pin thread to core 128 | // cpu_set_t cpuset; 129 | // CPU_ZERO(&cpuset); 130 | // CPU_SET((i * 4), &cpuset); 131 | // if (i >= 4) { 132 | // CPU_SET(1 + (i * 4), &cpuset); 133 | // } else { 134 | // CPU_SET(i * 4, &cpuset); 135 | // } 136 | // int rc = pthread_setaffinity_np(thread_pool.back().native_handle(), 137 | // sizeof(cpu_set_t), &cpuset); 138 | // if (rc != 0) { 139 | // cout << "error pinning thread" << endl; 140 | // } 141 | } 142 | execute(0); 143 | } 144 | 145 | // Stops currently running worker threads without redistributing worker threads 146 | // start() can still be used after this is called to start a new set of threads operating on the same pcsr 147 | void ThreadPoolPPPCSR::stop() { 148 | finished = true; 149 | for (auto &&t : thread_pool) { 150 | if (t.joinable()) t.join(); 151 | cout << "Done" << endl; 152 | } 153 | end = chrono::steady_clock::now(); 154 | cout << "Elapsed wall clock time: " << chrono::duration_cast(end - s).count() << endl; 155 | thread_pool.clear(); 156 | } 157 | -------------------------------------------------------------------------------- /src/thread_pool_pppcsr/thread_pool_pppcsr.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file thread_pool_pppcsr.h 3 | * @author Christian Menges 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../pppcsr/PPPCSR.h" 11 | #include "task.h" 12 | 13 | using namespace std; 14 | #ifndef PPPCSR_THREAD_POOL_H 15 | #define PPPCSR_THREAD_POOL_H 16 | 17 | class ThreadPoolPPPCSR { 18 | public: 19 | PPPCSR *pcsr; 20 | 21 | explicit ThreadPoolPPPCSR(const int NUM_OF_THREADS, bool lock_search, uint32_t init_num_nodes, 22 | int partitions_per_domain, bool use_numa); 23 | ~ThreadPoolPPPCSR() = default; 24 | /** Public API */ 25 | void submit_add(int thread_id, int src, int dest); // submit task to thread {thread_id} to insert edge {src, dest} 26 | void 
/**
 * Breadth-first search over a graph, starting at start_node.
 *
 * @tparam T graph type; must provide get_n() (number of nodes) and
 *           get_neighbourhood(node) (an iterable of neighbour ids)
 * @param graph      graph to traverse
 * @param start_node node the search starts from
 * @return vector with one entry per node holding the number of hops from
 *         start_node; nodes that were not reached keep the value UINT32_MAX.
 *         If start_node is not a valid node id (this includes an empty graph)
 *         every entry is UINT32_MAX.
 */
template <typename T>
std::vector<uint32_t> bfs(T &graph, uint32_t start_node) {
  const uint64_t n = graph.get_n();
  std::vector<uint32_t> out(n, UINT32_MAX);

  // Without this guard out[start_node] below would write past the end of out.
  if (start_node >= n) {
    return out;
  }

  std::queue<uint32_t> next;
  next.push(start_node);
  out[start_node] = 0;

  while (!next.empty()) {
    const uint32_t active = next.front();
    next.pop();

    // UINT32_MAX doubles as the "not visited yet" marker
    for (const int neighbour : graph.get_neighbourhood(active)) {
      if (out[neighbour] == UINT32_MAX) {
        next.push(neighbour);
        out[neighbour] = out[active] + 1;
      }
    }
  }
  return out;
}
/**
 * Lock whose shared side is a check-in point instead of a held lock.
 *
 * Threads that use the shared side announce themselves with registerThread().
 * lock_shared() and unlock_shared() do the same thing: when an exclusive lock has
 * been requested, the caller reports its arrival and blocks until the exclusive
 * holder has called unlock(); otherwise they return immediately.
 * lock() takes the mutex exclusively, raises the request flag and then waits until
 * the number of arrived threads has reached the number of registered threads.
 */
class FastLock {
  using lock_type = std::shared_timed_mutex;

 public:
  FastLock() : lockRequested{false}, thread_counter{0}, arrived_threads{0} {}

  ~FastLock() = default;

  FastLock(const FastLock &) = delete;

  FastLock &operator=(const FastLock &) = delete;

  /** Acquires the lock exclusively; spins (yielding) until enough threads have arrived. */
  void lock() {
    // The caller counts as arrived too, before it waits for the mutex.
    arrived_threads.fetch_add(1);
    mtx.lock();
    lockRequested = true;
    while (arrived_threads.load() < thread_counter.load()) {
      std::this_thread::yield();
    }
    arrived_threads.fetch_sub(1);
  }

  /** Clears the request flag and releases the exclusive lock. */
  void unlock() {
    lockRequested.store(false);
    mtx.unlock();
  }

  /** Adds the caller to the number of threads lock() waits for. */
  void registerThread() { thread_counter.fetch_add(1); }

  /** Removes the caller from the number of threads lock() waits for. */
  void unregisterThread() { thread_counter.fetch_sub(1); }

  /**
   * Check-in point: only when an exclusive lock is requested, reports arrival and
   * waits on a shared lock that is released again before returning.
   */
  void lock_shared() {
    if (lockRequested) {
      arrived_threads.fetch_add(1);
      std::shared_lock<lock_type> lck(mtx);
      arrived_threads.fetch_sub(1);
    }
  }

  /** Same check-in as lock_shared(); no shared lock is held between the two calls. */
  void unlock_shared() { lock_shared(); }

  /** Always reports true. */
  bool lockable() { return true; }

 private:
  lock_type mtx;
  std::atomic<bool> lockRequested;
  // NOTE(review): the template arguments of the two counters are missing from this copy
  // of the file; int is assumed - confirm against the upstream source.
  std::atomic<int> thread_counter;
  std::atomic<int> arrived_threads;
};
/**
 * Performs one push step of PageRank: every node splits its current value evenly
 * among its neighbours.
 *
 * @tparam T        graph type; must provide get_n(), getNode(i).num_neighbors and
 *                  get_neighbourhood(i)
 * @tparam weight_t numeric type of the node values
 * @param graph       graph to run on
 * @param node_values current value of every node; must hold at least get_n() entries
 * @return vector with get_n() entries, each the sum of the contributions the node
 *         received from nodes that list it as a neighbour
 */
template <typename T, typename weight_t>
std::vector<weight_t> pagerank(T &graph, std::vector<weight_t> const &node_values) {
  const auto n = graph.get_n();

  std::vector<weight_t> output(n, 0);
  for (uint64_t i = 0; i < n; i++) {
    const auto degree = graph.getNode(i).num_neighbors;
    // A node without neighbours contributes nothing; skipping it avoids dividing by zero
    // (assumes num_neighbors == 0 implies an empty neighbourhood).
    if (degree == 0) {
      continue;
    }
    const weight_t contrib = (node_values[i] / degree);

    // get neighbors
    for (const int neighbour : graph.get_neighbourhood(i)) {
      output[neighbour] += contrib;
    }
  }
  return output;
}
/**
 * Struct for tasks to the threads.
 * The two flags together select the operation:
 *   add == true                  -> insert edge {src, target}
 *   add == false, read == false  -> delete edge {src, target}
 *   add == false, read == true   -> read the neighbourhood of src
 */
struct task {
  bool add;    // True if this is an add task. If false, `read` decides between delete and read.
  bool read;   // True if this is a read task (only looked at when add is false).
  int src;     // Source vertex for this task's edge
  int target;  // Target vertex for this task's edge
};
// Sequentially inserts 1E4 edges leaving node 0 and removes them again. After every
// single operation the edge's presence/absence is checked and all node locks as well
// as the global lock must be free.
TEST_P(DataStructureTest, add_remove_edge_1E4_seq) {
  PCSR pcsr(10, 10, GetParam(), 0);
  constexpr int edge_count = 1E4;

  // Reports a failure for every lock that is still held after the given iteration.
  const auto expect_all_locks_released = [&pcsr](int iteration) {
    for (uint32_t lock_id = 0; lock_id < pcsr.edges.N / pcsr.edges.logN; ++lock_id) {
      EXPECT_TRUE(pcsr.edges.node_locks[lock_id]->lockable())
          << "Current iteration: " << iteration << " lock id: " << lock_id;
    }
    EXPECT_TRUE(pcsr.edges.global_lock->lockable());
  };

  // Insertion phase: edge {0, target} carries the value `target`
  for (int target = 1; target <= edge_count; ++target) {
    pcsr.add_edge(0, target, target);
    EXPECT_TRUE(pcsr.edge_exists(0, target)) << target;
    expect_all_locks_released(target);
  }
  EXPECT_EQ(pcsr.get_n(), 10);
  EXPECT_EQ(pcsr.getNode(0).num_neighbors, edge_count);

  // Removal phase: same edges, same order
  for (int target = 1; target <= edge_count; ++target) {
    pcsr.remove_edge(0, target);
    EXPECT_FALSE(pcsr.edge_exists(0, target)) << target;
    expect_all_locks_released(target);
  }
  EXPECT_EQ(pcsr.get_neighbourhood(0).size(), 0);
  EXPECT_EQ(pcsr.get_n(), 10);
}
// Runs 2E4 random operations on a graph with 1000 nodes: roughly three out of four
// are insertions, the rest deletions. Each operation's effect is asserted immediately
// and every lock has to be free afterwards; the first violation aborts the test.
TEST_P(DataStructureTest, add_remove_edge_random_2E4_seq) {
  PCSR pcsr(1000, 1000, GetParam(), 0);
  constexpr int edge_count = 2E4;
  for (int step = 1; step <= edge_count; ++step) {
    // Keep the three std::rand() calls in this order so the generated sequence stays the same.
    const int src = std::rand() % 1000;
    const int target = std::rand() % 1000;
    const bool do_insert = (std::rand() % 4 != 0);

    if (do_insert) {
      pcsr.add_edge(src, target, step);
      ASSERT_TRUE(pcsr.edge_exists(src, target))
          << "Add: " << src << " " << target;
    } else {
      pcsr.remove_edge(src, target);
      ASSERT_FALSE(pcsr.edge_exists(src, target))
          << "Delete: " << src << " " << target;
    }

    // Check whether all locks were released
    for (uint32_t lock_id = 0; lock_id < pcsr.edges.N / pcsr.edges.logN; ++lock_id) {
      ASSERT_TRUE(pcsr.edges.node_locks[lock_id]->lockable())
          << "Current iteration: " << step << " lock id: " << lock_id;
    }
    ASSERT_TRUE(pcsr.edges.global_lock->lockable());
  }
}
std::rand() % 1000; 156 | if (std::rand() % 4 != 0) { 157 | pcsr.add_edge(src, target, i); 158 | EXPECT_TRUE(pcsr.edge_exists(src, target)) 159 | << "Add: " << src << " " << target; 160 | } else { 161 | pcsr.remove_edge(src, target); 162 | EXPECT_FALSE(pcsr.edge_exists(src, target)) 163 | << "Delete: " << src << " " << target; 164 | } 165 | } 166 | pcsr.edges.global_lock->unregisterThread(); 167 | } 168 | 169 | // Check whether all locks were released 170 | for (uint32_t j = 0; j < pcsr.edges.N / pcsr.edges.logN; ++j) { 171 | EXPECT_TRUE(pcsr.edges.node_locks[j]->lockable()) << "Lock id: " << j; 172 | } 173 | EXPECT_TRUE(pcsr.edges.global_lock->lockable()); 174 | } 175 | 176 | TEST_P(DataStructureTest, bfs_5E4) { 177 | PCSR pcsr(1000, 1000, GetParam(), 0); 178 | constexpr int edge_count = 5E4; 179 | for (int i = 1; i < edge_count + 1; ++i) { 180 | int src = std::rand() % 1000; 181 | int target = std::rand() % 1000; 182 | pcsr.add_edge(src, target, i); 183 | } 184 | 185 | // run BFS 186 | auto start = chrono::steady_clock::now(); 187 | auto res = bfs(pcsr, 0); 188 | auto finish = chrono::steady_clock::now(); 189 | EXPECT_EQ(res.size(), 1000); 190 | cout << "BFS time: " 191 | << chrono::duration_cast(finish - start).count() 192 | << endl; 193 | } 194 | 195 | TEST_P(DataStructureTest, pagerank_5E4) { 196 | PCSR pcsr(1000, 1000, GetParam(), 0); 197 | constexpr int edge_count = 5E4; 198 | for (int i = 1; i < edge_count + 1; ++i) { 199 | int src = std::rand() % 1000; 200 | int target = std::rand() % 1000; 201 | pcsr.add_edge(src, target, i); 202 | } 203 | 204 | // run pagerank 205 | vector weights(pcsr.get_n(), 1.0f); 206 | auto start = chrono::steady_clock::now(); 207 | auto res = pagerank(pcsr, weights); 208 | auto finish = chrono::steady_clock::now(); 209 | EXPECT_EQ(res.size(), 1000); 210 | cout << "Pagerank time: " 211 | << chrono::duration_cast(finish - start).count() 212 | << endl; 213 | } 214 | 215 | INSTANTIATE_TEST_CASE_P(DataStructureTestSuite, 
DataStructureTest, Bool()); -------------------------------------------------------------------------------- /test/DataStructureTest.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file DataStructureTest.cpp 3 | * @author Christian Menges 4 | */ 5 | 6 | #ifndef PARALLEL_PACKED_CSR_DATASTRUCTURETEST_H 7 | #define PARALLEL_PACKED_CSR_DATASTRUCTURETEST_H 8 | 9 | #include 10 | 11 | class DataStructureTest : public testing::TestWithParam {}; 12 | 13 | #endif // PARALLEL_PACKED_CSR_DATASTRUCTURETEST_H 14 | -------------------------------------------------------------------------------- /test/SchedulerTest.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SchedulerTest.cpp 3 | * @author Christian Menges 4 | */ 5 | 6 | #include 7 | #include 8 | 9 | #include "SchedulerTest.h" 10 | 11 | TEST_F(SchedulerTest, lookupTableCreation) { 12 | for (int d = 1; d <= 8; ++d) { 13 | for (int t = 1; t <= 256; ++t) { 14 | 15 | int minNumThreads = t / d; 16 | int threshold = t % d; 17 | std::vector threadToDomain(t); 18 | std::vector firstThreadDomain(d, 0); 19 | std::vector numThreadsDomain(d); 20 | int counter = 0; 21 | int currentDomain = 0; 22 | 23 | for (int i = 0; i < t; i++) { 24 | threadToDomain[i] = currentDomain; 25 | counter++; 26 | if (counter == minNumThreads + (currentDomain < threshold)) { 27 | numThreadsDomain[currentDomain] = counter; 28 | firstThreadDomain[currentDomain] = i - counter + 1; 29 | counter = 0; 30 | currentDomain++; 31 | } 32 | } 33 | 34 | ASSERT_EQ(std::accumulate(numThreadsDomain.cbegin(), 35 | numThreadsDomain.cend(), 0), 36 | t); 37 | ASSERT_EQ(firstThreadDomain.size(), d); 38 | ASSERT_EQ(firstThreadDomain[0], 0); 39 | std::set domains; 40 | for (int i = 0; i < t; ++i) { 41 | 42 | const int domain = threadToDomain[i]; 43 | 44 | ASSERT_GT(d, domain) << "#domains: " << d << " #threads: " << t 45 | << " current thread: " << i; 46 | 47 | 
domains.insert(domain); 48 | } 49 | ASSERT_EQ(std::min(t, d), domains.size()) 50 | << "#domains: " << d << " #threads: " << t; 51 | std::set differentDomainSizes; 52 | for (const auto it : numThreadsDomain) { 53 | differentDomainSizes.insert(it); 54 | } 55 | ASSERT_LT(0, differentDomainSizes.size()); 56 | ASSERT_GE(2, differentDomainSizes.size()); 57 | } 58 | } 59 | } -------------------------------------------------------------------------------- /test/SchedulerTest.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file SchedulerTest.h 3 | * @author Christian Menges 4 | */ 5 | 6 | #ifndef PARALLEL_PACKED_CSR_SCHEDULERTEST_H 7 | #define PARALLEL_PACKED_CSR_SCHEDULERTEST_H 8 | 9 | #include 10 | 11 | class SchedulerTest : public ::testing::Test {}; 12 | 13 | #endif // PARALLEL_PACKED_CSR_SCHEDULERTEST_H 14 | -------------------------------------------------------------------------------- /test/tests_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char **argv) { 4 | testing::InitGoogleTest(&argc, argv); 5 | return RUN_ALL_TESTS(); 6 | } --------------------------------------------------------------------------------