├── include ├── CPU_adj_list │ ├── parse_string.hpp │ ├── algorithm │ │ ├── CPU_BFS.hpp │ │ ├── CPU_shortest_paths.hpp │ │ ├── CPU_PageRank.hpp │ │ ├── CPU_sssp_pre.hpp │ │ ├── CPU_Community_Detection.hpp │ │ └── CPU_connected_components.hpp │ ├── binary_save_read_vector_of_vectors.hpp │ ├── ThreadPool.h │ ├── sorted_vector_binary_operations.hpp │ └── CPU_adj_list.hpp ├── GPU_csr │ ├── GPU_csr.hpp │ └── algorithm │ │ ├── GPU_connected_components.cuh │ │ ├── GPU_BFS.cuh │ │ ├── GPU_sssp_pre.cuh │ │ ├── GPU_shortest_paths.cuh │ │ ├── GPU_Community_Detection.cuh │ │ └── GPU_PageRank.cuh └── LDBC │ ├── ldbc.hpp │ └── checker.hpp ├── CMakeLists.txt ├── src ├── GPU_csr │ └── GPU_example.cu ├── CPU_adj_list │ └── CPU_example.cpp └── LDBC │ ├── LDBC_CPU_adj_list.cpp │ └── LDBC_GPU_csr.cu ├── README.md └── LICENSE.txt /include/CPU_adj_list/parse_string.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | inline std::vector parse_string(std::string parse_target, std::string delimiter); 6 | 7 | inline std::vector parse_string(std::string parse_target, std::string delimiter) { 8 | 9 | std::vector Parsed_content; 10 | size_t pos = 0; 11 | std::string token; 12 | while ((pos = parse_target.find(delimiter)) != std::string::npos) { 13 | // find(const string& str, size_t pos = 0) function returns the position of the first occurrence of str in the string, or npos if the string is not found. 14 | token = parse_target.substr(0, pos); 15 | // The substr(size_t pos = 0, size_t n = npos) function returns a substring of the object, starting at position pos and of length npos 16 | Parsed_content.push_back(token); // store the subtr to the list 17 | parse_target.erase(0, pos + delimiter.length()); // remove the front substr and the first delimiter 18 | } 19 | Parsed_content.push_back(parse_target); // store the subtr to the list 20 | 21 | return Parsed_content; 22 | 23 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_BFS.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | template // T is float or double 10 | std::vector CPU_BFS(std::vector>> &input_graph, int root = 0, int min_depth = 0, int max_depth = INT_MAX) 11 | { 12 | /* The CPU code for breadth first search uses queues to traverse the graph and record depth, 13 | which is also used to prevent duplicate traversal */ 14 | int N = input_graph.size(); 15 | 16 | std::vector depth(N, INT_MAX); 17 | depth[root] = 0; 18 | std::vector searched_vertices; // use to prevent duplicate traversal while recording depth 19 | 20 | std::queue Q; // Queue is a data structure designed to operate in FIFO (First in First out) context. 21 | Q.push(root); 22 | while (Q.size() > 0) 23 | { 24 | int v = Q.front(); 25 | if (depth[v] >= min_depth && depth[v] <= max_depth) 26 | searched_vertices.push_back(v); 27 | 28 | Q.pop(); // Removing that vertex from queue,whose neighbour will be visited now 29 | 30 | if (depth[v] + 1 <= max_depth) 31 | { 32 | // Traversing node v in the graph yields a pair value, adjfirst being the adjacency point 33 | for (auto &adj : input_graph[v]) 34 | { /*processing all the neighbours of v*/ 35 | if (depth[adj.first] > depth[v] + 1) 36 | { 37 | // If the depth of adjacent points is greater, add them to the queue. 
Otherwise, it means that the adjacent points have already been traversed before 38 | depth[adj.first] = depth[v] + 1; 39 | Q.push(adj.first); 40 | } 41 | } 42 | } 43 | } 44 | 45 | return depth; 46 | } 47 | 48 | std::vector> CPU_Bfs(graph_structure &graph, std::string src_v, int min_depth = 0, int max_depth = INT_MAX) 49 | { 50 | std::vector depth = CPU_BFS(graph.OUTs, graph.vertex_str_to_id[src_v], min_depth, max_depth); 51 | return graph.res_trans_id_val(depth); 52 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_shortest_paths.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using namespace std; 11 | 12 | struct node { 13 | double dis;//distance from source vertex 14 | int u;//indicates vertex 15 | 16 | bool operator>(const node& a) const { return dis > a.dis; }//operator overload 17 | }; 18 | 19 | std::vector CPU_shortest_paths(std::vector>>& input_graph, int source) { 20 | //dijkstras-shortest-path-algorithm 21 | 22 | double inf = std::numeric_limits::max(); 23 | 24 | int N = input_graph.size(); 25 | 26 | std::vector distances; 27 | distances.resize(N, inf); // initial distance from source is inf 28 | 29 | if (source < 0 || source >= N) { 30 | std::cout << "Invalid source vertex" << std::endl;//Abnormal input judgment 31 | return distances; 32 | } 33 | 34 | distances[source] = 0;//Starting distance is 0 35 | std::vector vis(N, 0); 36 | 37 | std::priority_queue, greater > Q;//Using Heap Optimization Algorithm 38 | Q.push({0, source}); 39 | 40 | while (Q.size() > 0) { 41 | 42 | int u = Q.top().u; 43 | 44 | Q.pop();//remove vertex visited this round 45 | 46 | if (vis[u]) continue;//if vertex has already been visited,it shouldn't be pushed to queue again. 
47 | vis[u] = 1;//mark 48 | 49 | for (auto edge : input_graph[u]) { 50 | //Traverse all adjacent vertexs of a vertex 51 | int v = edge.first;//vertex pointed by edge 52 | double w = edge.second;//weight of edge 53 | //use v to update path cost 54 | if (distances[v] > distances[u] + w) { 55 | //If the path cost is smaller, update the new path cost 56 | distances[v] = distances[u] + w; 57 | Q.push({distances[v], v});//add new vertex to queue 58 | } 59 | } 60 | 61 | } 62 | 63 | return distances; 64 | } 65 | 66 | std::vector> CPU_SSSP(graph_structure& graph, std::string src_v) { 67 | std::vector ssspVec = CPU_shortest_paths(graph.OUTs, graph.vertex_str_to_id[src_v]); 68 | return graph.res_trans_id_val(ssspVec); 69 | } 70 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | project(Test LANGUAGES CXX) 3 | 4 | option(BUILD_CPU "Build the CPU version" ON) 5 | option(BUILD_GPU "Build the GPU version" OFF) 6 | 7 | set(CMAKE_CXX_STANDARD 17) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -O3") 10 | 11 | if(BUILD_CPU) 12 | set(EXECUTABLE_NAME_CPU "${PROJECT_NAME}_CPU") 13 | set(CPU_example "CPU_example") 14 | set(EXECUTABLE_OUTPUT_PATH_CPU ${PROJECT_SOURCE_DIR}/build/bin_cpu) 15 | 16 | add_executable(${EXECUTABLE_NAME_CPU} src/LDBC/LDBC_CPU_adj_list.cpp) 17 | add_executable(${CPU_example} src/CPU_adj_list/CPU_example.cpp) 18 | 19 | target_include_directories(${EXECUTABLE_NAME_CPU} PUBLIC include) 20 | target_include_directories(${CPU_example} PUBLIC include) 21 | 22 | set_target_properties(${EXECUTABLE_NAME_CPU} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_CPU}) 23 | set_target_properties(${CPU_example} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_CPU}) 24 | endif() 25 | 26 | if(BUILD_GPU) 27 | enable_language(CUDA) 28 | find_package(CUDA REQUIRED) 29 | 30 | set(EXECUTABLE_NAME_GPU "${PROJECT_NAME}_GPU") 31 | set(GPU_example "GPU_example") 32 | set(EXECUTABLE_OUTPUT_PATH_GPU ${PROJECT_SOURCE_DIR}/build/bin_gpu) 33 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") 34 | 35 | add_executable(${EXECUTABLE_NAME_GPU} src/LDBC/LDBC_GPU_csr.cu) 36 | add_executable(${GPU_example} src/GPU_csr/GPU_example.cu) 37 | 38 | target_include_directories(${EXECUTABLE_NAME_GPU} PUBLIC ${CUDA_INCLUDE_DIRS}) 39 | target_include_directories(${GPU_example} PUBLIC ${CUDA_INCLUDE_DIRS}) 40 | 41 | target_link_libraries(${EXECUTABLE_NAME_GPU} ${CUDA_LIBRARIES}) 42 | target_link_libraries(${GPU_example} ${CUDA_LIBRARIES}) 43 | 44 | target_include_directories(${EXECUTABLE_NAME_GPU} PUBLIC include) 45 | target_include_directories(${GPU_example} PUBLIC include) 46 | 47 | set_target_properties(${EXECUTABLE_NAME_GPU} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_GPU}) 48 | set_target_properties(${EXECUTABLE_NAME_GPU} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 49 | set_target_properties(${GPU_example} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_GPU}) 50 | set_target_properties(${GPU_example} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 51 | endif() -------------------------------------------------------------------------------- /include/CPU_adj_list/binary_save_read_vector_of_vectors.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | template 8 | void 
binary_save_vector_of_vectors(std::string path, const std::vector >& myVector); 9 | 10 | template 11 | void binary_read_vector_of_vectors(std::string path, std::vector>& myVector); 12 | 13 | template 14 | void binary_save_vector_of_vectors(std::string path, const std::vector >& myVector) 15 | { 16 | std::ofstream FILE(path, std::ios::out | std::ofstream::binary); 17 | 18 | // Store size of the outer vector 19 | int s1 = myVector.size(); 20 | FILE.write(reinterpret_cast(&s1), sizeof(s1)); 21 | 22 | // Now write each vector one by one 23 | for (auto& v : myVector) { 24 | // Store its size 25 | int size = v.size(); 26 | FILE.write(reinterpret_cast(&size), sizeof(size)); 27 | if (size == 0) 28 | { 29 | continue; 30 | } 31 | // Store its contents 32 | FILE.write(reinterpret_cast(&v[0]), v.size() * sizeof(T)); 33 | } 34 | FILE.close(); 35 | } 36 | 37 | template 38 | void binary_read_vector_of_vectors(std::string path, std::vector>& myVector) 39 | { 40 | std::vector>().swap(myVector); 41 | 42 | std::ifstream FILE(path, std::ios::in | std::ifstream::binary); 43 | 44 | int size = 0; 45 | FILE.read(reinterpret_cast(&size), sizeof(size)); 46 | if (!FILE) 47 | { 48 | std::cout << "Unable to open file " << path << std::endl << "Please check the file location or file name." << std::endl; // throw an error message 49 | exit(1); // end the program 50 | } 51 | myVector.resize(size); 52 | for (int n = 0; n < size; ++n) { 53 | int size2 = 0; 54 | FILE.read(reinterpret_cast(&size2), sizeof(size2)); 55 | T f; 56 | for (int k = 0; k < size2; ++k) { 57 | FILE.read(reinterpret_cast(&f), sizeof(f)); 58 | myVector[n].push_back(f); 59 | } 60 | std::vector(myVector[n]).swap(myVector[n]); 61 | } 62 | std::vector>(myVector).swap(myVector); 63 | } 64 | 65 | 66 | /* 67 | ---------an example main file------------- 68 | #include 69 | 70 | int main() 71 | { 72 | ; 73 | } 74 | ------------------- 75 | */ 76 | -------------------------------------------------------------------------------- /include/CPU_adj_list/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | ThreadPool(size_t); 17 | template 18 | auto enqueue(F&& f, Args&&... args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | inline ThreadPool::ThreadPool(size_t threads) 35 | : stop(false) 36 | { 37 | for(size_t i = 0;i task; 44 | 45 | { 46 | std::unique_lock lock(this->queue_mutex); 47 | this->condition.wait(lock, 48 | [this]{ return this->stop || !this->tasks.empty(); }); 49 | if(this->stop && this->tasks.empty()) 50 | return; 51 | task = std::move(this->tasks.front()); 52 | this->tasks.pop(); 53 | } 54 | 55 | task(); 56 | } 57 | } 58 | ); 59 | } 60 | 61 | // add new work item to the pool 62 | template 63 | auto ThreadPool::enqueue(F&& f, Args&&... 
args) 64 | -> std::future::type> 65 | { 66 | using return_type = typename std::result_of::type; 67 | 68 | auto task = std::make_shared< std::packaged_task >( 69 | std::bind(std::forward(f), std::forward(args)...) 70 | ); 71 | 72 | std::future res = task->get_future(); 73 | { 74 | std::unique_lock lock(queue_mutex); 75 | 76 | // don't allow enqueueing after stopping the pool 77 | if(stop) 78 | throw std::runtime_error("enqueue on stopped ThreadPool"); 79 | 80 | tasks.emplace([task](){ (*task)(); }); 81 | } 82 | condition.notify_one(); 83 | return res; 84 | } 85 | 86 | // the destructor joins all threads 87 | inline ThreadPool::~ThreadPool() 88 | { 89 | { 90 | std::unique_lock lock(queue_mutex); 91 | stop = true; 92 | } 93 | condition.notify_all(); 94 | for(std::thread &worker: workers) 95 | worker.join(); 96 | } 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /src/GPU_csr/GPU_example.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() { 11 | ios::sync_with_stdio(false); 12 | std::cin.tie(0), std::cout.tie(0); 13 | 14 | graph_structure graph; // directed graph 15 | 16 | // Add vertices and edges 17 | graph.add_vertice("one"); 18 | graph.add_vertice("two"); 19 | graph.add_vertice("three"); 20 | graph.add_vertice("four"); 21 | graph.add_vertice("five"); 22 | graph.add_vertice("R"); 23 | 24 | graph.add_edge("one", "two", 0.8); 25 | graph.add_edge("two", "three", 1); 26 | graph.add_edge("two", "R", 1); 27 | graph.add_edge("two", "four", 0.1); 28 | graph.add_edge("R", "three", 1); 29 | graph.add_edge("one", "three", 1); 30 | graph.add_edge("one", "four", 1); 31 | graph.add_edge("four", "three", 1); 32 | graph.add_edge("four", "five", 1); 33 | 34 | // Remove a vertex 35 | graph.remove_vertice("R"); 36 | 37 | // Add a vertex 38 | graph.add_vertice("six"); 39 | 40 | // Remove an edge 41 | graph.remove_edge("two", "four"); 42 | 43 | // Add an edge 44 | graph.add_edge("one", "six", 1); 45 | 46 | // Transform to CSR 47 | CSR_graph csr_graph = toCSR(graph); 48 | 49 | // BFS 50 | std::cout << "Running BFS..." << std::endl; 51 | std::vector> gpu_bfs_res = Cuda_Bfs(graph, csr_graph, "one"); 52 | std::cout << "BFS result: " << std::endl; 53 | for (auto& res : gpu_bfs_res) 54 | std::cout << res.first << " " << res.second << std::endl; 55 | 56 | // Connected Components 57 | std::cout << "Running Connected Components..." << std::endl; 58 | std::vector> gpu_wcc_res = Cuda_WCC(graph, csr_graph); 59 | std::cout << "Connected Components result: " << std::endl; 60 | for (auto& res : gpu_wcc_res) 61 | std::cout << res.first << " " << res.second << std::endl; 62 | 63 | // SSSP 64 | std::cout << "Running SSSP..." << std::endl; 65 | std::vector> gpu_sssp_res = Cuda_SSSP(graph, csr_graph, "one"); 66 | std::cout << "SSSP result: " << std::endl; 67 | for (auto& res : gpu_sssp_res) 68 | std::cout << res.first << " " << res.second << std::endl; 69 | 70 | // PageRank 71 | std::cout << "Running PageRank..." << std::endl; 72 | std::vector> gpu_pr_res = Cuda_PR(graph, csr_graph, 10, 0.85); 73 | std::cout << "PageRank result: " << std::endl; 74 | for (auto& res : gpu_pr_res) 75 | std::cout << res.first << " " << res.second << std::endl; 76 | 77 | // Community Detection 78 | std::cout << "Running Community Detection..." 
<< std::endl; 79 | std::vector> gpu_cd_res = Cuda_CDLP(graph, csr_graph, 10); 80 | std::cout << "Community Detection result: " << std::endl; 81 | for (auto& res : gpu_cd_res) 82 | std::cout << res.first << " " << res.second << std::endl; 83 | 84 | return 0; 85 | } -------------------------------------------------------------------------------- /src/CPU_adj_list/CPU_example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() 11 | { 12 | ios::sync_with_stdio(false); 13 | std::cin.tie(0), std::cout.tie(0); 14 | 15 | graph_structure graph; // directed graph 16 | 17 | // Add vertices and edges 18 | graph.add_vertice("one"); 19 | graph.add_vertice("two"); 20 | graph.add_vertice("three"); 21 | graph.add_vertice("four"); 22 | graph.add_vertice("five"); 23 | graph.add_vertice("R"); 24 | 25 | graph.add_edge("one", "two", 0.8); 26 | graph.add_edge("two", "three", 1); 27 | graph.add_edge("two", "R", 1); 28 | graph.add_edge("two", "four", 0.1); 29 | graph.add_edge("R", "three", 1); 30 | graph.add_edge("one", "three", 1); 31 | graph.add_edge("one", "four", 1); 32 | graph.add_edge("four", "three", 1); 33 | graph.add_edge("four", "five", 1); 34 | 35 | // Remove a vertex 36 | graph.remove_vertice("R"); 37 | 38 | // Add a vertex 39 | graph.add_vertice("six"); 40 | 41 | // Remove an edge 42 | graph.remove_edge("two", "four"); 43 | 44 | // Add an edge 45 | graph.add_edge("one", "six", 1); 46 | 47 | // BFS 48 | std::cout << "Running BFS..." << std::endl; 49 | std::vector> cpu_bfs_result = CPU_Bfs(graph, "one"); 50 | std::cout << "BFS result: " << std::endl; 51 | for (auto& res : cpu_bfs_result) 52 | std::cout << res.first << " " << res.second << std::endl; 53 | 54 | // Connected Components 55 | std::cout << "Running Connected Components..." << std::endl; 56 | std::vector> cpu_connected_components_result = CPU_WCC(graph); 57 | std::cout << "Connected Components result: " << std::endl; 58 | for (auto& res : cpu_connected_components_result) 59 | std::cout << res.first << " " << res.second << std::endl; 60 | 61 | // SSSP 62 | std::cout << "Running SSSP..." << std::endl; 63 | std::vector> cpu_sssp_result = CPU_SSSP(graph, "one"); 64 | std::cout << "SSSP result: " << std::endl; 65 | for (auto& res : cpu_sssp_result) 66 | std::cout << res.first << " " << res.second << std::endl; 67 | 68 | // PageRank 69 | std::cout << "Running PageRank..." << std::endl; 70 | std::vector> cpu_pagerank_result = CPU_PR(graph, 10, 0.85); 71 | std::cout << "PageRank result: " << std::endl; 72 | for (auto& res : cpu_pagerank_result) 73 | std::cout << res.first << " " << res.second << std::endl; 74 | 75 | // Community Detection 76 | std::cout << "Running Community Detection..." 
<< std::endl; 77 | std::vector> cpu_community_detection_result = CPU_CDLP(graph, 10); 78 | std::cout << "Community Detection result: " << std::endl; 79 | for (auto& res : cpu_community_detection_result) 80 | std::cout << res.first << " " << res.second << std::endl; 81 | 82 | return 0; 83 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_PageRank.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // PageRank Algorithm 9 | // call this function like: ans_cpu = CDLP(graph.INs, graph.OUTs, damp, graph.cdlp_max_its); 10 | // used to show the relevance and importance of vertices in the graph 11 | // return the pagerank of each vertex based on the graph, damping factor and number of iterations. 12 | std::vector PageRank (std::vector>>& in_edge, 13 | std::vector>>& out_edge, double damp, int iters) { 14 | 15 | int N = in_edge.size(); // number of vertices in the graph 16 | 17 | std::vector rank(N, 1 / N); // The initial pagerank of each vertex is 1/|V| 18 | std::vector new_rank(N); // temporarily stores the updated pagerank 19 | 20 | double d = damp; // damping factor 21 | double teleport = (1 - damp) / N; // teleport mechanism 22 | 23 | std::vector sink; // the set of sink vertices 24 | for (int i = 0; i < N; i++) 25 | { 26 | if (out_edge[i].size() == 0) 27 | sink.push_back(i); // record the sink vertices 28 | } 29 | 30 | for (int i = 0; i < iters; i++) { // continue for a fixed number of iterations 31 | double sink_sum = 0; 32 | for (int i = 0; i < sink.size(); i++) // If the out-degree of the vertex is zero, it is a sink node 33 | { 34 | sink_sum += rank[sink[i]]; // calculate the sinksum, which is the sum of the pagerank value of all sink vertices 35 | } 36 | 37 | double x = sink_sum * d / N + teleport; // sum of sinksum and teleport 38 | 39 | ThreadPool pool_dynamic(100); 40 | std::vector> results_dynamic; 41 | for (int q = 0; q < 100; q++) 42 | { 43 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &rank, &out_edge, &new_rank, &x] 44 | { 45 | int start = (long long)q * N / 100, end = std::min((long long)N - 1, (long long)(q + 1) * N / 100); 46 | for (int i = start; i <= end; i++) { 47 | rank[i] /= out_edge[i].size(); 48 | new_rank[i] = x; // record redistributed from sinks and teleport value 49 | } 50 | 51 | return 1; })); 52 | } 53 | for (auto&& result : results_dynamic) 54 | { 55 | result.get(); 56 | } 57 | std::vector>().swap(results_dynamic); 58 | 59 | for (int q = 0; q < 100; q++) 60 | { 61 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &in_edge, &rank, &new_rank, &d] 62 | { 63 | int start = (long long)q * N / 100, end = std::min((long long)N - 1, (long long)(q + 1) * N / 100); 64 | for (int v = start; v <= end; v++) { 65 | double tmp = 0; // sum the rank and then multiply damping to improve running efficiency 66 | for (auto& y : in_edge[v]) { 67 | tmp = tmp + rank[y.first]; // calculate the importance value for each vertex 68 | } 69 | new_rank[v] += d * tmp; 70 | } 71 | return 1; })); 72 | } 73 | for (auto&& result : results_dynamic) 74 | { 75 | result.get(); 76 | } 77 | std::vector>().swap(results_dynamic); 78 | 79 | 80 | rank.swap(new_rank); // store the updated pagerank in the rank 81 | } 82 | return rank; // return the pagerank of each vertex 83 | } 84 | 85 | // PageRank Algorithm 86 | // return the pagerank of each vertex based on the graph, damping factor and number 
of iterations. 87 | // the type of the vertex and pagerank are string 88 | std::vector> CPU_PR (graph_structure& graph, int iterations, double damping) { 89 | std::vector prValueVec = PageRank(graph.INs, graph.OUTs, damping, iterations); // get the pagerank in double type 90 | return graph.res_trans_id_val(prValueVec); 91 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_sssp_pre.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | std::vector CPU_shortest_paths_pre(std::vector>>& input_graph, int source, std::vector& pre_v) { 13 | //dijkstras-shortest-path-algorithm 14 | 15 | double inf = std::numeric_limits::max(); 16 | 17 | int N = input_graph.size(); 18 | 19 | std::vector distances; 20 | distances.resize(N, inf); // initial distance from source is inf 21 | 22 | if (source < 0 || source >= N) { 23 | std::cout << "Invalid source vertex" << std::endl;//Abnormal input judgment 24 | return distances; 25 | } 26 | 27 | distances[source] = 0;//Starting distance is 0 28 | std::vector vis(N, 0); 29 | 30 | std::priority_queue, greater > Q;//Using Heap Optimization Algorithm 31 | Q.push({0, source}); 32 | 33 | while (Q.size() > 0) { 34 | 35 | int u = Q.top().u; 36 | 37 | Q.pop();//remove vertex visited this round 38 | 39 | if (vis[u]) continue;//if vertex has already been visited,it shouldn't be pushed to queue again. 40 | vis[u] = 1;//mark 41 | 42 | for (auto edge : input_graph[u]) { 43 | //Traverse all adjacent vertexs of a vertex 44 | int v = edge.first;//vertex pointed by edge 45 | double w = edge.second;//weight of edge 46 | //use v to update path cost 47 | if (distances[v] > distances[u] + w) { 48 | //If the path cost is smaller, update the new path cost 49 | distances[v] = distances[u] + w; 50 | pre_v[v] = u; 51 | Q.push({distances[v], v});//add new vertex to queue 52 | } 53 | } 54 | 55 | } 56 | 57 | return distances; 58 | } 59 | 60 | std::vector> CPU_SSSP_pre(graph_structure& graph, std::string src_v, std::vector& pre_v) { 61 | pre_v.resize(graph.V, -1); 62 | std::vector ssspVec = CPU_shortest_paths_pre(graph.OUTs, graph.vertex_str_to_id[src_v], pre_v); 63 | 64 | // check the pre_v 65 | /*for (int i = 0; i < graph.V; i++) { 66 | double dis = ssspVec[i]; 67 | int pre = pre_v[i]; 68 | int now = i; 69 | double sum = 0; 70 | while (now != graph.vertex_str_to_id[src_v]) { 71 | bool ff = false; 72 | //std::cout << "pre: " << pre << " now: " << now << std::endl; 73 | for (auto edge : graph.OUTs[pre]) { 74 | //std::cout << "there is an edge from " << pre << " to " << edge.first << " with weight " << edge.second << std::endl; 75 | if (edge.first == now) { 76 | sum += edge.second; 77 | now = pre; 78 | pre = pre_v[pre]; 79 | ff = true; 80 | break; 81 | } 82 | } 83 | if (!ff) { 84 | std::cout << "Not found!" << std::endl; 85 | break; 86 | } 87 | } 88 | if (fabs(dis - sum) > 1e-4) { 89 | std::cout << "Error: pre_v is wrong!" 
<< std::endl; 90 | std::cout << "dis: " << dis << " sum: " << sum << std::endl; 91 | } 92 | }*/ 93 | 94 | return graph.res_trans_id_val(ssspVec); 95 | } 96 | 97 | std::vector> path_query(graph_structure& graph, std::string src_v, std::string dst_v, std::vector& pre_v) { 98 | int dst_id = graph.vertex_str_to_id[dst_v]; 99 | int src_id = graph.vertex_str_to_id[src_v]; 100 | 101 | std::vector> path; 102 | 103 | if (src_id < 0 || src_id >= graph.V || dst_id < 0 || dst_id >= graph.V) { 104 | std::cout << "Invalid source or destination vertex" << std::endl; 105 | return path; 106 | } 107 | 108 | if (pre_v[dst_id] == -1) { 109 | std::cout << "No path from " << src_v << " to " << dst_v << std::endl; 110 | return path; 111 | } 112 | 113 | int now = dst_id; 114 | while (now != src_id) { 115 | path.push_back(std::make_pair(graph.vertex_id_to_str[pre_v[now]].first, graph.vertex_id_to_str[now].first)); 116 | if (pre_v[now] == -1 && now != src_id) { 117 | std::cout << "Error: pre_v is wrong!" << std::endl; 118 | break; 119 | } 120 | now = pre_v[now]; 121 | } 122 | 123 | std::reverse(path.begin(), path.end()); 124 | 125 | return path; 126 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_Community_Detection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Community Detection Using Label Propagation 10 | // call this function like:ans_cpu = CDLP(graph.INs, graph.OUTs, graph.vertex_id_to_str, graph.cdlp_max_its); 11 | // Returns label of the graph based on the graph and number of iterations. 12 | std::vector CDLP(graph_structure& graph, int iters) 13 | { 14 | auto& in_edges = graph.INs; // incoming edges of each vertex in the graph 15 | auto& out_edges = graph.OUTs; // outgoing edges of each vertex in the graph 16 | 17 | int N = in_edges.size(); // number of vertices in the graph 18 | std::vector label(N); // record the label of the vertex 19 | std::iota(std::begin(label), std::end(label), 0); 20 | std::vector new_label(N); // temporarily stores the updated label 21 | 22 | ThreadPool pool_dynamic(100); 23 | std::vector> results_dynamic; 24 | 25 | for (int k = 0; k < iters; k++) // continue for a fixed number of iterations 26 | { 27 | for (int q = 0; q < 100; q++) 28 | { 29 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &in_edges, &out_edges, &label, &new_label] 30 | { 31 | int start = (long long)q * N / 100, end = std::min((long long)N - 1, (long long)(q + 1) * N / 100); 32 | for (int i = start; i <= end; i++) { 33 | 34 | std::unordered_map count; // record the label information of the neighbor vertex. 
the first keyword is the label and the second keyword is the number of occurrences 35 | for (auto& x : in_edges[i]) // traverse the incoming edges of vertex i 36 | { 37 | count[label[x.first]]++; // count the number of label occurrences of the neighbor vertices 38 | } 39 | for (auto& x : out_edges[i]) // traverse the outcoming edges of vertex i 40 | { 41 | count[label[x.first]]++; // count the number of label occurrences of the neighbor vertices 42 | } 43 | int maxcount = 0; // the maximum number of maxlabel occurrences, the initial value is set to 0, which means that all existing labels can be recorded 44 | int maxlabel = label[i]; // consider the possibility of isolated points, the initial label is label[i] instead of 0 45 | for (std::pair p : count) // traversal the label statistics protector of the neighbor node 46 | { 47 | if (p.second > maxcount) // the number of label occurrences currently traversed is greater than the recorded value 48 | { 49 | maxcount = p.second; // update the label 50 | maxlabel = p.first; 51 | } 52 | else if (p.second == maxcount) // take a smaller value when the number of label occurrences is the same 53 | { 54 | maxlabel = std::min(p.first, maxlabel); 55 | } 56 | } 57 | 58 | new_label[i] = maxlabel; // record the maxlabel 59 | 60 | } 61 | return 1; })); 62 | } 63 | for (auto&& result : results_dynamic) 64 | { 65 | result.get(); 66 | } 67 | std::vector>().swap(results_dynamic); // clear results dynamic 68 | 69 | std::swap(new_label, label); // store labels of type string 70 | } 71 | 72 | std::vectorres(N); 73 | for (int i = 0; i < N; i++) 74 | { 75 | res[i] = graph.vertex_id_to_str[label[i]].first; // convert the label to string and store it in res 76 | } 77 | 78 | return res; 79 | } 80 | 81 | // Community Detection Using Label Propagation 82 | // Returns label of the graph based on the graph and number of iterations. 83 | // the type of the vertex and label are string 84 | std::vector> CPU_CDLP(graph_structure& graph, int iterations) 85 | { 86 | std::vector cdlpVec = CDLP(graph, iterations); // get the labels of each vertex. 
vector index is the id of vertex 87 | 88 | std::vector> res; // store results, the first value in pair records the vertex id, and the second value records the label 89 | int size = cdlpVec.size(); 90 | for (int i = 0; i < size; i++) 91 | res.push_back(std::make_pair(graph.vertex_id_to_str[i].first, cdlpVec[i])); // for each vertex, get its string number and store it in res 92 | 93 | return res; // return the results 94 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_connected_components.hpp: -------------------------------------------------------------------------------- 1 | // #pragma once 2 | 3 | // #include 4 | // #include 5 | // #include 6 | 7 | // template // T is float or double 8 | // std::vector CPU_connected_components(std::vector>>& input_graph, std::vector>>& output_graph) { 9 | // //Using BFS method to find connectivity vectors starting from each node 10 | // /*this is to find connected_components using breadth first search; time complexity O(|V|+|E|); 11 | // related content: https://www.boost.org/doc/libs/1_68_0/boost/graph/connected_components.hpp 12 | // https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm*/ 13 | 14 | // std::vector parent; 15 | 16 | // /*time complexity: O(V)*/ 17 | // int N = input_graph.size(); 18 | // std::vector discovered(N, false); 19 | // parent.resize(N); 20 | // //Vector initialization 21 | // for (int i = 0; i < N; i++) { 22 | 23 | // if (discovered[i] == false) { 24 | // //If the node has not yet been added to the connected component, search for the connected component starting from the node 25 | // /*below is a depth first search; time complexity O(|V|+|E|)*/ 26 | // std::queue Q; // Queue is a data structure designed to operate in FIFO (First in First out) context. 
27 | // Q.push(i); 28 | // parent[i] = i; 29 | // discovered[i] = true; 30 | // while (Q.size() > 0) { 31 | // int v = Q.front(); 32 | // Q.pop(); //Removing that vertex from queue,whose neighbour will be visited now 33 | 34 | // for (auto& x : input_graph[v]) { 35 | // int adj_v = x.first; 36 | // if (discovered[adj_v] == false) { 37 | // Q.push(adj_v); 38 | // parent[adj_v] = parent[v]; 39 | // discovered[adj_v] = true; 40 | // } 41 | // } 42 | // for (auto& x : output_graph[v]) { 43 | // int adj_v = x.first; 44 | // if (discovered[adj_v] == false) { 45 | // Q.push(adj_v); 46 | // parent[adj_v] = parent[v]; 47 | // discovered[adj_v] = true; 48 | // } 49 | // } 50 | // } 51 | // } 52 | // } 53 | // return parent; 54 | // } 55 | 56 | // std::vector> CPU_WCC(graph_structure & graph){ 57 | // std::vector wccVec = CPU_connected_components(graph.OUTs, graph.INs); 58 | // return graph.res_trans_id_id(wccVec); 59 | // } 60 | 61 | #pragma once 62 | 63 | #include 64 | #include 65 | #include 66 | #include 67 | 68 | template // T is float or double 69 | std::vector CPU_connected_components(std::vector>>& input_graph, std::vector>>& output_graph) { 70 | //Using BFS method to find connectivity vectors starting from each node 71 | /*this is to find connected_components using breadth first search; time complexity O(|V|+|E|); 72 | related content: https://www.boost.org/doc/libs/1_68_0/boost/graph/connected_components.hpp 73 | https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm*/ 74 | 75 | /*time complexity: O(V)*/ 76 | 77 | int N = input_graph.size(); 78 | 79 | std::vector component; 80 | component.resize(N); 81 | for (int u = 0; u < N; u++) { 82 | component[u] = u; 83 | } 84 | 85 | int change = true; 86 | while (change) { 87 | change = false; 88 | 89 | ThreadPool pool_dynamic(100); 90 | std::vector> results_dynamic; 91 | for (long long q = 0; q < 100; q++) { 92 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &change, &input_graph, &component] 93 | { 94 | int start = q * N / 100, end = std::min(N - 1, (int)((q + 1) * N / 100)); 95 | for (int u = start; u <= end; u++) { 96 | for (auto& x : input_graph[u]) { 97 | int v = x.first; 98 | int comp_u = component[u]; 99 | int comp_v = component[v]; 100 | if (comp_u == comp_v) continue; 101 | int high_comp = comp_u > comp_v ? 
comp_u : comp_v; 102 | int low_comp = comp_u + (comp_v - high_comp); 103 | if (high_comp == component[high_comp]) { 104 | change = true; 105 | component[high_comp] = low_comp; 106 | } 107 | } 108 | } 109 | return 1; })); 110 | } 111 | for (auto&& result : results_dynamic) 112 | { 113 | result.get(); 114 | } 115 | std::vector>().swap(results_dynamic); 116 | 117 | for (long long q = 0; q < 100; q++) { 118 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &component] 119 | { 120 | int start = q * N / 100, end = std::min(N - 1, (int)((q + 1) * N / 100)); 121 | for (int u = start; u <= end; u++) { 122 | while (component[u] != component[component[u]]) { 123 | component[u] = component[component[u]]; 124 | } 125 | } 126 | return 1; })); 127 | } 128 | for (auto&& result : results_dynamic) 129 | { 130 | result.get(); 131 | } 132 | std::vector>().swap(results_dynamic); 133 | } 134 | 135 | return component; 136 | } 137 | 138 | std::vector> CPU_WCC(graph_structure & graph){ 139 | std::vector wccVec = CPU_connected_components(graph.OUTs, graph.INs); 140 | return graph.res_trans_id_id(wccVec); 141 | } 142 | -------------------------------------------------------------------------------- /include/CPU_adj_list/sorted_vector_binary_operations.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | for a sorted vector>, this dunction conducted binary (divide and conquer) operations on this vector; 7 | 8 | the int values are unique and sorted from small to large 9 | 10 | https://blog.csdn.net/EbowTang/article/details/50770315 11 | */ 12 | 13 | 14 | template 15 | bool sorted_vector_binary_operations_search(std::vector>& input_vector, int key); 16 | 17 | template 18 | T sorted_vector_binary_operations_search_weight(std::vector>& input_vector, int key); 19 | 20 | 21 | template 22 | int sorted_vector_binary_operations_search_position(std::vector>& input_vector, int key); 23 | 24 | template 25 | void sorted_vector_binary_operations_erase(std::vector>& input_vector, int key); 26 | 27 | template 28 | int sorted_vector_binary_operations_insert(std::vector>& input_vector, int key, T load); 29 | 30 | 31 | template 32 | bool sorted_vector_binary_operations_search(std::vector>& input_vector, int key) { 33 | 34 | /*return true if key is in vector; time complexity O(log n)*/ 35 | 36 | int left = 0, right = input_vector.size() - 1; 37 | 38 | while (left <= right) { 39 | int mid = left + ((right - left) / 2); // mid is between left and right (may be equal); 40 | if (input_vector[mid].first == key) { 41 | return true; 42 | } 43 | else if (input_vector[mid].first > key) { 44 | right = mid - 1; 45 | } 46 | else { 47 | left = mid + 1; 48 | } 49 | } 50 | 51 | return false; 52 | 53 | } 54 | 55 | template 56 | T sorted_vector_binary_operations_search_weight(std::vector>& input_vector, int key) { 57 | 58 | /*return std::numeric_limits::max() if key is not in vector; time complexity O(log n)*/ 59 | 60 | int left = 0, right = input_vector.size() - 1; 61 | 62 | while (left <= right) { 63 | int mid = left + ((right - left) / 2); // mid is between left and right (may be equal); 64 | if (input_vector[mid].first == key) { 65 | return input_vector[mid].second; 66 | } 67 | else if (input_vector[mid].first > key) { 68 | right = mid - 1; 69 | } 70 | else { 71 | left = mid + 1; 72 | } 73 | } 74 | 75 | return std::numeric_limits::max(); 76 | 77 | } 78 | 79 | template 80 | int sorted_vector_binary_operations_search_position(std::vector>& input_vector, int key) 
{ 81 | 82 | /*return -1 if key is not in vector; time complexity O(log n)*/ 83 | 84 | int left = 0, right = input_vector.size() - 1; 85 | 86 | while (left <= right) { 87 | int mid = left + ((right - left) / 2); 88 | if (input_vector[mid].first == key) { 89 | return mid; 90 | } 91 | else if (input_vector[mid].first > key) { 92 | right = mid - 1; 93 | } 94 | else { 95 | left = mid + 1; 96 | } 97 | } 98 | 99 | return -1; 100 | 101 | } 102 | 103 | template 104 | void sorted_vector_binary_operations_erase(std::vector>& input_vector, int key) { 105 | 106 | /*erase key from vector; time complexity O(log n + size()-position ), which is O(n) in the worst case, as 107 | the time complexity of erasing an element from a vector is the number of elements behind this element*/ 108 | 109 | if (input_vector.size() > 0) { 110 | int left = 0, right = input_vector.size() - 1; 111 | 112 | while (left <= right) { 113 | int mid = left + ((right - left) / 2); 114 | if (input_vector[mid].first == key) { 115 | input_vector.erase(input_vector.begin() + mid); 116 | break; 117 | } 118 | else if (input_vector[mid].first > key) { 119 | right = mid - 1; 120 | } 121 | else { 122 | left = mid + 1; 123 | } 124 | } 125 | } 126 | 127 | } 128 | 129 | template 130 | int sorted_vector_binary_operations_insert(std::vector>& input_vector, int key, T load) { 131 | 132 | /*return the inserted position; 133 | 134 | insert into vector, if key is already inside, then load is updated; time complexity O(log n + size()-position ), which is O(n) in the worst case, as 135 | the time complexity of inserting an element into a vector is the number of elements behind this element*/ 136 | 137 | int left = 0, right = input_vector.size() - 1; 138 | 139 | while (left <= right) // it will be skept when input_vector.size() == 0 140 | { 141 | int mid = left + ((right - left) / 2); // mid is between left and right (may be equal); 142 | if (input_vector[mid].first == key) { 143 | input_vector[mid].second = load; 144 | return mid; 145 | } 146 | else if (input_vector[mid].first > key) { 147 | right = mid - 1; // the elements after right are always either empty, or have larger keys than input key 148 | } 149 | else { 150 | left = mid + 1; // the elements before left are always either empty, or have smaller keys than input key 151 | } 152 | } 153 | 154 | /*the following code is used when key is not in vector, i.e., left > right, specifically, left = right + 1; 155 | the elements before left are always either empty, or have smaller keys than input key; 156 | the elements after right are always either empty, or have larger keys than input key; 157 | so, the input key should be insert between right and left at this moment*/ 158 | input_vector.insert(input_vector.begin() + left, { key,load }); 159 | return left; 160 | } -------------------------------------------------------------------------------- /include/GPU_csr/GPU_csr.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cuda_runtime.h" 3 | #include 4 | #include 5 | #include 6 | /*for GPU*/ 7 | template 8 | class CSR_graph 9 | { 10 | //CSR has space efficiency and is easy to use by GPUs. 11 | public: 12 | CSR_graph() {} 13 | ~CSR_graph(); 14 | std::vector INs_Neighbor_start_pointers, OUTs_Neighbor_start_pointers, ALL_start_pointers; // Neighbor_start_pointers[i] is the start point of neighbor information of vertex i in Edges and Edge_weights 15 | /* 16 | Now, Neighbor_sizes[i] = Neighbor_start_pointers[i + 1] - Neighbor_start_pointers[i]. 
17 | And Neighbor_start_pointers[V] = Edges.size() = Edge_weights.size() = the total number of edges. 18 | */ 19 | std::vector INs_Edges, OUTs_Edges,all_Edges; // Edges[Neighbor_start_pointers[i]] is the start of Neighbor_sizes[i] neighbor IDs 20 | std::vector INs_Edge_weights, OUTs_Edge_weights; // Edge_weights[Neighbor_start_pointers[i]] is the start of Neighbor_sizes[i] edge weights 21 | int *in_pointer, *out_pointer, *in_edge, *out_edge, *all_pointer, *all_edge;//All_edge has merged in_edge and out_edge, mainly used on CDLP 22 | double *in_edge_weight, *out_edge_weight; 23 | size_t E_all; 24 | }; 25 | 26 | template 27 | // CSR_graph toCSR(graph_structure& graph) 28 | CSR_graph toCSR(graph_structure& graph) 29 | { 30 | 31 | CSR_graph ARRAY; 32 | 33 | int V = graph.size(); 34 | ARRAY.INs_Neighbor_start_pointers.resize(V + 1); // Neighbor_start_pointers[V] = Edges.size() = Edge_weights.size() = the total number of edges. 35 | ARRAY.OUTs_Neighbor_start_pointers.resize(V + 1); 36 | ARRAY.ALL_start_pointers.resize(V + 1); 37 | 38 | int pointer = 0; 39 | for (int i = 0; i < V; i++) 40 | { 41 | ARRAY.INs_Neighbor_start_pointers[i] = pointer; 42 | for (auto &xx : graph.INs[i]) 43 | { 44 | ARRAY.INs_Edges.push_back(xx.first); 45 | ARRAY.INs_Edge_weights.push_back(xx.second); 46 | } 47 | pointer += graph.INs[i].size(); 48 | } 49 | ARRAY.INs_Neighbor_start_pointers[V] = pointer; 50 | 51 | pointer = 0; 52 | for (int i = 0; i < V; i++) 53 | { 54 | ARRAY.OUTs_Neighbor_start_pointers[i] = pointer; 55 | for (auto &xx : graph.OUTs[i]) 56 | { 57 | ARRAY.OUTs_Edges.push_back(xx.first); 58 | ARRAY.OUTs_Edge_weights.push_back(xx.second); 59 | } 60 | pointer += graph.OUTs[i].size(); 61 | } 62 | ARRAY.OUTs_Neighbor_start_pointers[V] = pointer; 63 | 64 | pointer = 0; 65 | for (int i = 0; i < V; i++) 66 | { 67 | ARRAY.ALL_start_pointers[i] = pointer; 68 | for (auto &xx : graph.INs[i]) 69 | { 70 | ARRAY.all_Edges.push_back(xx.first); 71 | } 72 | for (auto &xx : graph.OUTs[i]) 73 | { 74 | ARRAY.all_Edges.push_back(xx.first); 75 | } 76 | pointer += graph.INs[i].size() + graph.OUTs[i].size(); 77 | } 78 | ARRAY.ALL_start_pointers[V] = pointer; 79 | 80 | size_t E_in = ARRAY.INs_Edges.size(); 81 | size_t E_out = ARRAY.OUTs_Edges.size(); 82 | size_t E_all = E_in+E_out; 83 | ARRAY.E_all = E_all; 84 | cudaMallocManaged((void**)&ARRAY.in_pointer, (V + 1) * sizeof(int)); 85 | cudaMallocManaged((void**)&ARRAY.out_pointer, (V + 1) * sizeof(int)); 86 | cudaMallocManaged((void**)&ARRAY.all_pointer, (V + 1) * sizeof(int)); 87 | cudaMallocManaged((void**)&ARRAY.in_edge, E_in * sizeof(int)); 88 | cudaMallocManaged((void**)&ARRAY.out_edge, E_out * sizeof(int)); 89 | cudaMallocManaged((void**)&ARRAY.all_edge, E_all * sizeof(int)); 90 | cudaMallocManaged((void**)&ARRAY.in_edge_weight, E_in * sizeof(double)); 91 | cudaMallocManaged((void**)&ARRAY.out_edge_weight, E_out * sizeof(double)); 92 | 93 | cudaDeviceSynchronize(); 94 | cudaError_t error = cudaGetLastError(); 95 | if (error != cudaSuccess) { 96 | printf("CUDA error: %s\n", cudaGetErrorString(error)); 97 | } 98 | 99 | cudaMemcpy(ARRAY.in_pointer, ARRAY.INs_Neighbor_start_pointers.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice); 100 | cudaMemcpy(ARRAY.out_pointer, ARRAY.OUTs_Neighbor_start_pointers.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice); 101 | cudaMemcpy(ARRAY.all_pointer, ARRAY.ALL_start_pointers.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice); 102 | cudaMemcpy(ARRAY.in_edge, ARRAY.INs_Edges.data(), E_in * sizeof(int), cudaMemcpyHostToDevice); 103 
| cudaMemcpy(ARRAY.out_edge, ARRAY.OUTs_Edges.data(), E_out * sizeof(int), cudaMemcpyHostToDevice); 104 | cudaMemcpy(ARRAY.all_edge, ARRAY.all_Edges.data(), E_all * sizeof(int), cudaMemcpyHostToDevice); 105 | cudaMemcpy(ARRAY.in_edge_weight, ARRAY.INs_Edge_weights.data(), E_in * sizeof(double), cudaMemcpyHostToDevice); 106 | cudaMemcpy(ARRAY.out_edge_weight, ARRAY.OUTs_Edge_weights.data(), E_out * sizeof(double), cudaMemcpyHostToDevice); 107 | 108 | return ARRAY; 109 | } 110 | 111 | template 112 | CSR_graph::~CSR_graph() 113 | { 114 | cudaFree(in_pointer); 115 | cudaFree(out_pointer); 116 | cudaFree(all_pointer); 117 | cudaFree(in_edge); 118 | cudaFree(out_edge); 119 | cudaFree(all_edge); 120 | cudaFree(in_edge_weight); 121 | cudaFree(out_edge_weight); 122 | } -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_connected_components.cuh: -------------------------------------------------------------------------------- 1 | #ifndef WCCG 2 | #define WCCG 3 | #include "cuda_runtime.h" 4 | #include 5 | #include "device_launch_parameters.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | #define WCCG_THREAD_PER_BLOCK 512 14 | 15 | __global__ void parent_init(int *parent, int N) 16 | { 17 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 18 | 19 | if (tid < N) // tid decides process which vertex 20 | { 21 | parent[tid] = tid; // each vertex is initially labeled by itself 22 | } 23 | } 24 | __global__ void compress(int *parent, int N) 25 | { 26 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 27 | 28 | if (tid < N) // tid decides process which vertex 29 | { 30 | while (parent[tid] != parent[parent[tid]]) 31 | { 32 | parent[tid] = parent[parent[tid]]; 33 | } 34 | } 35 | } 36 | __global__ void get_freq(int *parent, int *freq, int N) 37 | { 38 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 39 | 40 | if (tid < N) // tid decides process which vertex 41 | atomicAdd(&freq[parent[tid]], 1); 42 | } 43 | 44 | 45 | __global__ void sampling(int *all_pointer, int *all_edge, int *parent, int N, int neighbor_round) 46 | { 47 | 48 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 49 | int u = tid; 50 | if (tid < N) // tid decides process which vertex 51 | { 52 | 53 | int i = all_pointer[u] + neighbor_round; 54 | if (i < all_pointer[u + 1]) 55 | { 56 | int v = all_edge[i]; 57 | int p1 = parent[u], p2 = parent[v]; 58 | while (p1 != p2) 59 | { // link 60 | int h = p1 > p2 ? p1 : p2; 61 | int l = p2 >= p1 ? p1 : p2; 62 | int check = atomicCAS(&parent[h], h, l); 63 | if (check == h) 64 | { 65 | break; 66 | } 67 | p1 = parent[parent[h]]; 68 | p2 = parent[l]; 69 | } 70 | } 71 | } 72 | } 73 | __global__ void full_link(int *all_pointer, int *all_edge, int *parent, int most, int N, int neighbor_round) 74 | { 75 | 76 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 77 | int u = tid; 78 | if (tid < N) // tid decides process which vertex 79 | { 80 | if (parent[tid] == most) 81 | { 82 | return; 83 | } 84 | 85 | for (int i = all_pointer[u] + neighbor_round; i < all_pointer[u + 1]; i++) 86 | { 87 | int v = all_edge[i]; 88 | int p1 = parent[u], p2 = parent[v]; 89 | while (p1 != p2) 90 | { // link 91 | int h = p1 > p2 ? p1 : p2; 92 | int l = p2 >= p1 ? 
p1 : p2; 93 | int check = atomicCAS(&parent[h], h, l); 94 | if (check == h) 95 | { 96 | break; 97 | } 98 | p1 = parent[parent[h]]; 99 | p2 = parent[l]; 100 | } 101 | } 102 | } 103 | } 104 | std::vector WCC_GPU(graph_structure &graph, CSR_graph &input_graph) 105 | { 106 | int N = graph.size(); // number of vertices in the graph 107 | 108 | dim3 init_label_block((N + WCCG_THREAD_PER_BLOCK - 1) / WCCG_THREAD_PER_BLOCK, 1, 1); // the number of blocks used in the gpu 109 | dim3 init_label_thread(WCCG_THREAD_PER_BLOCK, 1, 1); // the number of threads used in the gpu 110 | 111 | int *all_edge = input_graph.in_edge; // graph stored in csr format 112 | int *all_pointer = input_graph.in_pointer; 113 | int *parent = nullptr; 114 | int *freq = nullptr; 115 | cudaMallocManaged((void **)&parent, N * sizeof(int)); 116 | cudaMallocManaged((void **)&freq, N * sizeof(int)); 117 | cudaMemset(freq, 0, N * sizeof(int)); 118 | parent_init<<>>(parent, N); 119 | cudaDeviceSynchronize(); 120 | int it = 0, ITERATION = 2; // number of iterations 121 | while (it < ITERATION) // continue for a fixed number of iterations 122 | { 123 | sampling<<>>(all_pointer, all_edge, parent, N, it); 124 | cudaDeviceSynchronize(); 125 | compress<<>>(parent, N); 126 | cudaDeviceSynchronize(); 127 | it++; 128 | } 129 | get_freq<<>>(parent, freq, N); 130 | int *c = thrust::max_element(thrust::device, freq, freq + N); 131 | int most_f_element = *c; 132 | full_link<<>>(all_pointer, all_edge, parent, most_f_element, N, ITERATION); 133 | cudaDeviceSynchronize(); 134 | compress<<>>(parent, N); 135 | cudaDeviceSynchronize(); 136 | 137 | std::vector result(N); 138 | cudaMemcpy(result.data(), parent, N * sizeof(int), cudaMemcpyDeviceToHost); 139 | cudaFree(parent); 140 | cudaFree(freq); 141 | return result; 142 | } 143 | std::vector> Cuda_WCC(graph_structure &graph, CSR_graph &csr_graph) 144 | { 145 | std::vector wccVecGPU = WCC_GPU(graph, csr_graph); 146 | return graph.res_trans_id_id(wccVecGPU); 147 | } 148 | #endif -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_BFS.cuh: -------------------------------------------------------------------------------- 1 | #ifndef GPU_BFS 2 | #define GPU_BFS 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | 14 | // template 15 | std::vector cuda_bfs(CSR_graph &input_graph, int source_vertex, int max_depth = INT_MAX); 16 | 17 | std::vector> Cuda_Bfs(graph_structure &graph, CSR_graph &csr_graph, std::string src_v, int min_depth = 0, int max_depth = INT_MAX); 18 | 19 | __global__ void bfs_Relax(int *start, int *edge, int *depth, int *visited, int *queue, int *queue_size) 20 | { 21 | //Relax is performed on each queue node, which traverses all neighboring nodes of that round and relaxes the corresponding distance 22 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 23 | 24 | if (idx < *queue_size) 25 | { 26 | int v = queue[idx]; 27 | 28 | for (int i = start[v]; i < start[v + 1]; i++) 29 | { 30 | // Traverse adjacent edges 31 | int new_v = edge[i]; 32 | 33 | int new_depth = depth[v] + 1; 34 | 35 | int old = atomicMin(&depth[new_v], new_depth);//Update distance using atomic operations to avoid conflict 36 | if (new_depth < old) 37 | { 38 | visited[new_v] = 1; 39 | } 40 | } 41 | } 42 | } 43 | 44 | __global__ void bfs_CompactQueue(int V, int *next_queue, int *next_queue_size, int *visited) 45 | { 46 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 47 | if (idx < V && visited[idx]) 48 | { 49 | //If the node has 
been accessed in this round, it will be added to the queue for the next round 50 | int pos = atomicAdd(next_queue_size, 1); 51 | next_queue[pos] = idx; 52 | visited[idx] = 0; 53 | } 54 | } 55 | 56 | 57 | 58 | // template 59 | std::vector cuda_bfs(CSR_graph &input_graph, int source, int max_depth) 60 | { 61 | /* The GPU code for breadth first search uses queues to traverse the graph and record depth, 62 | which is also used to prevent duplicate traversal */ 63 | int V = input_graph.OUTs_Neighbor_start_pointers.size() - 1; 64 | int E = input_graph.OUTs_Edges.size(); 65 | 66 | int *depth; 67 | int *edge = input_graph.out_edge; 68 | 69 | int *start = input_graph.out_pointer; 70 | int *visited; 71 | 72 | int *queue, *next_queue; 73 | int *queue_size, *next_queue_size; 74 | 75 | cudaMallocManaged((void **)&depth, V * sizeof(int)); 76 | cudaMallocManaged((void **)&visited, V * sizeof(int)); 77 | cudaMallocManaged((void **)&queue, V * sizeof(int)); 78 | cudaMallocManaged((void **)&next_queue, V * sizeof(int)); 79 | cudaMallocManaged((void **)&queue_size, sizeof(int)); 80 | cudaMallocManaged((void **)&next_queue_size, sizeof(int)); 81 | 82 | cudaDeviceSynchronize(); 83 | 84 | cudaError_t cuda_status = cudaGetLastError(); 85 | if (cuda_status != cudaSuccess) 86 | { 87 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 88 | return std::vector(); 89 | } 90 | 91 | for (int i = 0; i < V; i++) 92 | { 93 | depth[i] = max_depth; 94 | visited[i] = 0; 95 | } 96 | depth[source] = 0; 97 | 98 | *queue_size = 1; // At first, there was only the root node in the queue 99 | queue[0] = source; 100 | *next_queue_size = 0; 101 | 102 | int threadsPerBlock = 1024; 103 | int numBlocks = 0; 104 | int QBlocks = (V + threadsPerBlock - 1) / threadsPerBlock; 105 | std::vector res(V, max_depth); 106 | 107 | while (*queue_size > 0) 108 | { 109 | numBlocks = (*queue_size + threadsPerBlock - 1) / threadsPerBlock; 110 | bfs_Relax<<>>(start, edge, depth, visited, queue, queue_size); 111 | cudaDeviceSynchronize(); 112 | 113 | cudaError_t cuda_status = cudaGetLastError(); 114 | if (cuda_status != cudaSuccess) 115 | { 116 | fprintf(stderr, "Relax kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 117 | return res; 118 | } 119 | 120 | bfs_CompactQueue<<>>(V, next_queue, next_queue_size, visited); 121 | cudaDeviceSynchronize(); 122 | 123 | cuda_status = cudaGetLastError(); 124 | if (cuda_status != cudaSuccess) 125 | { 126 | fprintf(stderr, "CompactQueue kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 127 | return res; 128 | } 129 | 130 | std::swap(queue, next_queue); 131 | *queue_size = *next_queue_size; 132 | *next_queue_size = 0; 133 | /* After each round of updates, exchange pointers between the new and old queues, 134 | using the new queue as the traversal queue for the next round and the old queue as the new queue for the next round */ 135 | } 136 | 137 | cudaMemcpy(res.data(), depth, V * sizeof(int), cudaMemcpyDeviceToHost); 138 | 139 | cudaFree(depth); 140 | cudaFree(visited); 141 | cudaFree(queue); 142 | cudaFree(next_queue); 143 | cudaFree(queue_size); 144 | cudaFree(next_queue_size); 145 | 146 | return res; 147 | } 148 | 149 | std::vector> Cuda_Bfs(graph_structure &graph, CSR_graph &csr_graph, std::string src_v, int min_depth, int max_depth) 150 | { 151 | int src_v_id = graph.vertex_str_to_id[src_v]; 152 | std::vector gpuBfsVec = cuda_bfs(csr_graph, src_v_id, max_depth); 153 | 154 | return graph.res_trans_id_val(gpuBfsVec); 155 | } 156 | 157 | #endif 
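/*
---------an example main file (a minimal sketch, not part of the original header)-------------
The kernels above are driven through cuda_bfs / Cuda_Bfs; this sketch shows the typical host-side
call pattern, mirroring src/GPU_csr/GPU_example.cu. The include paths and the graph_structure<double>
template argument are assumptions inferred from the directory tree and CMakeLists.txt, not verbatim
code from this repository.

#include <GPU_csr/GPU_csr.hpp>
#include <GPU_csr/algorithm/GPU_BFS.cuh>
#include <iostream>

int main() {
    graph_structure<double> graph;               // directed, weighted graph keyed by string vertex names
    graph.add_vertice("one");
    graph.add_vertice("two");
    graph.add_edge("one", "two", 1);

    CSR_graph<double> csr_graph = toCSR(graph);  // copy the graph into CSR arrays used by the GPU kernels

    // run BFS from "one"; each result pair is (vertex name, depth from the source)
    auto res = Cuda_Bfs(graph, csr_graph, "one");
    for (auto &p : res)
        std::cout << p.first << " " << p.second << std::endl;

    return 0;
}
-------------------
*/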
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RucGraph - a fast graph database system on CPU/GPU platforms 2 | 3 | RucGraph is a lightweight graph database system that uses both CPUs and GPUs to efficiently perform graph analyses such as Shortest Path, PageRank, and Community Detection. 4 | 5 | 6 | - "Ruc" is the abbreviation of "[Renmin University of China](https://www.ruc.edu.cn/)". 7 | 8 | 9 | - RucGraph works efficiently on large graphs with billions of vertices and edges. In particular, on the [LDBC Graphalytics Benchmarks](https://ldbcouncil.org/benchmarks/graphalytics/), RucGraph is 10 times faster than [neo4j](https://neo4j.com) on CPUs, and 50 times faster than [neo4j](https://neo4j.com) on GPUs. 10 | 11 | 12 | ## Graph data structures & algorithms 13 | 14 | RucGraph currently uses [Adjacency Lists](https://www.geeksforgeeks.org/adjacency-list-meaning-definition-in-dsa/) to store graphs in CPU memory, and [Sparse Matrix Representations](https://www.geeksforgeeks.org/sparse-matrix-representations-set-3-csr/) (CSRs) to store graphs in GPU memory. 15 | 16 | More features, such as Adjacency Lists in GPU memory, are now under development. 17 | 18 | 19 | We have implemented 5 graph analysis algorithms on both CPUs and GPUs to date: Breadth-First Search (BFS), PageRank (PR), Weakly Connected Components (WCC), Community Detection using Label Propagation (CDLP), and Single-Source Shortest Paths (SSSP). Pseudocode for these algorithms can be found at the end of [the LDBC Graphalytics Benchmark handbook](https://arxiv.org/pdf/2011.15028). Nevertheless, our implementations are optimized for parallel computation and may differ considerably from that pseudocode. 20 | 21 | 22 | ## Code file structure 23 | 24 | - `include/`: header files 25 | 26 | - `include/CPU_adj_list/`: header files for operating Adjacency Lists on CPUs 27 | 28 | - `include/CPU_adj_list/CPU_adj_list.hpp`: an Adjacency List on CPUs 29 | 30 | - `include/CPU_adj_list/algorithm/`: header files for graph analysis operators on CPUs, such as the Shortest Path, PageRank, and Community Detection operators; these operators have passed the LDBC Graphalytics Benchmark test 31 | 32 | 33 | 34 | - `include/GPU_csr/`: header files for operating CSRs on GPUs 35 | 36 | - `include/GPU_csr/GPU_csr.hpp`: a CSR on GPUs 37 | 38 | - `include/GPU_csr/algorithm/`: header files for graph analysis operators on GPUs, such as the Shortest Path, PageRank, and Community Detection operators; these operators have also passed the LDBC Graphalytics Benchmark test 39 | 40 | 41 | - `include/LDBC/`: header files for performing the LDBC Graphalytics Benchmark test 42 | 43 | 44 | 45 |
46 | 47 | 48 | - `src/`: source files 49 | - `src/CPU_adj_list/CPU_example.cpp`: an example of performing graph analysis operators on CPUs 50 | - `src/GPU_csr/GPU_example.cu`: an example of performing graph analysis operators on GPUs 51 | - `src/LDBC/LDBC_CPU_adj_list.cpp`: the source code for running the LDBC Graphalytics Benchmark test on CPUs 52 | - `src/LDBC/LDBC_GPU_csr.cu`: the source code for running the LDBC Graphalytics Benchmark test on GPUs 53 | 54 | 55 | 56 | ## Copy & Run 57 | 58 | Here, we show how to build and run RucGraph on a Linux server running Ubuntu 20.04, with 2 Intel(R) Xeon(R) Gold 5218 CPUs and an NVIDIA GeForce RTX 3090 GPU. The environment is as follows. 59 | 60 | - `cmake --version`: cmake version 3.27.9 61 | - `g++ --version`: g++ (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 62 | - `nvidia-smi`: NVIDIA-SMI 550.54.14 / Driver Version: 550.54.14 / CUDA Version: 12.4 63 | 64 | 65 | First, download the files onto the server, e.g., to the path `/home/username/RucGraph`. Then, enter the following commands in a terminal at this path: 66 | 67 | ```shell 68 | username@server:~/RucGraph$ mkdir build 69 | username@server:~/RucGraph$ cd build 70 | username@server:~/RucGraph/build$ cmake .. -DBUILD_CPU=ON -DBUILD_GPU=ON 71 | username@server:~/RucGraph/build$ make 72 | username@server:~/RucGraph/build$ ./bin_cpu/CPU_example 73 | username@server:~/RucGraph/build$ ./bin_gpu/GPU_example 74 | username@server:~/RucGraph/build$ ./bin_cpu/Test_CPU 75 | username@server:~/RucGraph/build$ ./bin_gpu/Test_GPU 76 | ``` 77 | 78 | Some explanations of the above commands: 79 | 80 | - `-DBUILD_CPU=ON -DBUILD_GPU=ON` compiles both the CPU and GPU code. If no GPU is available, change `-DBUILD_GPU=ON` to `-DBUILD_GPU=OFF`. 81 | 82 | 83 | - `./bin_cpu/CPU_example` runs the program built from `src/CPU_adj_list/CPU_example.cpp` 84 | 85 | - `./bin_gpu/GPU_example` runs the program built from `src/GPU_csr/GPU_example.cu` 86 | 87 | - `./bin_cpu/Test_CPU` runs the program built from `src/LDBC/LDBC_CPU_adj_list.cpp` 88 | 89 | - `./bin_gpu/Test_GPU` runs the program built from `src/LDBC/LDBC_GPU_csr.cu` 90 | 91 | We can run "CPU_example" and "GPU_example" without any graph dataset. The outputs of the graph analysis operators will be printed on the terminal (a minimal code sketch of this workflow is given at the end of this README). 92 | 93 | Before running "Test_CPU" and "Test_GPU", however, we first need to download the [LDBC Graphalytics datasets](https://repository.surfsara.nl/datasets/cwi/graphalytics). When running "Test_CPU" and "Test_GPU", the program asks us to input the data directory and the graph name in turn. 94 | ```shell 95 | Please input the data directory: # The program asks 96 | /home/username/data # Input the data path 97 | Please input the graph name: # The program asks 98 | datagen-7_5-fb # Input a data name 99 | ``` 100 | 101 | After the data directory and graph name are entered, the program performs the LDBC Graphalytics Benchmark test on this dataset. Specifically, it prints some parameters of the test, as well as the time consumed by each graph analysis operator on this dataset. 102 | 103 | 104 | ## License 105 | 106 | RucGraph is released under the [Apache 2.0 license](LICENSE.txt). 107 | 108 | ## Contact 109 | 110 | Please contact yahuisun@ruc.edu.cn for any enquiry.
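## A minimal CPU usage sketch

For orientation, the snippet below sketches a typical use of the CPU side: build an adjacency-list graph in memory, then call operators from `include/CPU_adj_list/algorithm/`. It is a simplified illustration based on the headers in `include/CPU_adj_list/`; the include paths assume `include/` is on the compiler's include path (as for the shipped example targets), and `src/CPU_adj_list/CPU_example.cpp` remains the authoritative example.

```cpp
#include <CPU_adj_list/CPU_adj_list.hpp>                 // graph_structure (adjacency lists)
#include <CPU_adj_list/algorithm/CPU_BFS.hpp>            // CPU_Bfs
#include <CPU_adj_list/algorithm/CPU_shortest_paths.hpp> // CPU_shortest_paths (Dijkstra)
#include <iostream>
#include <vector>

int main() {
    graph_structure<double> graph;      // edge weights stored as double
    graph.add_edge("A", "B", 1.0);      // vertices are created on demand from their string names
    graph.add_edge("B", "C", 2.0);
    graph.add_edge("A", "C", 5.0);

    // BFS depth of every vertex from source "A", reported as (vertex name, depth) pairs
    for (const auto &p : CPU_Bfs(graph, "A"))
        std::cout << p.first << " is at depth " << p.second << std::endl;

    // Dijkstra distances from "A", indexed by the internal vertex id
    std::vector<double> dist = CPU_shortest_paths(graph.OUTs, graph.vertex_str_to_id["A"]);
    std::cout << "distance A -> C = " << dist[graph.vertex_str_to_id["C"]] << std::endl;
    return 0;
}
```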
111 | 112 | 113 | -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_sssp_pre.cuh: -------------------------------------------------------------------------------- 1 | #ifndef SSSP_PRE_H 2 | #define SSSP_PRE_H 3 | 4 | #include 5 | 6 | #include "cuda_runtime.h" 7 | #include "device_launch_parameters.h" 8 | 9 | #include 10 | #include 11 | 12 | __global__ void Relax_pre(int* out_pointer, int* out_edge, double* out_edge_weight, double* dis, int* queue, int* queue_size, int* visited, int* pre, int* mutex) { 13 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 14 | 15 | if (idx < *queue_size) { 16 | int v = queue[idx]; 17 | 18 | for (int i = out_pointer[v]; i < out_pointer[v + 1]; i++) { 19 | int new_v = out_edge[i]; 20 | double weight = out_edge_weight[i]; 21 | 22 | double new_w = dis[v] + weight; 23 | 24 | // try doing relaxation 25 | // mutex is used 26 | while (atomicCAS(&mutex[new_v], 0, 1) != 0); 27 | 28 | if (new_w < dis[new_v]) { 29 | dis[new_v] = new_w; 30 | // update the previous vertex 31 | pre[new_v] = v; 32 | atomicExch(&visited[new_v], 1); 33 | // share the updated distance with other threads in different blocks 34 | __threadfence(); 35 | } 36 | 37 | atomicExch(&mutex[new_v], 0); 38 | } 39 | } 40 | } 41 | 42 | void gpu_sssp_pre(CSR_graph& input_graph, int source, std::vector& distance, std::vector& pre_v, double max_dis) { 43 | // the only difference between this function and the previous one is that we need to record the previous vertex 44 | int V = input_graph.OUTs_Neighbor_start_pointers.size() - 1; 45 | int E = input_graph.OUTs_Edges.size(); 46 | 47 | double* dis; 48 | int* out_edge = input_graph.out_edge; 49 | double* out_edge_weight = input_graph.out_edge_weight; 50 | int* out_pointer = input_graph.out_pointer; 51 | int* visited; 52 | 53 | int* queue, * next_queue; 54 | int* queue_size, * next_queue_size; 55 | int* mutex; 56 | int* pre; 57 | 58 | cudaMallocManaged((void**)&dis, V * sizeof(double)); 59 | cudaMallocManaged((void**)&visited, V * sizeof(int)); 60 | cudaMallocManaged((void**)&queue, V * sizeof(int)); 61 | cudaMallocManaged((void**)&next_queue, V * sizeof(int)); 62 | cudaMallocManaged((void**)&queue_size, sizeof(int)); 63 | cudaMallocManaged((void**)&next_queue_size, sizeof(int)); 64 | 65 | cudaMallocManaged((void**)&mutex, V * sizeof(int)); 66 | cudaMallocManaged((void**)&pre, V * sizeof(int)); 67 | 68 | cudaDeviceSynchronize(); 69 | cudaError_t cuda_status = cudaGetLastError(); 70 | if (cuda_status != cudaSuccess) { 71 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 72 | return; 73 | } 74 | 75 | for (int i = 0; i < V; i++) { 76 | dis[i] = max_dis; 77 | visited[i] = 0; 78 | mutex[i] = 0; 79 | pre[i] = -1; 80 | } 81 | dis[source] = 0; 82 | 83 | 84 | *queue_size = 1; 85 | queue[0] = source; 86 | *next_queue_size = 0; 87 | 88 | int threadsPerBlock = 1024; 89 | int numBlocks = 0; 90 | 91 | while (*queue_size > 0) { 92 | numBlocks = (*queue_size + threadsPerBlock - 1) / threadsPerBlock; 93 | Relax_pre <<< numBlocks, threadsPerBlock >>> (out_pointer, out_edge, out_edge_weight, dis, queue, queue_size, visited, pre, mutex); 94 | cudaDeviceSynchronize(); 95 | 96 | cudaError_t cuda_status = cudaGetLastError(); 97 | if (cuda_status != cudaSuccess) { 98 | fprintf(stderr, "Relax kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 99 | return; 100 | } 101 | 102 | numBlocks = (V + threadsPerBlock - 1) / threadsPerBlock; 103 | CompactQueue <<< numBlocks, threadsPerBlock >>> 
(V, next_queue, next_queue_size, visited); 104 | cudaDeviceSynchronize(); 105 | 106 | cuda_status = cudaGetLastError(); 107 | if (cuda_status != cudaSuccess) { 108 | fprintf(stderr, "CompactQueue kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 109 | return; 110 | } 111 | 112 | std::swap(queue, next_queue); 113 | 114 | *queue_size = *next_queue_size; 115 | *next_queue_size = 0; 116 | } 117 | 118 | cudaMemcpy(distance.data(), dis, V * sizeof(double), cudaMemcpyDeviceToHost); 119 | cudaMemcpy(pre_v.data(), pre, V * sizeof(int), cudaMemcpyDeviceToHost); 120 | 121 | cudaFree(dis); 122 | cudaFree(visited); 123 | cudaFree(queue); 124 | cudaFree(next_queue); 125 | cudaFree(queue_size); 126 | cudaFree(next_queue_size); 127 | cudaFree(mutex); 128 | cudaFree(pre); 129 | 130 | return; 131 | } 132 | 133 | std::vector> Cuda_SSSP_pre(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, std::vector& pre_v, double max_dis) { 134 | int src_v_id = graph.vertex_str_to_id[src_v]; 135 | std::vector gpuSSSPvec(graph.V, 0); 136 | pre_v.resize(graph.V); 137 | gpu_sssp_pre(csr_graph, src_v_id, gpuSSSPvec, pre_v, max_dis); 138 | 139 | // check the correctness of the previous vertex 140 | /*for (int i = 0; i < graph.V; i++) { 141 | double dis = gpuSSSPvec[i]; 142 | int pre = pre_v[i]; 143 | int now = i; 144 | double sum = 0; 145 | while (pre != -1) { 146 | bool ff = false; 147 | for (auto edge : graph.OUTs[pre]) { 148 | if (edge.first == now) { 149 | sum += edge.second; 150 | now = pre; 151 | pre = pre_v[pre]; 152 | ff = true; 153 | break; 154 | } 155 | } 156 | if (!ff) { 157 | std::cout << "Not found!" << std::endl; 158 | break; 159 | } 160 | } 161 | if (fabs(sum - dis) > 1e-4) { 162 | std::cout << "Error: pre_v is wrong!" << std::endl; 163 | std::cout << "sum: " << sum << " dis: " << dis << std::endl; 164 | } 165 | }*/ 166 | 167 | return graph.res_trans_id_val(gpuSSSPvec); 168 | } 169 | 170 | #endif -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_shortest_paths.cuh: -------------------------------------------------------------------------------- 1 | #ifndef WS_SSSP_H 2 | #define WS_SSSP_H 3 | 4 | #include 5 | 6 | #include "cuda_runtime.h" 7 | #include "device_launch_parameters.h" 8 | 9 | #include 10 | 11 | __device__ __forceinline__ double atomicMinDouble (double * addr, double value); 12 | 13 | __global__ void Relax(int* offsets, int* edges, double* weights, double* dis, int* queue, int* queue_size, int* visited); 14 | __global__ void CompactQueue(int V, int* next_queue, int* next_queue_size, int* visited); 15 | void gpu_shortest_paths(CSR_graph& input_graph, int source, std::vector& distance, double max_dis = 10000000000); 16 | void gpu_sssp_pre(CSR_graph& input_graph, int source, std::vector& distance, std::vector& pre_v, double max_dis = 10000000000); 17 | 18 | std::vector> Cuda_SSSP(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, double max_dis = 10000000000); 19 | std::vector> Cuda_SSSP_pre(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, std::vector& pre_v, double max_dis = 10000000000); 20 | 21 | // this function is used to get the minimum value of double type atomically 22 | __device__ __forceinline__ double atomicMinDouble (double * addr, double value) { 23 | double old; 24 | old = __longlong_as_double(atomicMin((long long *)addr, __double_as_longlong(value))); 25 | return old; 26 | } 27 | 28 | __global__ void Relax(int* out_pointer, int* out_edge, double* out_edge_weight, double* 
dis, int* queue, int* queue_size, int* visited) { 29 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 30 | 31 | if (idx < *queue_size) { 32 | int v = queue[idx]; 33 | 34 | // for all adjacent vertices 35 | for (int i = out_pointer[v]; i < out_pointer[v + 1]; i++) { 36 | int new_v = out_edge[i]; 37 | double weight = out_edge_weight[i]; 38 | 39 | double new_w = dis[v] + weight; 40 | 41 | // try doing relaxation 42 | double old = atomicMinDouble(&dis[new_v], new_w); 43 | 44 | if (old <= new_w) 45 | continue; 46 | 47 | // if the distance is updated, set the vertex as visited 48 | atomicExch(&visited[new_v], 1); 49 | } 50 | } 51 | } 52 | 53 | __global__ void CompactQueue(int V, int* next_queue, int* next_queue_size, int* visited) { 54 | // this function is used to ensure that each necessary vertex is only pushed into the queue once 55 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 56 | if (idx < V && visited[idx]) { 57 | int pos = atomicAdd(next_queue_size, 1); 58 | next_queue[pos] = idx; 59 | // reset the visited flag 60 | visited[idx] = 0; 61 | } 62 | } 63 | 64 | void gpu_shortest_paths(CSR_graph& input_graph, int source, std::vector& distance, double max_dis) { 65 | int V = input_graph.OUTs_Neighbor_start_pointers.size() - 1; 66 | int E = input_graph.OUTs_Edges.size(); 67 | 68 | double* dis; 69 | int* out_edge = input_graph.out_edge; 70 | double* out_edge_weight = input_graph.out_edge_weight; 71 | int* out_pointer = input_graph.out_pointer; 72 | int* visited; 73 | 74 | int* queue, * next_queue; 75 | int* queue_size, * next_queue_size; 76 | 77 | // allocate memory on GPU 78 | cudaMallocManaged((void**)&dis, V * sizeof(double)); 79 | cudaMallocManaged((void**)&visited, V * sizeof(int)); 80 | cudaMallocManaged((void**)&queue, V * sizeof(int)); 81 | cudaMallocManaged((void**)&next_queue, V * sizeof(int)); 82 | cudaMallocManaged((void**)&queue_size, sizeof(int)); 83 | cudaMallocManaged((void**)&next_queue_size, sizeof(int)); 84 | 85 | // synchronize the device 86 | cudaDeviceSynchronize(); 87 | cudaError_t cuda_status = cudaGetLastError(); 88 | if (cuda_status != cudaSuccess) { 89 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 90 | return; 91 | } 92 | 93 | for (int i = 0; i < V; i++) { 94 | // initialize the distance array and visited array 95 | dis[i] = max_dis; 96 | visited[i] = 0; 97 | } 98 | dis[source] = 0; 99 | 100 | 101 | *queue_size = 1; 102 | queue[0] = source; 103 | *next_queue_size = 0; 104 | 105 | int threadsPerBlock = 1024; 106 | int numBlocks = 0; 107 | 108 | while (*queue_size > 0) { 109 | numBlocks = (*queue_size + threadsPerBlock - 1) / threadsPerBlock; 110 | // launch the kernel function to relax the edges 111 | Relax <<< numBlocks, threadsPerBlock >>> (out_pointer, out_edge, out_edge_weight, dis, queue, queue_size, visited); 112 | cudaDeviceSynchronize(); 113 | 114 | cudaError_t cuda_status = cudaGetLastError(); 115 | if (cuda_status != cudaSuccess) { 116 | fprintf(stderr, "Relax kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 117 | return; 118 | } 119 | 120 | numBlocks = (V + threadsPerBlock - 1) / threadsPerBlock; 121 | // do the compact operation 122 | CompactQueue <<< numBlocks, threadsPerBlock >>> (V, next_queue, next_queue_size, visited); 123 | cudaDeviceSynchronize(); 124 | 125 | cuda_status = cudaGetLastError(); 126 | if (cuda_status != cudaSuccess) { 127 | fprintf(stderr, "CompactQueue kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 128 | return; 129 | } 130 | 131 | // swap the queue and next_queue 
132 | std::swap(queue, next_queue); 133 | 134 | *queue_size = *next_queue_size; 135 | *next_queue_size = 0; 136 | } 137 | 138 | cudaMemcpy(distance.data(), dis, V * sizeof(double), cudaMemcpyDeviceToHost); 139 | 140 | // free the memory 141 | cudaFree(dis); 142 | cudaFree(visited); 143 | cudaFree(queue); 144 | cudaFree(next_queue); 145 | cudaFree(queue_size); 146 | cudaFree(next_queue_size); 147 | 148 | return; 149 | } 150 | 151 | std::vector> Cuda_SSSP(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, double max_dis) { 152 | int src_v_id = graph.vertex_str_to_id[src_v]; 153 | std::vector gpuSSSPvec(graph.V, 0); 154 | gpu_shortest_paths(csr_graph, src_v_id, gpuSSSPvec, max_dis); 155 | 156 | // transfer the vertex id to vertex name 157 | return graph.res_trans_id_val(gpuSSSPvec); 158 | } 159 | 160 | #endif -------------------------------------------------------------------------------- /src/LDBC/LDBC_CPU_adj_list.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | int main() 14 | { 15 | ios::sync_with_stdio(false); 16 | std::cin.tie(0); 17 | std::cout.tie(0); 18 | 19 | //freopen("../input.txt", "r", stdin); 20 | 21 | std::string directory; 22 | std::cout << "Please input the data directory: " << std::endl; 23 | std::cin >> directory; 24 | 25 | if (directory.back() != '/') 26 | directory += "/"; 27 | 28 | std::string graph_name; 29 | std::cout << "Please input the graph name: " << std::endl; 30 | std::cin >> graph_name; 31 | 32 | std::string config_file_path = directory + graph_name + ".properties"; 33 | 34 | LDBC graph(directory, graph_name); 35 | graph.read_config(config_file_path); //Read the ldbc configuration file to obtain key parameter information in the file 36 | 37 | auto begin = std::chrono::high_resolution_clock::now(); 38 | graph.load_graph(); //Read the vertex and edge files corresponding to the configuration file, // The vertex information in graph is converted to csr format for storage 39 | auto end = std::chrono::high_resolution_clock::now(); 40 | double load_ldbc_time = std::chrono::duration_cast(end - begin).count() / 1e9; // s 41 | printf("load_ldbc_time cost time: %f s\n", load_ldbc_time); 42 | 43 | std::vector> result_all; 44 | 45 | if (1) { 46 | if (graph.sup_bfs) { 47 | double cpu_bfs_time = 0; 48 | 49 | try{ 50 | std::vector> cpu_bfs_result; 51 | begin = std::chrono::high_resolution_clock::now(); 52 | cpu_bfs_result = CPU_Bfs(graph, graph.bfs_src_name); 53 | end = std::chrono::high_resolution_clock::now(); 54 | cpu_bfs_time = std::chrono::duration_cast(end - begin).count() / 1e9; 55 | printf("CPU BFS cost time: %f s\n", cpu_bfs_time); 56 | 57 | if(Bfs_checker(graph, cpu_bfs_result, graph.base_path + "-BFS")) 58 | result_all.push_back(std::make_pair("BFS", std::to_string(cpu_bfs_time))); 59 | else 60 | result_all.push_back(std::make_pair("BFS", "wrong")); 61 | } 62 | catch(...) 
{ 63 | result_all.push_back(std::make_pair("BFS", "failed!")); 64 | } 65 | } 66 | else 67 | result_all.push_back(std::make_pair("BFS", "N/A")); 68 | } 69 | 70 | if (1) { 71 | if (graph.sup_sssp) { 72 | double cpu_sssp_time = 0; 73 | 74 | try { 75 | //std::vector pre_v; 76 | begin = std::chrono::high_resolution_clock::now(); 77 | std::vector> cpu_sssp_result = CPU_SSSP(graph, graph.sssp_src_name); 78 | //std::vector> cpu_sssp_result = CPU_SSSP_pre(graph, graph.sssp_src_name, pre_v); 79 | end = std::chrono::high_resolution_clock::now(); 80 | /*std::vector> path = path_query(graph, graph.sssp_src_name, "338", pre_v); 81 | for (auto p : path) 82 | std::cout << p.first << " -> " << p.second << std::endl;*/ 83 | cpu_sssp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 84 | printf("CPU SSSP cost time: %f s\n", cpu_sssp_time); 85 | 86 | if (SSSP_checker(graph, cpu_sssp_result, graph.base_path + "-SSSP")) 87 | result_all.push_back(std::make_pair("SSSP", std::to_string(cpu_sssp_time))); 88 | else 89 | result_all.push_back(std::make_pair("SSSP", "wrong")); 90 | } 91 | catch(...) { 92 | result_all.push_back(std::make_pair("SSSP", "failed!")); 93 | } 94 | } 95 | else 96 | result_all.push_back(std::make_pair("SSSP", "N/A")); 97 | } 98 | 99 | if (1) { 100 | if (graph.sup_wcc) { 101 | double cpu_wcc_time = 0; 102 | 103 | try { 104 | begin = std::chrono::high_resolution_clock::now(); 105 | std::vector> cpu_wcc_result = CPU_WCC(graph); 106 | end = std::chrono::high_resolution_clock::now(); 107 | cpu_wcc_time = std::chrono::duration_cast(end - begin).count() / 1e9; 108 | printf("CPU WCC cost time: %f s\n", cpu_wcc_time); 109 | 110 | if (WCC_checker(graph, cpu_wcc_result, graph.base_path + "-WCC")) 111 | result_all.push_back(std::make_pair("WCC", std::to_string(cpu_wcc_time))); 112 | else 113 | result_all.push_back(std::make_pair("WCC", "wrong")); 114 | } 115 | catch(...) { 116 | result_all.push_back(std::make_pair("WCC", "failed!")); 117 | } 118 | } 119 | else 120 | result_all.push_back(std::make_pair("WCC", "N/A")); 121 | } 122 | 123 | if (1) { 124 | if (graph.sup_pr) { 125 | double cpu_pr_time = 0; 126 | 127 | try { 128 | begin = std::chrono::high_resolution_clock::now(); 129 | std::vector> cpu_pr_result = CPU_PR(graph, graph.pr_its, graph.pr_damping); 130 | end = std::chrono::high_resolution_clock::now(); 131 | cpu_pr_time = std::chrono::duration_cast(end - begin).count() / 1e9; 132 | printf("CPU PageRank cost time: %f s\n", cpu_pr_time); 133 | 134 | if (PR_checker(graph, cpu_pr_result, graph.base_path + "-PR")) 135 | result_all.push_back(std::make_pair("PageRank", std::to_string(cpu_pr_time))); 136 | else 137 | result_all.push_back(std::make_pair("PageRank", "wrong")); 138 | } 139 | catch(...) 
{ 140 | result_all.push_back(std::make_pair("PageRank", "failed!")); 141 | } 142 | } 143 | else 144 | result_all.push_back(std::make_pair("PageRank", "N/A")); 145 | } 146 | 147 | if (1) { 148 | if (graph.sup_cdlp) { 149 | double cpu_cdlp_time = 0; 150 | 151 | try { 152 | begin = std::chrono::high_resolution_clock::now(); 153 | std::vector> cpu_cdlp_result = CPU_CDLP(graph, graph.cdlp_max_its); 154 | end = std::chrono::high_resolution_clock::now(); 155 | cpu_cdlp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 156 | printf("CPU Community Detection cost time: %f s\n", cpu_cdlp_time); 157 | 158 | if (CDLP_checker(graph, cpu_cdlp_result, graph.base_path + "-CDLP")) 159 | result_all.push_back(std::make_pair("CommunityDetection", std::to_string(cpu_cdlp_time))); 160 | else 161 | result_all.push_back(std::make_pair("CommunityDetection", "wrong")); 162 | } 163 | catch(...) { 164 | result_all.push_back(std::make_pair("CommunityDetection", "failed!")); 165 | } 166 | } 167 | else 168 | result_all.push_back(std::make_pair("CommunityDetection", "N/A")); 169 | } 170 | 171 | std::cout << "Result: " << std::endl; 172 | int res_size = result_all.size(); 173 | for (int i = 0; i < res_size; i++) { 174 | std::cout << result_all[i].second; 175 | if (i != res_size - 1) 176 | std::cout << ","; 177 | } 178 | std::cout << std::endl; 179 | 180 | graph.save_to_CSV(result_all, "./result-cpu.csv"); 181 | 182 | //freopen("/dev/tty", "r", stdin); 183 | 184 | return 0; 185 | } 186 | -------------------------------------------------------------------------------- /include/CPU_adj_list/CPU_adj_list.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | template // weight_type may be int, long long int, float, double... 17 | class graph_structure 18 | { 19 | //The data structure used on the CPU provides common operations such as adding nodes and edges on the graph 20 | 21 | /* Special note that the actual labels of nodes on the graph may be unordered. 22 | In the end, we provide some functions for converting from continuous labels (used in operators) to actual labels (original data) */ 23 | public: 24 | int V = 0; // the number of vertices 25 | long long E = 0; // the number of edges 26 | 27 | // OUTs[u] = v means there is an edge starting from u to v 28 | std::vector>> OUTs; 29 | // INs is transpose of OUTs. 
INs[u] = v means there is an edge starting from v to u 30 | std::vector>> INs; 31 | 32 | /*constructors*/ 33 | graph_structure() {} 34 | graph_structure(int n) 35 | { 36 | V = n; 37 | OUTs.resize(n); // initialize n vertices 38 | INs.resize(n); 39 | } 40 | int size() 41 | { 42 | return V; 43 | } 44 | 45 | /*class member functions*/ 46 | inline void add_edge(int, int, weight_type); // this function can change edge weights 47 | 48 | inline void remove_edge(int, int);//Remove any edge that connects two vertices 49 | inline void remove_edge(std::string, std::string);//Remove any edge that connects two vertices 50 | inline void remove_all_adjacent_edges(int);//Remove all edges, the input params is vertex numbers 51 | 52 | inline bool contain_edge(int, int); // whether there is an edge 53 | inline weight_type edge_weight(int, int); //get edge weight 54 | inline long long int edge_number(); // the total number of edges 55 | 56 | inline void print();//print graph 57 | inline void clear();// clear graph 58 | inline int out_degree(int);//get graph out degree 59 | inline int in_degree(int);//get graph in degree 60 | 61 | std::unordered_map vertex_str_to_id; // vertex_str_to_id[vertex_name] = vertex_id 62 | std::vector> vertex_id_to_str; // vertex_id_to_str[vertex_id].first = vertex_name, vertex_id_to_str[vertex_id].second = whether the vertex is valid 63 | 64 | std::queue invalid_vertex_id; // store the invalid vertex id 65 | 66 | int add_vertice(std::string); // Read the vertex information as a string 67 | void remove_vertice(std::string); // Remove the vertex information as a string 68 | void add_edge(std::string, std::string, weight_type); 69 | 70 | template 71 | std::vector> res_trans_id_val(std::vector &res); 72 | std::vector> res_trans_id_id(std::vector &wcc_res); 73 | }; 74 | 75 | /*class member functions*/ 76 | 77 | template 78 | int graph_structure::add_vertice(std::string vertex) 79 | { 80 | if (vertex_str_to_id.find(vertex) == vertex_str_to_id.end()) 81 | { 82 | if (invalid_vertex_id.empty()) { 83 | vertex_id_to_str.push_back(std::make_pair(vertex, true)); 84 | vertex_str_to_id[vertex] = V++; 85 | std::vector> x; 86 | OUTs.push_back(x); 87 | INs.push_back(x); 88 | } 89 | else { 90 | int v = invalid_vertex_id.front(); 91 | invalid_vertex_id.pop(); 92 | vertex_id_to_str[v].first = vertex; 93 | vertex_id_to_str[v].second = true; 94 | vertex_str_to_id[vertex] = v; 95 | 96 | std::cout << "Recover vertex id " << v << std::endl; 97 | } 98 | } 99 | return vertex_str_to_id[vertex]; 100 | } 101 | 102 | template 103 | void graph_structure::remove_vertice(std::string vertex) { 104 | // if the vertex is not exist, return 105 | if (vertex_str_to_id.find(vertex) == vertex_str_to_id.end()) { 106 | std::cerr << "vertex " << vertex << " not exist!" 
<< std::endl; 107 | return; 108 | } 109 | int v = vertex_str_to_id[vertex]; 110 | remove_all_adjacent_edges(v); 111 | vertex_str_to_id.erase(vertex); 112 | vertex_id_to_str[v].second = false; 113 | invalid_vertex_id.push(v); 114 | } 115 | 116 | template 117 | void graph_structure::add_edge(int e1, int e2, weight_type ec) 118 | { 119 | sorted_vector_binary_operations_insert(OUTs[e1], e2, ec); 120 | sorted_vector_binary_operations_insert(INs[e2], e1, ec); 121 | } 122 | 123 | template 124 | void graph_structure::add_edge(std::string e1, std::string e2, weight_type ec) 125 | { 126 | E++; 127 | int v1 = add_vertice(e1); 128 | int v2 = add_vertice(e2); 129 | add_edge(v1, v2, ec); 130 | } 131 | 132 | template 133 | void graph_structure::remove_edge(int e1, int e2) 134 | { 135 | sorted_vector_binary_operations_erase(OUTs[e1], e2); 136 | sorted_vector_binary_operations_erase(INs[e2], e1); 137 | } 138 | 139 | template 140 | void graph_structure::remove_edge(std::string e1, std::string e2) 141 | { 142 | if (vertex_str_to_id.find(e1) == vertex_str_to_id.end()) { 143 | std::cerr << "vertex " << e1 << " not exist!" << std::endl; 144 | return; 145 | } 146 | if (vertex_str_to_id.find(e2) == vertex_str_to_id.end()) { 147 | std::cerr << "vertex " << e2 << " not exist!" << std::endl; 148 | return; 149 | } 150 | int v1 = vertex_str_to_id[e1]; 151 | int v2 = vertex_str_to_id[e2]; 152 | remove_edge(v1, v2); 153 | } 154 | 155 | template 156 | void graph_structure::remove_all_adjacent_edges(int v) 157 | { 158 | for (auto it = OUTs[v].begin(); it != OUTs[v].end(); it++) 159 | sorted_vector_binary_operations_erase(INs[it->first], v); 160 | 161 | for (auto it = INs[v].begin(); it != INs[v].end(); it++) 162 | sorted_vector_binary_operations_erase(OUTs[it->first], v); 163 | 164 | std::vector>().swap(OUTs[v]); 165 | std::vector>().swap(INs[v]); 166 | } 167 | 168 | template 169 | bool graph_structure::contain_edge(int e1, int e2) 170 | { 171 | return sorted_vector_binary_operations_search(OUTs[e1], e2); 172 | } 173 | template 174 | weight_type graph_structure::edge_weight(int e1, int e2) 175 | { 176 | return sorted_vector_binary_operations_search_weight(OUTs[e1], e2); 177 | } 178 | template 179 | long long int graph_structure::edge_number() 180 | { 181 | long long int num = 0; 182 | for (auto it : OUTs) 183 | num = num + it.size(); 184 | 185 | return num; 186 | } 187 | template 188 | void graph_structure::clear() 189 | { 190 | std::vector>>().swap(OUTs); 191 | std::vector>>().swap(INs); 192 | } 193 | template 194 | int graph_structure::out_degree(int v) 195 | { 196 | return OUTs[v].size(); 197 | } 198 | template 199 | int graph_structure::in_degree(int v) 200 | { 201 | return INs[v].size(); 202 | } 203 | template 204 | void graph_structure::print() 205 | { 206 | 207 | std::cout << "graph_structure_print:" << std::endl; 208 | 209 | for (int i = 0; i < V; i++) 210 | { 211 | std::cout << "Vertex " << i << " OUTs List: "; 212 | int v_size = OUTs[i].size(); 213 | for (int j = 0; j < v_size; j++) 214 | { 215 | std::cout << "<" << OUTs[i][j].first << "," << OUTs[i][j].second << "> "; 216 | } 217 | std::cout << std::endl; 218 | } 219 | std::cout << "graph_structure_print END" << std::endl; 220 | } 221 | 222 | template 223 | template 224 | std::vector> graph_structure::res_trans_id_val(std::vector &res) 225 | { 226 | std::vector> res_str; 227 | int res_size = res.size(); 228 | for (int i = 0; i < res_size; i++) { 229 | if (vertex_id_to_str[i].second) 230 | res_str.push_back(std::make_pair(vertex_id_to_str[i].first, res[i])); 231 
| } 232 | 233 | return res_str; 234 | } 235 | 236 | template 237 | std::vector> graph_structure::res_trans_id_id(std::vector &wcc_res) 238 | { 239 | std::vector> res_str; 240 | int res_size = wcc_res.size(); 241 | for (int i = 0; i < res_size; i++) { 242 | if (vertex_id_to_str[i].second) { 243 | if (vertex_id_to_str[wcc_res[i]].second) 244 | res_str.push_back(std::make_pair(vertex_id_to_str[i].first, vertex_id_to_str[wcc_res[i]].first)); 245 | else 246 | std::cerr << "vertex " << vertex_id_to_str[wcc_res[i]].first << " not exist!" << std::endl; 247 | } 248 | } 249 | 250 | return res_str; 251 | } 252 | -------------------------------------------------------------------------------- /src/LDBC/LDBC_GPU_csr.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | int main() 16 | { 17 | std::ios::sync_with_stdio(false); 18 | std::cin.tie(0); 19 | std::cout.tie(0); 20 | 21 | //freopen("../input.txt", "r", stdin); 22 | 23 | std::vector> result_all; 24 | 25 | std::string directory; 26 | std::cout << "Please input the data directory: " << std::endl; 27 | std::cin >> directory; 28 | 29 | if (directory.back() != '/') 30 | directory += "/"; 31 | 32 | std::string graph_name; 33 | std::cout << "Please input the graph name: " << std::endl; 34 | std::cin >> graph_name; 35 | 36 | std::string config_file_path = directory + graph_name + ".properties"; 37 | 38 | LDBC graph(directory, graph_name); 39 | graph.read_config(config_file_path); //Read the ldbc configuration file to obtain key parameter information in the file 40 | 41 | auto begin = std::chrono::high_resolution_clock::now(); 42 | graph.load_graph(); //Read the vertex and edge files corresponding to the configuration file, // The vertex information in graph is converted to csr format for storage 43 | auto end = std::chrono::high_resolution_clock::now(); 44 | double load_ldbc_time = std::chrono::duration_cast(end - begin).count() / 1e9; // s 45 | printf("load_ldbc_time cost time: %f s\n", load_ldbc_time); 46 | 47 | begin = std::chrono::high_resolution_clock::now(); 48 | CSR_graph csr_graph = toCSR(graph); 49 | end = std::chrono::high_resolution_clock::now(); 50 | double graph_to_csr_time = std::chrono::duration_cast(end - begin).count() / 1e9; // s 51 | std::cout << "Number of vertices: " << csr_graph.OUTs_Neighbor_start_pointers.size()-1 << std::endl; 52 | std::cout << "Number of edges: " << csr_graph.OUTs_Edges.size() << std::endl; 53 | printf("graph_to_csr_time cost time: %f s\n", graph_to_csr_time); 54 | 55 | if (1) { 56 | if (graph.sup_bfs) { 57 | double gpu_bfs_time = 0; 58 | 59 | try { 60 | std::vector> bfs_result; 61 | begin = std::chrono::high_resolution_clock::now(); 62 | bfs_result = Cuda_Bfs(graph, csr_graph, graph.bfs_src_name); 63 | end = std::chrono::high_resolution_clock::now(); 64 | gpu_bfs_time = std::chrono::duration_cast(end - begin).count() / 1e9; 65 | printf("GPU BFS cost time: %f s\n", gpu_bfs_time); 66 | 67 | if (Bfs_checker(graph, bfs_result, graph.base_path + "-BFS")) 68 | result_all.push_back(std::make_pair("BFS", std::to_string(gpu_bfs_time))); 69 | else 70 | result_all.push_back(std::make_pair("BFS", "wrong")); 71 | } 72 | catch (...) 
{ 73 | result_all.push_back(std::make_pair("BFS", "failed!")); 74 | } 75 | } 76 | else 77 | result_all.push_back(std::make_pair("BFS", "N/A")); 78 | 79 | } 80 | 81 | if (1) { 82 | if (graph.sup_sssp) { 83 | double gpu_sssp_time = 0; 84 | 85 | try { 86 | std::vector> sssp_result; 87 | //std::vector pre_v; 88 | begin = std::chrono::high_resolution_clock::now(); 89 | sssp_result = Cuda_SSSP(graph, csr_graph, graph.sssp_src_name, std::numeric_limits::max()); 90 | //sssp_result = Cuda_SSSP_pre(graph, csr_graph, graph.sssp_src_name, pre_v, std::numeric_limits::max()); 91 | end = std::chrono::high_resolution_clock::now(); 92 | gpu_sssp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 93 | printf("GPU SSSP cost time: %f s\n", gpu_sssp_time); 94 | if (SSSP_checker(graph, sssp_result, graph.base_path + "-SSSP")) 95 | result_all.push_back(std::make_pair("SSSP", std::to_string(gpu_sssp_time))); 96 | else 97 | result_all.push_back(std::make_pair("SSSP", "wrong")); 98 | 99 | /*std::vector> path = path_query(graph, graph.sssp_src_name, "338", pre_v); 100 | for (auto p : path) { 101 | std::cout << p.first << "->" << p.second << std::endl; 102 | }*/ 103 | } 104 | catch (...) { 105 | result_all.push_back(std::make_pair("SSSP", "failed!")); 106 | } 107 | } 108 | else 109 | result_all.push_back(std::make_pair("SSSP", "N/A")); 110 | } 111 | 112 | if (1) { 113 | if (graph.sup_wcc) { 114 | double gpu_wcc_time = 0; 115 | 116 | try { 117 | std::vector> wcc_result; 118 | begin = std::chrono::high_resolution_clock::now(); 119 | wcc_result = Cuda_WCC(graph, csr_graph); 120 | end = std::chrono::high_resolution_clock::now(); 121 | gpu_wcc_time = std::chrono::duration_cast(end - begin).count() / 1e9; 122 | printf("GPU WCC cost time: %f s\n", gpu_wcc_time); 123 | if (WCC_checker(graph, wcc_result, graph.base_path + "-WCC")) 124 | result_all.push_back(std::make_pair("WCC", std::to_string(gpu_wcc_time))); 125 | else 126 | result_all.push_back(std::make_pair("WCC", "wrong")); 127 | } 128 | catch (...) { 129 | result_all.push_back(std::make_pair("WCC", "failed!")); 130 | } 131 | } 132 | else 133 | result_all.push_back(std::make_pair("WCC", "N/A")); 134 | } 135 | 136 | if (1) { 137 | if (graph.sup_pr) { 138 | double gpu_pr_time = 0; 139 | 140 | try { 141 | std::vector> pr_result; 142 | begin = std::chrono::high_resolution_clock::now(); 143 | pr_result = Cuda_PR(graph, csr_graph, graph.pr_its, graph.pr_damping); 144 | end = std::chrono::high_resolution_clock::now(); 145 | gpu_pr_time = std::chrono::duration_cast(end - begin).count() / 1e9; 146 | printf("GPU PageRank cost time: %f s\n", gpu_pr_time); 147 | if (PR_checker(graph, pr_result, graph.base_path + "-PR")) 148 | result_all.push_back(std::make_pair("PR", std::to_string(gpu_pr_time))); 149 | else 150 | result_all.push_back(std::make_pair("PR", "wrong")); 151 | } 152 | catch (...) 
{ 153 | result_all.push_back(std::make_pair("PR", "failed!")); 154 | } 155 | } 156 | else 157 | result_all.push_back(std::make_pair("PR", "N/A")); 158 | } 159 | 160 | if (1) { 161 | if (graph.sup_cdlp) { 162 | double gpu_cdlp_time = 0; 163 | 164 | try { 165 | std::vector> cdlp_result; 166 | begin = std::chrono::high_resolution_clock::now(); 167 | cdlp_result = Cuda_CDLP(graph, csr_graph, graph.cdlp_max_its); 168 | end = std::chrono::high_resolution_clock::now(); 169 | gpu_cdlp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 170 | printf("GPU Community Detection cost time: %f s\n", gpu_cdlp_time); 171 | if (CDLP_checker(graph, cdlp_result, graph.base_path + "-CDLP")) 172 | result_all.push_back(std::make_pair("CDLP", std::to_string(gpu_cdlp_time))); 173 | else 174 | result_all.push_back(std::make_pair("CDLP", "wrong")); 175 | } 176 | catch (...) { 177 | result_all.push_back(std::make_pair("CDLP", "failed!")); 178 | } 179 | } 180 | else 181 | result_all.push_back(std::make_pair("CDLP", "N/A")); 182 | } 183 | 184 | std::cout << "Result: " << std::endl; 185 | int res_size = result_all.size(); 186 | for (int i = 0; i < res_size; i++) { 187 | std::cout << result_all[i].second; 188 | if (i != res_size - 1) 189 | std::cout << ","; 190 | } 191 | std::cout << std::endl; 192 | 193 | graph.save_to_CSV(result_all, "./result-gpu.csv"); 194 | 195 | //freopen("/dev/tty", "r", stdin); 196 | 197 | return 0; 198 | } 199 | -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_Community_Detection.cuh: -------------------------------------------------------------------------------- 1 | #ifndef CDLPGPU 2 | #define CDLPGPU 3 | 4 | #include "cuda_runtime.h" 5 | #include 6 | #include "device_launch_parameters.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | using namespace std; 17 | #define CD_THREAD_PER_BLOCK 512 18 | 19 | __global__ void Label_init(int *labels, int *all_pointer, int N); 20 | __global__ void LabelPropagation(int *all_pointer, int *prop_labels, int *labels, int *all_edge, int N); 21 | __global__ void Get_New_Label(int *all_pointer, int *prop_labels, int *new_labels, int N); 22 | void checkCudaError(cudaError_t err, const char* msg); 23 | void checkDeviceProperties(); 24 | 25 | void CDLP_GPU(graph_structure& graph, CSR_graph& input_graph, std::vector& res, int max_iterations); 26 | 27 | std::vector> Cuda_CDLP(graph_structure& graph, CSR_graph& input_graph, int max_iterations); 28 | 29 | // propagate the label, the label of the neighbor vertex is propagated in parallel 30 | __global__ void LabelPropagation(int *all_pointer, int *prop_labels, int *labels, int *all_edge, int N) 31 | { 32 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 33 | 34 | if (tid >= 0 && tid < N) 35 | { 36 | for (int c = all_pointer[tid]; c < all_pointer[tid + 1]; c++) // traverse the neighbor of the tid vertex 37 | { 38 | prop_labels[c] = labels[all_edge[c]]; // record the label of the neighbor vertex 39 | } 40 | } 41 | } 42 | 43 | // Initialize all labels at once with GPU.Initially 44 | // each vertex v is assigned a unique label which matches its identifier. 
45 | __global__ void Label_init(int *labels, int *all_pointer, int N) 46 | { 47 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 48 | 49 | if (tid >= 0 && tid < N) // tid decides process which vertex 50 | { 51 | labels[tid] = tid; // each vertex is initially labeled by itself 52 | } 53 | } 54 | 55 | // each thread is responsible for one vertex 56 | // every segmentation are sorted 57 | // count Frequency from the start in the global_space_for_label to the end in the global_space_for_label 58 | // the new labels are stroed in the new_labels 59 | __global__ void Get_New_Label(int *all_pointer, int *prop_labels, int *new_labels, int N) 60 | { 61 | // Use GPU to propagate all labels at the same time. 62 | int tid = blockDim.x * blockIdx.x + threadIdx.x; // tid decides process which vertex 63 | if (tid >= 0 && tid < N) { 64 | int maxlabel = prop_labels[all_pointer[tid]], maxcount = 0; // the label that appears the most times and its number of occurrences 65 | for (int c = all_pointer[tid], last_label = prop_labels[all_pointer[tid]], last_count = 0; c < all_pointer[tid + 1]; c++) // traverse the neighbor vertex label data in order 66 | { 67 | if (prop_labels[c] == last_label) 68 | { 69 | last_count++; // add up the number of label occurrences 70 | if (last_count > maxcount) // the number of label occurrences currently traversed is greater than the recorded value 71 | { 72 | maxcount = last_count; // update maxcount and maxlabel 73 | maxlabel = last_label; 74 | } 75 | } 76 | else 77 | { 78 | last_label = prop_labels[c]; // a new label appears, updates the label and number of occurrences 79 | last_count = 1; 80 | } 81 | } 82 | new_labels[tid] = maxlabel; // record the maxlabel 83 | } 84 | } 85 | 86 | // Community Detection Using Label Propagation on GPU 87 | // Returns label of the graph based on the graph and number of iterations. 
88 | void CDLP_GPU(graph_structure& graph, CSR_graph& input_graph, std::vector& res, int max_iterations) 89 | { 90 | int N = graph.size(); // number of vertices in the graph 91 | dim3 init_label_block((N + CD_THREAD_PER_BLOCK - 1) / CD_THREAD_PER_BLOCK, 1, 1); // the number of blocks used in the gpu 92 | dim3 init_label_thread(CD_THREAD_PER_BLOCK, 1, 1); // the number of threads used in the gpu 93 | 94 | int* all_edge = input_graph.all_edge; // graph stored in csr format 95 | int* all_pointer = input_graph.all_pointer; 96 | 97 | int* prop_labels = nullptr; 98 | int* new_prop_labels = nullptr; 99 | int* new_labels = nullptr; 100 | int* labels = nullptr; 101 | 102 | int CD_ITERATION = max_iterations; // fixed number of iterations 103 | long long E = input_graph.E_all; // number of edges in the graph 104 | cudaMallocManaged((void**)&new_labels, N * sizeof(int)); 105 | cudaMallocManaged((void**)&labels, N * sizeof(int)); 106 | cudaMallocManaged((void**)&prop_labels, E * sizeof(int)); 107 | cudaMallocManaged((void**)&new_prop_labels, E * sizeof(int)); 108 | 109 | cudaDeviceSynchronize(); // synchronize, ensure the cudaMalloc is complete 110 | cudaError_t cuda_status = cudaGetLastError(); 111 | if (cuda_status != cudaSuccess) // use the cudaGetLastError to check for possible cudaMalloc errors 112 | { 113 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 114 | return; 115 | } 116 | 117 | Label_init<<>>(labels, all_pointer, N); // initialize all labels at once with GPU 118 | 119 | cudaDeviceSynchronize(); // synchronize, ensure the label initialization is complete 120 | cuda_status = cudaGetLastError(); 121 | if (cuda_status != cudaSuccess) // use the cudaGetLastError to check for possible label initialization errors 122 | { 123 | fprintf(stderr, "Label init failed: %s\n", cudaGetErrorString(cuda_status)); 124 | return; 125 | } 126 | 127 | int it = 0; // number of iterations 128 | // Determine temporary device storage requirements 129 | void *d_temp_storage = NULL; 130 | size_t temp_storage_bytes = 0; 131 | cub::DeviceSegmentedSort::SortKeys( 132 | d_temp_storage, temp_storage_bytes, prop_labels, new_prop_labels, 133 | E, N, all_pointer, all_pointer + 1); // sort the labels of each vertex's neighbors 134 | 135 | cudaDeviceSynchronize(); 136 | cuda_status = cudaGetLastError(); 137 | if (cuda_status != cudaSuccess) 138 | { 139 | fprintf(stderr, "Sort failed: %s\n", cudaGetErrorString(cuda_status)); 140 | return; 141 | } 142 | 143 | cudaError_t err = cudaMalloc(&d_temp_storage, temp_storage_bytes); 144 | if (err != cudaSuccess) 145 | { 146 | cerr << "Error: " << "Malloc failed" << " (" << cudaGetErrorString(err) << ")" << endl; 147 | return; 148 | } 149 | 150 | while (it < CD_ITERATION) // continue for a fixed number of iterations 151 | { 152 | LabelPropagation<<>>(all_pointer, prop_labels, labels, all_edge, N); // calculate the neighbor label array for each vertex 153 | cudaDeviceSynchronize(); // synchronize, ensure the label propagation is complete 154 | 155 | cuda_status = cudaGetLastError(); // check for errors 156 | if (cuda_status != cudaSuccess) { 157 | fprintf(stderr, "LabelPropagation failed: %s\n", cudaGetErrorString(cuda_status)); 158 | return; 159 | } 160 | 161 | // Run sorting operation 162 | cub::DeviceSegmentedSort::SortKeys( 163 | d_temp_storage, temp_storage_bytes, prop_labels, new_prop_labels, 164 | E, N, all_pointer, all_pointer + 1); // sort the labels of each vertex's neighbors 165 | cudaDeviceSynchronize(); 166 | 167 | cuda_status = 
cudaGetLastError(); // check for errors 168 | if (cuda_status != cudaSuccess) { 169 | fprintf(stderr, "Sort failed: %s\n", cudaGetErrorString(cuda_status)); 170 | return; 171 | } 172 | 173 | Get_New_Label<<>>(all_pointer, new_prop_labels, new_labels, N); // generate a new vertex label by label propagation information 174 | 175 | cudaDeviceSynchronize(); 176 | 177 | cuda_status = cudaGetLastError(); // check for errors 178 | if (cuda_status != cudaSuccess) { 179 | fprintf(stderr, "Get_New_Label failed: %s\n", cudaGetErrorString(cuda_status)); 180 | return; 181 | } 182 | 183 | it++; // record number of iterations 184 | std::swap(labels, new_labels); // store the updated label in the labels 185 | } 186 | cudaFree(prop_labels); // free memory 187 | cudaFree(new_prop_labels); 188 | cudaFree(new_labels); 189 | cudaFree(d_temp_storage); 190 | 191 | res.resize(N); 192 | 193 | for (int i = 0; i < N; i++) 194 | { 195 | res[i] = graph.vertex_id_to_str[labels[i]].first; // convert the label to string and store it in res 196 | } 197 | 198 | cudaFree(labels); 199 | } 200 | 201 | // check whether cuda errors occur and output error information 202 | void checkCudaError(cudaError_t err, const char *msg) 203 | { 204 | if (err != cudaSuccess) 205 | { 206 | cerr << "Error: " << msg << " (" << cudaGetErrorString(err) << ")" << endl; // output error message 207 | exit(EXIT_FAILURE); 208 | } 209 | } 210 | 211 | // Community Detection Using Label Propagation on GPU 212 | // Returns label of the graph based on the graph and number of iterations. 213 | // the type of the vertex and label are string 214 | std::vector> Cuda_CDLP(graph_structure& graph, CSR_graph& input_graph, int max_iterations) { 215 | std::vector result; 216 | CDLP_GPU(graph, input_graph, result, max_iterations); // get the labels of each vertex. 
vector index is the id of vertex 217 | 218 | std::vector> res; 219 | int size = result.size(); 220 | for (int i = 0; i < size; i++) 221 | res.push_back(std::make_pair(graph.vertex_id_to_str[i].first, result[i])); // for each vertex, get its string number and store it in res 222 | 223 | return res; // return the results 224 | } 225 | 226 | #endif -------------------------------------------------------------------------------- /include/LDBC/ldbc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // the LDBC class records the graph operation parameters and graph structure 5 | // defines functions for reading configuration files and graph structure files 6 | template 7 | class LDBC : public graph_structure { 8 | public: 9 | // class initializer 10 | LDBC() : graph_structure() {} 11 | LDBC(int n) : graph_structure(n) {} 12 | LDBC(std::string directory, std::string name) : graph_structure() { 13 | this->base_path = directory + name; 14 | } 15 | 16 | bool is_directed = true; // direct graph or undirect graph 17 | bool is_weight = false; // weight graph or no weight graph 18 | bool is_sssp_weight = true; // the weight of sssp 19 | 20 | bool sup_bfs = false; // records the graph operator to be computed 21 | bool sup_cdlp = false; 22 | bool sup_pr = false; 23 | bool sup_wcc = false; 24 | bool sup_sssp = false; 25 | std::string bfs_src_name; // get bfs vertex source 26 | std::string sssp_src_name; // get sssp vertex source 27 | std::string base_path; // graph structure file storage path 28 | int bfs_src = 0; // define bfs vertex source is 0 29 | int cdlp_max_its = 10; // cdlp algo max iterator num 30 | int pr_its = 10; // pr algo iterator num 31 | int sssp_src = 0; // define sssp vertex source is 0 32 | double pr_damping = 0.85; // pr algorithm damping coefficient 33 | 34 | void load_graph(); 35 | void read_config(std::string config_path); 36 | 37 | void save_to_CSV(std::vector>& res, std::string file_path); 38 | }; 39 | 40 | // read config file 41 | // the specific information includes the number of edges, the number of vertices, the parameters of graph operators, etc 42 | template 43 | void LDBC::read_config(std::string config_path) { 44 | std::ifstream file(config_path); 45 | std::string line; 46 | 47 | if (!file.is_open()) { // unable to open file 48 | std::cerr << "Unable to open file: " << config_path << std::endl; 49 | return; 50 | } 51 | 52 | std::cout << "Reading config file..." << std::endl; 53 | 54 | while (getline(file, line)) { // read the config file line-by-line 55 | if (line.empty() || line[0] == '#') // invalid information, blank lines or comment lines 56 | continue; 57 | 58 | auto split_str = parse_string(line, " = "); // read configuration entries and their configuration values 59 | 60 | if (split_str.size() != 2) { 61 | std::cerr << "Invalid line: " << line << std::endl; 62 | continue; 63 | } 64 | 65 | auto key = split_str[0]; // configuration entry 66 | auto value = split_str[1]; // configuration value 67 | 68 | auto parts = parse_string(key, "."); 69 | if (parts.size() >= 2) { 70 | if (parts.back() == "vertex-file") // Reading *.properties file to get vertex file. eg. 
datagen-7_5-fb.v 71 | std::cout << "vertex_file: " << value << std::endl; 72 | else if (parts.back() == "edge-file") // Reading *.properties file to get edge file 73 | std::cout << "edge_file: " << value << std::endl; 74 | else if (parts.back() == "vertices") // Reading *.properties file to get the number of vertices 75 | std::cout << "V: " << value << std::endl; 76 | else if (parts.back() == "edges") // Reading *.properties file to get the number of edges 77 | std::cout << "E: " << value << std::endl; 78 | else if (parts.back() == "directed") { // Reading *.properties file to knows whether the graph is directed or undirected 79 | if (value == "false") 80 | this->is_directed = false; 81 | else 82 | this->is_directed = true; 83 | std::cout << "is_directed: " << this->is_directed << std::endl; 84 | } 85 | else if (parts.back() == "names") {//eg. graph.datagen-7_5-fb.edge-properties.names = weight 86 | if (value == "weight") 87 | this->is_weight = true; 88 | else 89 | this->is_weight = false; 90 | std::cout << "is_weight: " << this->is_weight << std::endl; 91 | } 92 | else if (parts.back() == "algorithms") { // gets the type of algorithm contained in the configuration file 93 | auto algorithms = parse_string(value, ", "); 94 | for (auto& algorithm : algorithms) { 95 | if (algorithm == "bfs") 96 | sup_bfs = true; 97 | else if (algorithm == "cdlp") 98 | sup_cdlp = true; 99 | else if (algorithm == "pr") 100 | sup_pr = true; 101 | else if (algorithm == "sssp") 102 | sup_sssp = true; 103 | else if (algorithm == "wcc") 104 | sup_wcc = true; 105 | } 106 | std::cout << "bfs: " << sup_bfs << std::endl; 107 | std::cout << "cdlp: " << sup_cdlp << std::endl; 108 | std::cout << "pr: " << sup_pr << std::endl; 109 | std::cout << "sssp: " << sup_sssp << std::endl; 110 | std::cout << "wcc: " << sup_wcc << std::endl; 111 | } 112 | else if (parts.back() == "cdlp-max-iterations") { // iteration parameters in Community Detection 113 | cdlp_max_its = stoi(value); 114 | std::cout << "cdlp_max_its: " << cdlp_max_its << std::endl; 115 | } 116 | else if (parts.back() == "pr-damping-factor") { // damping factor in PageRank 117 | pr_damping = stod(value); 118 | std::cout << "pr_damping: " << pr_damping << std::endl; 119 | } 120 | else if (parts.back() == "pr-num-iterations") { // number of iterations in PageRank 121 | pr_its = stoi(value); 122 | std::cout << "pr_its: " << pr_its << std::endl; 123 | } 124 | else if (parts.back() == "sssp-weight-property") { // weight property in sssp 125 | if (value == "weight") 126 | this->is_sssp_weight = true; 127 | else 128 | this->is_sssp_weight = false; 129 | std::cout << "is_sssp_weight: " << this->is_sssp_weight << std::endl; 130 | } 131 | else if (parts.back() == "max-iterations") { 132 | cdlp_max_its = stoi(value); 133 | std::cout << "cdlp_max_its: " << cdlp_max_its << std::endl; 134 | } 135 | else if (parts.back() == "damping-factor") { 136 | pr_damping = stod(value); 137 | std::cout << "pr_damping: " << pr_damping << std::endl; 138 | } 139 | else if (parts.back() == "num-iterations") { 140 | pr_its = stoi(value); 141 | std::cout << "pr_its: " << pr_its << std::endl; 142 | } 143 | else if (parts.back() == "weight-property") { 144 | if (value == "weight") 145 | this->is_sssp_weight = true; 146 | else 147 | this->is_sssp_weight = false; 148 | std::cout << "is_sssp_weight: " << this->is_sssp_weight << std::endl; 149 | } 150 | else if (parts.back() == "source-vertex") { 151 | if (parts[parts.size() - 2] == "bfs") { 152 | bfs_src_name = value; // get bfs source vertex; eg. 
graph.datagen-7_5-fb.bfs.source-vertex = 6 153 | std::cout << "bfs_source_vertex: " << value << std::endl; 154 | } 155 | else { 156 | sssp_src_name = value; // get sssp source vertex; eg. graph.datagen-7_5-fb.sssp.source-vertex = 6 157 | std::cout << "sssp_source_vertex: " << value << std::endl; 158 | } 159 | } 160 | } 161 | } 162 | std::cout << "Done." << std::endl; // read complete 163 | file.close(); 164 | } 165 | 166 | // read the structure of the graph, including vertices and edges 167 | template 168 | void LDBC::load_graph() { 169 | 170 | std::string vertex_file_path; 171 | vertex_file_path = this->base_path + ".v"; // file with ".v" suffix stores vertices information 172 | 173 | std::cout << "Loading vertices..." << std::endl; 174 | std::string line_content; 175 | std::ifstream myfile(vertex_file_path); // open the vertex data file 176 | 177 | if (myfile.is_open()) { 178 | while (getline(myfile, line_content)) // read data line by line 179 | this->add_vertice(line_content); // Parsed the read data 180 | myfile.close(); 181 | } 182 | else { // Unable to open file 183 | std::cout << "Unable to open file " << vertex_file_path << std::endl 184 | << "Please check the file location or file name." << std::endl; 185 | getchar(); 186 | exit(1); 187 | } 188 | 189 | std::cout << "Done." << std::endl; 190 | if (sup_bfs) { 191 | if (this->vertex_str_to_id.find(bfs_src_name) == this->vertex_str_to_id.end()) { // bfs_src_name from read_configure 192 | std::cout << "Invalid source vertex for BFS" << std::endl; 193 | getchar(); 194 | exit(1); 195 | } 196 | else 197 | bfs_src = this->vertex_str_to_id[bfs_src_name]; 198 | } 199 | 200 | if (sup_sssp) { 201 | if (this->vertex_str_to_id.find(sssp_src_name) == this->vertex_str_to_id.end()) { // sssp_src_name from read_configure 202 | std::cout << "Invalid source vertex for SSSP" << std::endl; 203 | getchar(); 204 | exit(1); 205 | } 206 | else 207 | sssp_src = this->vertex_str_to_id[sssp_src_name]; 208 | } 209 | 210 | std::string edge_file_path; 211 | edge_file_path = this->base_path + ".e"; // file with ".e" suffix stores edges information 212 | 213 | std::cout << "Loading edges..." << std::endl; 214 | myfile.open(edge_file_path); // open the edge data file 215 | 216 | if (myfile.is_open()) { 217 | while (getline(myfile, line_content)) { // read data line by line 218 | std::vector Parsed_content = parse_string(line_content, " "); 219 | if (Parsed_content.size() < 2) { 220 | std::cerr << "Invalid edge input!" << std::endl; 221 | continue; 222 | } 223 | weight_type ec = Parsed_content.size() > 2 ? std::stod(Parsed_content[2]) : 1; // get weight 224 | this->add_edge(Parsed_content[0], Parsed_content[1], ec); // add edge 225 | if (!is_directed) { // undirected graphs require additional opposite edges 226 | this->add_edge(Parsed_content[1], Parsed_content[0], ec); 227 | } 228 | } 229 | myfile.close(); 230 | } 231 | else { // Unable to open file 232 | std::cout << "Unable to open file " << edge_file_path << std::endl 233 | << "Please check the file location or file name." << std::endl; 234 | getchar(); 235 | exit(1); 236 | } 237 | std::cout << "Done." 
<< std::endl; 238 | } 239 | 240 | // save the results in csv format to the given path 241 | template 242 | void LDBC::save_to_CSV(std::vector>& res, std::string file_path) { 243 | std::ofstream out(file_path, std::ios::app); 244 | 245 | int res_size = res.size(); 246 | for (int i = 0; i < res_size; i++) { 247 | out << res[i].second; 248 | if (i != res_size - 1) 249 | out << ","; 250 | } 251 | out << std::endl; 252 | 253 | out.close(); 254 | } 255 | -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_PageRank.cuh: -------------------------------------------------------------------------------- 1 | // PageRank_update.cuh 2 | #ifndef PAGERANK_CUH_ 3 | #define PAGERANK_CUH_ 4 | 5 | #include "cuda_runtime.h" 6 | #include 7 | #include "device_launch_parameters.h" 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | // Constants 15 | #define SMALL_BOUND 6 16 | #define NORMAL_BOUND 96 17 | #define THREAD_PER_BLOCK 512 18 | 19 | // Function prototypes 20 | 21 | // CUDA kernels 22 | __device__ double _atomicAdd(double* address, double val); 23 | __global__ void importance(double *npr, double *pr, double damp, int *in_edge, int *in_pointer, int GRAPHSIZE); 24 | __global__ void calculate_sink(double *pr, int *N_out_zero_gpu, int out_zero_size, double *sink_sum); 25 | __global__ void initialization(double *pr, double *outs, int *out_pointer, int N); 26 | __global__ void calculate_acc(double *pr,int *in_edge, int begin,int end,double *acc); 27 | __global__ void Antecedent_division(double *pr,double *npr, double *outs,double redi_tele, int N); 28 | 29 | void GPU_PR(graph_structure &graph, CSR_graph& csr_graph, vector &result, int iterations, double damping); 30 | 31 | std::vector> Cuda_PR(graph_structure &graph, CSR_graph &csr_graph, int iterations, double damping); 32 | 33 | // The gpu version of the pagerank algorithm 34 | // return the pagerank of each vertex based on the graph, damping factor and number of iterations. 
35 | // the pagerank value is stored in the results 36 | void GPU_PR (graph_structure &graph, CSR_graph& csr_graph, vector& result, int iterations, double damping) 37 | { 38 | int N = graph.V; // number of vertices in the graph 39 | double teleport = (1 - damping) / N; // teleport mechanism 40 | 41 | int* in_pointer = csr_graph.in_pointer; 42 | int* out_pointer = csr_graph.out_pointer; 43 | int* in_edge = csr_graph.in_edge; 44 | int* sink_vertex_gpu = nullptr; 45 | double* sink_sum = nullptr; 46 | double* pr = nullptr; 47 | double* npr = nullptr; 48 | double* outs = nullptr; 49 | 50 | dim3 blockPerGrid, threadPerGrid; 51 | 52 | vector sink_vertexs; 53 | 54 | cudaMallocManaged(&outs, N * sizeof(double)); 55 | cudaMallocManaged(&sink_sum, sizeof(double)); 56 | cudaMallocManaged(&npr, N * sizeof(double)); 57 | cudaMallocManaged(&pr, N * sizeof(double)); 58 | 59 | cudaDeviceSynchronize(); // synchronize, ensure the cudaMalloc is complete 60 | 61 | cudaError_t cuda_status = cudaGetLastError(); 62 | if (cuda_status != cudaSuccess) // use the cudaGetLastError to check for possible cudaMalloc errors 63 | { 64 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 65 | return; 66 | } 67 | 68 | for (int i = 0; i < N; i++) // traverse all vertices 69 | { 70 | if (graph.OUTs[i].size()==0) // this means that the vertex has no edges 71 | { 72 | sink_vertexs.push_back(i); // record the sink vertices 73 | } 74 | } 75 | int out_zero_size = sink_vertexs.size(); // the number of sink vertices 76 | cudaMallocManaged(&sink_vertex_gpu, sink_vertexs.size() * sizeof(int)); 77 | cudaDeviceSynchronize(); 78 | cudaMemcpy(sink_vertex_gpu, sink_vertexs.data(), sink_vertexs.size() * sizeof(int), cudaMemcpyHostToDevice); 79 | 80 | cuda_status = cudaGetLastError(); 81 | if (cuda_status != cudaSuccess) 82 | { 83 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 84 | return; 85 | } 86 | 87 | blockPerGrid.x = (N + THREAD_PER_BLOCK - 1) / THREAD_PER_BLOCK; // the number of blocks used in the gpu 88 | threadPerGrid.x = THREAD_PER_BLOCK; // the number of threads used in the gpu 89 | 90 | int iteration = 0; // number of iterations 91 | 92 | initialization<<>>(pr, outs, out_pointer, N); // initializes the pagerank and calculates the reciprocal of the out-degree 93 | cudaDeviceSynchronize(); 94 | while (iteration < iterations) // continue for a fixed number of iterations 95 | { 96 | *sink_sum = 0; 97 | calculate_sink<<>>(pr, sink_vertex_gpu, out_zero_size, sink_sum); // calculate the sinksum 98 | cudaDeviceSynchronize(); 99 | *sink_sum = (*sink_sum) * damping / N; // the redistributed value of sink vertices 100 | Antecedent_division<<>>(pr, npr, outs, teleport + (*sink_sum), N); 101 | cudaDeviceSynchronize(); 102 | importance<<>>(npr, pr, damping, in_edge, in_pointer, N); // calculate importance 103 | cudaDeviceSynchronize(); 104 | 105 | std::swap(pr, npr); // store the updated pagerank in the rank 106 | iteration++; 107 | } 108 | 109 | result.resize(N); 110 | cudaMemcpy(result.data(), pr, N * sizeof(double), cudaMemcpyDeviceToHost); // get gpu PR algorithm result 111 | 112 | cudaFree(pr); // free memory 113 | cudaFree(npr); 114 | cudaFree(outs); 115 | cudaFree(sink_vertex_gpu); 116 | cudaFree(sink_sum); 117 | } 118 | 119 | // initialization of the pagerank state 120 | __global__ void initialization(double *pr, double *outs, int *out_pointer, int N) 121 | { 122 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 123 | if (tid >= 0 && tid < 
N) 124 | { 125 | pr[tid] = 1.0 / N; // the initial pagerank is 1/N (a double literal keeps the division in floating point instead of truncating to 0) 126 | if (out_pointer[tid + 1] - out_pointer[tid]) // determine whether the vertex has any out-edge 127 | outs[tid] = 1.0 / (out_pointer[tid + 1] - out_pointer[tid]); // reciprocal of the out-degree, computed in floating point to avoid integer truncation 128 | else 129 | outs[tid] = 0; // consider importance value to be 0 for sink vertices 130 | } 131 | } 132 | 133 | // compute division in advance, pr(u)/Nout(u), which is used to calculate the importance value 134 | __global__ void Antecedent_division(double *pr,double *npr, double *outs,double redi_tele, int N) 135 | { 136 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides which vertex this thread processes 137 | if (tid >= 0 && tid < N) 138 | { 139 | pr[tid] *= outs[tid]; 140 | npr[tid] = redi_tele; // the sum of redistributed value and teleport value 141 | } 142 | } 143 | 144 | // calculate importance 145 | __global__ void importance(double *npr, double *pr, double damp, int *in_edge, int *in_pointer, int GRAPHSIZE) 146 | { 147 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides which vertex this thread processes 148 | 149 | if (tid >= 0 && tid < GRAPHSIZE) 150 | { 151 | // begin and end of in edges 152 | double acc = 0; // accumulates PR(u)/Nout(u) over all u in Nin(v) 153 | for (int c = in_pointer[tid]; c < in_pointer[tid + 1]; c++) 154 | { // in_edge[c] is an in-neighbour u; pr[u] already holds PR(u)/Nout(u), pre-divided in Antecedent_division 155 | acc += pr[in_edge[c]]; 156 | } 157 | npr[tid] += acc * damp; // add the damped importance to the teleport and sink share already stored in npr 158 | } 159 | return; 160 | } 161 | 162 | // A block-level reduction that sums up 163 | // the pagerank values of the incoming edges in the range [begin, end) 164 | __global__ void calculate_acc(double *pr,int *in_edge, int begin,int end,double *acc){ 165 | __shared__ double temp[THREAD_PER_BLOCK]; // shared memory sized to the block, since the kernel launches pass no dynamic shared-memory size 166 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides which vertex this thread processes 167 | int stid = threadIdx.x; 168 | 169 | if (tid < end) 170 | { 171 | temp[stid] = pr[in_edge[tid+begin]]; // the pagerank value of the incoming edge 172 | } 173 | else 174 | { 175 | temp[stid] = 0; 176 | } 177 | __syncthreads(); // wait until all threads finish loading data into shared memory 178 | 179 | for (int i = blockDim.x / 2; i > 0; i >>= 1) // tree reduction over the block's shared memory 180 | { 181 | if (stid < i) 182 | { 183 | temp[stid] += temp[stid + i]; 184 | } 185 | __syncthreads(); // Synchronize again to ensure that each step of the reduction operation is completed 186 | } 187 | if (stid == 0) 188 | { 189 | _atomicAdd(acc, temp[0]); // Write the result of each thread block into the output array 190 | } 191 | } 192 | 193 | // A block-level reduction that sums the pagerank of the sink (out-degree-zero) vertices 194 | __global__ void calculate_sink(double *pr, int *N_out_zero_gpu, int out_zero_size, double *sink_sum) 195 | { 196 | __shared__ double sink[THREAD_PER_BLOCK]; // shared memory sized to the block, since the kernel launch passes no dynamic shared-memory size 197 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 198 | int stid = threadIdx.x; 199 | 200 | if (tid < out_zero_size) 201 | { 202 | sink[stid] = pr[N_out_zero_gpu[tid]]; // get PR(w) 203 | } 204 | else 205 | { 206 | sink[stid] = 0; // threads beyond the sink-vertex list contribute 0 207 | } 208 | __syncthreads(); // wait until all threads finish loading data into shared memory 209 | 210 | for (int i = blockDim.x / 2; i > 0; i >>= 1) // tree reduction to accumulate the sink values 211 | { 212 | if (stid < i) 213 | { 214 | sink[stid] += sink[stid + i]; 215 | } 216 | __syncthreads(); // Synchronize again to ensure that each step of the reduction
operation is completed 217 | } 218 | if (stid == 0) 219 | { 220 | _atomicAdd(sink_sum, sink[0]); // Write the result of each thread block into the output array 221 | } 222 | } 223 | 224 | // Implementing atomic operations, 225 | // that is, ensuring that adding operations to a specific 226 | // memory location in a multi-threaded environment are thread safe. 227 | __device__ double _atomicAdd(double *address, double val) 228 | { 229 | unsigned long long int *address_as_ull = (unsigned long long int *)address; 230 | unsigned long long int old = *address_as_ull, assumed; 231 | do 232 | { 233 | assumed = old; 234 | old = atomicCAS(address_as_ull, assumed, 235 | __double_as_longlong(val + __longlong_as_double(assumed))); 236 | } while (assumed != old); 237 | return __longlong_as_double(old); 238 | } 239 | 240 | // PageRank Algorithm on GPU 241 | // return the pagerank of each vertex based on the graph, damping factor and number of iterations. 242 | // the type of the vertex and pagerank are string 243 | std::vector> Cuda_PR(graph_structure &graph, CSR_graph &csr_graph, int iterations, double damping){ 244 | std::vector result; 245 | GPU_PR(graph, csr_graph, result, iterations, damping); // get the pagerank in double type 246 | return graph.res_trans_id_val(result); // return the results in string type 247 | } 248 | 249 | #endif // PAGERANK_CUH_ 250 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /include/LDBC/checker.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | bool compare(std::vector& a, std::vector& b) { 10 | return a[0] < b[0]; 11 | } 12 | 13 | // checker for the bfs graph operator 14 | // return check results(true or false) that based on graphs, results, and baseline. 
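// each baseline line is expected to hold two space-separated fields, "<vertex-id> <depth>" (e.g. a hypothetical line "42 3" meaning vertex "42" is at BFS depth 3); unreachable vertices carry a sentinel value, LLONG_MAX in the baseline, matched against INT_MAX in the result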
15 | bool Bfs_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 16 | 17 | int size = res.size(); // get the result size 18 | 19 | if (size != graph.V) { // the result size does not match the graph size 20 | std::cout << "Size of BFS results is not equal to the number of vertices!" << std::endl; 21 | return false; 22 | } 23 | 24 | std::ifstream base_line(base_line_file); // read the baseline file 25 | 26 | if (!base_line.is_open()) { // failed to open the baseline file 27 | std::cout << "Baseline file not found!" << std::endl; 28 | return false; 29 | } 30 | 31 | std::vector id_res(graph.V, -1); 32 | for (auto &p : res) 33 | id_res[graph.vertex_str_to_id[p.first]] = p.second; // convert vertex id of string type to integer and stores in id-res 34 | 35 | int id = 0; 36 | std::string line; 37 | while (std::getline(base_line, line)) { // check each item in the baseline file 38 | std::vector tokens; 39 | tokens = parse_string(line, " "); 40 | if (tokens.size() != 2) { // Baseline file format error 41 | std::cout << "Baseline file format error!" << std::endl; 42 | base_line.close(); 43 | return false; 44 | } 45 | if (id >= size) { // id >= size means that more files are read from baseline than in results 46 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 47 | base_line.close(); 48 | return false; 49 | } 50 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in results 51 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 52 | base_line.close(); 53 | return false; 54 | } 55 | int v_id = graph.vertex_str_to_id[tokens[0]]; 56 | if (id_res[v_id] != std::stol(tokens[1])) { // the results are different from the baseline 57 | if (!(id_res[v_id] == INT_MAX && std::stol(tokens[1]) == LLONG_MAX)) { // make sure it's not different because of the maximum value 58 | std::cout << "Baseline file and GPU BFS results are not the same!" << std::endl; 59 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 60 | std::cout << "BFS result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 61 | base_line.close(); 62 | return false; 63 | } 64 | } 65 | id++; 66 | } 67 | if (id != size) { // id != size means that more reults item than baseline 68 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 69 | base_line.close(); 70 | return false; 71 | } 72 | 73 | std::cout << "BFS results are correct!" << std::endl; 74 | base_line.close(); 75 | return true; // BFS results are correct, return true 76 | } 77 | 78 | void set_root(std::vector& parent, int v) { 79 | if (parent[v] == v) 80 | return; 81 | set_root(parent, parent[v]); 82 | parent[v] = parent[parent[v]]; 83 | } 84 | 85 | // checker for the WCC graph operator 86 | // return check results(true or false) that based on graphs, results, and baseline. 
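// the comparison below canonicalizes both sides: result rows are grouped by their reported component id, the baseline's per-vertex component ids are collapsed to a root via set_root(), every component's vertex list is sorted, the component lists are ordered by smallest vertex (compare()), and the two sides are then matched element-wise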
87 | bool WCC_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 88 | std::vector> temp; // record the connection components for each vertex in the results 89 | temp.resize(graph.V); 90 | for (auto &p : res) 91 | temp[graph.vertex_str_to_id[p.second]].push_back(graph.vertex_str_to_id[p.first]); // add the vertex to its appropriate Weakly Connected Components 92 | std::vector> components; // vector components[i] indicate that vertices in the i-th connection components in results 93 | for (int i = 0; i < graph.V; i++) { 94 | if (temp[i].size() > 0) 95 | components.push_back(temp[i]); // extract every Weakly Connected Components from temp 96 | } 97 | 98 | int size = components.size(); 99 | for (auto &v : components) { 100 | if (!v.size()) { 101 | std::cout << "One of WCC results is empty!" << std::endl; 102 | return false; 103 | } 104 | std::sort(v.begin(), v.end()); // sort the vertices of the same connected components 105 | } 106 | 107 | std::sort(components.begin(), components.end(), compare); 108 | 109 | std::ifstream base_line(base_line_file); 110 | 111 | if (!base_line.is_open()) { // failed to open the baseline file 112 | std::cout << "Baseline file not found!" << std::endl; 113 | return false; 114 | } 115 | 116 | std::vector> base_res; // vector base_res[i] indicate that vertices in the i-th connection components in baseline 117 | std::vector base_components; // record the connection components for each vertex in the baseline 118 | 119 | base_components.resize(graph.V, 0); 120 | 121 | std::string line; 122 | 123 | while (std::getline(base_line, line)) { // read the baseline line by line 124 | std::vector tokens; 125 | tokens = parse_string(line, " "); 126 | if (tokens.size() != 2) { // Baseline file format error 127 | std::cout << "Baseline file format error!" << std::endl; 128 | base_line.close(); 129 | return false; 130 | } 131 | base_components[graph.vertex_str_to_id[tokens[0]]] = graph.vertex_str_to_id[tokens[1]]; // store baseline file per row value to component 132 | } 133 | 134 | for (int i = 0; i < graph.V; i++) 135 | set_root(base_components, i); 136 | 137 | std::vector> componentLists(graph.V); 138 | 139 | // the following operations are the same as the results operations, but work with baseline data 140 | for (int i = 0; i < graph.V; i++) { 141 | componentLists[base_components[i]].push_back(i); 142 | } 143 | 144 | for (int i = 0; i < graph.V; i++) { 145 | if (componentLists[i].size() > 0) 146 | base_res.push_back(componentLists[i]); 147 | } 148 | 149 | for (auto &v : base_res) { 150 | if (!v.size()) { 151 | std::cout << "One of baseline WCC results is empty!" << std::endl; 152 | base_line.close(); 153 | return false; 154 | } 155 | std::sort(v.begin(), v.end()); 156 | } 157 | 158 | std::sort(base_res.begin(), base_res.end(), compare); 159 | 160 | if (size != base_res.size()) { 161 | std::cout << "Baseline file and WCC results are not the same!" << std::endl; 162 | std::cout << "Baseline total component is " << base_res.size() << std::endl; 163 | std::cout << "WCC result total component is " << components.size() << std::endl; 164 | return false; 165 | } 166 | 167 | for (int i = 0; i < size; i++) { // compare each Weakly Connected Component 168 | if (base_res[i].size() != components[i].size()) { // different sizes mean different results and baseline 169 | std::cout << "Baseline file and WCC results are not the same!" 
<< std::endl; 170 | std::cout << "Baseline component size is " << base_res[i].size() << std::endl; 171 | std::cout << "WCC result component size is " << components[i].size() << std::endl; 172 | return false; 173 | } 174 | for (int j = 0; j < base_res[i].size(); j++) { 175 | if (base_res[i][j] != components[i][j]) { // since both baseline and results are ordered, simply compare the elements in order 176 | std::cout << "Baseline file and WCC results are not the same!" << std::endl; 177 | std::cout << "Difference at: " << graph.vertex_id_to_str[base_res[i][j]].first << " " << graph.vertex_id_to_str[components[i][j]].first << std::endl; 178 | base_line.close(); 179 | return false; 180 | } 181 | } 182 | } 183 | 184 | std::cout << "WCC results are correct!" << std::endl; 185 | base_line.close(); 186 | return true; // WCC results are correct, return true 187 | } 188 | 189 | // checker for the SSSP graph operator 190 | // return check results(true or false) that based on graphs, results, and baseline. 191 | bool SSSP_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 192 | 193 | int size = res.size(); // get the result size 194 | 195 | if (size != graph.V) { // the result size does not match the graph size 196 | std::cout << "Size of SSSP results is not equal to the number of vertices!" << std::endl; 197 | return false; 198 | } 199 | 200 | std::ifstream base_line(base_line_file); // read the baseline file 201 | 202 | if (!base_line.is_open()) { // failed to open the baseline file 203 | std::cout << "Baseline file not found!" << std::endl; 204 | return false; 205 | } 206 | 207 | std::vector id_res(graph.V, INT_MAX); 208 | 209 | for (auto &p : res) 210 | id_res[graph.vertex_str_to_id[p.first]] = p.second; // convert vertex id of string type to integer and stores in id-res 211 | 212 | int id = 0; 213 | std::string line; 214 | while (std::getline(base_line, line)) { // check each item in the baseline file 215 | std::vector tokens; 216 | tokens = parse_string(line, " "); 217 | if (tokens.size() != 2) { // Baseline file format error 218 | std::cout << "Baseline file format error!" << std::endl; 219 | base_line.close(); 220 | return false; 221 | } 222 | if (id >= size) { // id >= size means that more files are read from baseline than in results 223 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 224 | base_line.close(); 225 | return false; 226 | } 227 | 228 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in results 229 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 230 | base_line.close(); 231 | return false; 232 | } 233 | int v_id = graph.vertex_str_to_id[tokens[0]]; 234 | 235 | if (tokens[1] == "infinity" || tokens[1] == "inf") { // "infinity" in baseline, so check wether the results is max 236 | if (id_res[v_id] != std::numeric_limits::max()) { 237 | std::cout << "Baseline file and SSSP results are not the same!" << std::endl; 238 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 239 | std::cout << "SSSP result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 240 | base_line.close(); 241 | return false; 242 | } 243 | } 244 | else if (fabs(id_res[v_id] - std::stod(tokens[1])) > 1e-4) { // set the error range to 1e-4, and answers within the range are considered to be correct 245 | std::cout << "Baseline file and SSSP results are not the same!" 
<< std::endl; 246 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 247 | std::cout << "SSSP result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 248 | base_line.close(); 249 | return false; 250 | } 251 | id++; 252 | } 253 | if (id != size) { // id != size means that more reults item than baseline 254 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 255 | base_line.close(); 256 | return false; 257 | } 258 | 259 | std::cout << "SSSP results are correct!" << std::endl; 260 | base_line.close(); 261 | return true; // SSSP results are correct, return true 262 | } 263 | 264 | // checker for the PageRank graph operator 265 | // return check results(true or false) that based on graphs, results, and baseline. 266 | bool PR_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 267 | 268 | int size = res.size(); // get the result size 269 | 270 | std::vector id_res(graph.V, 0); 271 | 272 | for (auto &p : res) 273 | id_res[graph.vertex_str_to_id[p.first]] = p.second; // convert vertex id of string type to integer and stores in id-res 274 | 275 | if (size != graph.V) { // the result size does not match the graph size 276 | std::cout << "Size of PageRank results is not equal to the number of vertices!" << std::endl; 277 | return false; 278 | } 279 | 280 | std::ifstream base_line(base_line_file); 281 | 282 | if (!base_line.is_open()) { // failed to open the baseline file 283 | std::cout << "Baseline file not found!" << std::endl; 284 | return false; 285 | } 286 | 287 | int id = 0; 288 | std::string line; 289 | while (std::getline(base_line, line)) { // check each item in the baseline file 290 | std::vector tokens; 291 | tokens = parse_string(line, " "); 292 | if (tokens.size() != 2) { // Baseline file format error 293 | std::cout << "Baseline file format error!" << std::endl; 294 | base_line.close(); 295 | return false; 296 | } 297 | if (id >= size) { // id >= size means that more files are read from baseline than in results 298 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 299 | base_line.close(); 300 | return false; 301 | } 302 | 303 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in results 304 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 305 | base_line.close(); 306 | return false; 307 | } 308 | int v_id = graph.vertex_str_to_id[tokens[0]]; 309 | 310 | if (fabs(id_res[v_id] - std::stod(tokens[1])) > 1e-2) { // set the error range to 1e-2, and answers within the range are considered to be correct 311 | std::cout << "Baseline file and PageRank results are not the same!" << std::endl; 312 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 313 | std::cout << "PageRank result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 314 | base_line.close(); 315 | return false; 316 | } 317 | id++; 318 | } 319 | if (id != size) { // id != size means that more reults item than baseline 320 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 321 | base_line.close(); 322 | return false; 323 | } 324 | 325 | std::cout << "PageRank results are correct!" 
<< std::endl; 326 | base_line.close(); 327 | return true; // PageRank results are correct, return true 328 | } 329 | 330 | // checker for the CDLP (community detection by label propagation) graph operator 331 | // returns the check result (true or false) based on the graph, the results, and the baseline. 332 | bool CDLP_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 333 | int size = res.size(); // get the result size 334 | 335 | std::vector id_res; 336 | 337 | for (auto &p : res) 338 | id_res.push_back(p.second); // store the result labels into id_res in order (indexed later by vertex id, so res is expected to be ordered by vertex id) 339 | 340 | if (size != graph.V) { // the result size does not match the graph size 341 | std::cout << "Size of CDLP results is not equal to the number of vertices!" << std::endl; 342 | return false; 343 | } 344 | 345 | std::ifstream base_line(base_line_file); 346 | 347 | if (!base_line.is_open()) { // failed to open the baseline file 348 | std::cout << "Baseline file not found!" << std::endl; 349 | return false; 350 | } 351 | 352 | int id = 0; 353 | std::string line; 354 | while (std::getline(base_line, line)) { // check each item in the baseline file 355 | std::vector tokens; 356 | tokens = parse_string(line, " "); 357 | if (tokens.size() != 2) { // Baseline file format error 358 | std::cout << "Baseline file format error!" << std::endl; 359 | base_line.close(); 360 | return false; 361 | } 362 | if (id >= size) { // id >= size means the baseline has more lines than the result has entries 363 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 364 | base_line.close(); 365 | return false; 366 | } 367 | 368 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in the graph 369 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 370 | base_line.close(); 371 | return false; 372 | } 373 | int v_id = graph.vertex_str_to_id[tokens[0]]; 374 | 375 | if (id_res[v_id] != tokens[1]) { // the results are different from the baseline 376 | std::cout << "Baseline file and CDLP results are not the same!" << std::endl; 377 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 378 | std::cout << "CDLP result: " << id_res[v_id] << std::endl; 379 | base_line.close(); 380 | return false; 381 | } 382 | id++; 383 | } 384 | if (id != size) { // id != size means the result has more entries than the baseline has lines 385 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 386 | base_line.close(); 387 | return false; 388 | } 389 | 390 | std::cout << "CDLP results are correct!" << std::endl; 391 | base_line.close(); 392 | return true; // CDLP results are correct, return true 393 | } 394 | --------------------------------------------------------------------------------
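For orientation, the fragment below sketches how the GPU PageRank entry point and its checker are meant to be wired together, loosely in the spirit of src/LDBC/LDBC_GPU_csr.cu (not reproduced here). It is a hypothetical driver, not repository code: it assumes `graph` is an already-loaded LDBC<double> instance accepted by these functions, `csr` is its prebuilt CSR_graph form (the construction call is not shown in this excerpt), and the iteration count, damping factor, and file paths are placeholders.

    // Hypothetical wiring sketch -- values and paths below are placeholders, not repository defaults.
    auto pr_res = Cuda_PR(graph, csr, 100, 0.85);                // GPU PageRank, as defined in GPU_PageRank.cuh above
    if (PR_checker(graph, pr_res, "path/to/pagerank-baseline"))  // compare against an LDBC reference file
        graph.save_to_CSV(pr_res, "path/to/pagerank.csv");       // append one CSV row of PageRank values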