├── include ├── CPU_adj_list │ ├── parse_string.hpp │ ├── algorithm │ │ ├── CPU_BFS.hpp │ │ ├── CPU_shortest_paths.hpp │ │ ├── CPU_PageRank.hpp │ │ ├── CPU_sssp_pre.hpp │ │ ├── CPU_Community_Detection.hpp │ │ └── CPU_connected_components.hpp │ ├── binary_save_read_vector_of_vectors.hpp │ ├── ThreadPool.h │ ├── sorted_vector_binary_operations.hpp │ └── CPU_adj_list.hpp ├── GPU_csr │ ├── GPU_csr.hpp │ └── algorithm │ │ ├── GPU_connected_components.cuh │ │ ├── GPU_BFS.cuh │ │ ├── GPU_sssp_pre.cuh │ │ ├── GPU_shortest_paths.cuh │ │ ├── GPU_Community_Detection.cuh │ │ └── GPU_PageRank.cuh └── LDBC │ ├── ldbc.hpp │ └── checker.hpp ├── CMakeLists.txt ├── src ├── GPU_csr │ └── GPU_example.cu ├── CPU_adj_list │ └── CPU_example.cpp └── LDBC │ ├── LDBC_CPU_adj_list.cpp │ └── LDBC_GPU_csr.cu ├── README.md └── LICENSE.txt /include/CPU_adj_list/parse_string.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | inline std::vector parse_string(std::string parse_target, std::string delimiter); 6 | 7 | inline std::vector parse_string(std::string parse_target, std::string delimiter) { 8 | 9 | std::vector Parsed_content; 10 | size_t pos = 0; 11 | std::string token; 12 | while ((pos = parse_target.find(delimiter)) != std::string::npos) { 13 | // find(const string& str, size_t pos = 0) function returns the position of the first occurrence of str in the string, or npos if the string is not found. 14 | token = parse_target.substr(0, pos); 15 | // The substr(size_t pos = 0, size_t n = npos) function returns a substring of the object, starting at position pos and of length npos 16 | Parsed_content.push_back(token); // store the subtr to the list 17 | parse_target.erase(0, pos + delimiter.length()); // remove the front substr and the first delimiter 18 | } 19 | Parsed_content.push_back(parse_target); // store the subtr to the list 20 | 21 | return Parsed_content; 22 | 23 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_BFS.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | template // T is float or double 10 | std::vector CPU_BFS(std::vector>> &input_graph, int root = 0, int min_depth = 0, int max_depth = INT_MAX) 11 | { 12 | /* The CPU code for breadth first search uses queues to traverse the graph and record depth, 13 | which is also used to prevent duplicate traversal */ 14 | int N = input_graph.size(); 15 | 16 | std::vector depth(N, INT_MAX); 17 | depth[root] = 0; 18 | std::vector searched_vertices; // use to prevent duplicate traversal while recording depth 19 | 20 | std::queue Q; // Queue is a data structure designed to operate in FIFO (First in First out) context. 21 | Q.push(root); 22 | while (Q.size() > 0) 23 | { 24 | int v = Q.front(); 25 | if (depth[v] >= min_depth && depth[v] <= max_depth) 26 | searched_vertices.push_back(v); 27 | 28 | Q.pop(); // Removing that vertex from queue,whose neighbour will be visited now 29 | 30 | if (depth[v] + 1 <= max_depth) 31 | { 32 | // Traversing node v in the graph yields a pair value, adjfirst being the adjacency point 33 | for (auto &adj : input_graph[v]) 34 | { /*processing all the neighbours of v*/ 35 | if (depth[adj.first] > depth[v] + 1) 36 | { 37 | // If the depth of adjacent points is greater, add them to the queue. 
Otherwise, it means that the adjacent points have already been traversed before 38 | depth[adj.first] = depth[v] + 1; 39 | Q.push(adj.first); 40 | } 41 | } 42 | } 43 | } 44 | 45 | return depth; 46 | } 47 | 48 | std::vector> CPU_Bfs(graph_structure &graph, std::string src_v, int min_depth = 0, int max_depth = INT_MAX) 49 | { 50 | std::vector depth = CPU_BFS(graph.OUTs, graph.vertex_str_to_id[src_v], min_depth, max_depth); 51 | return graph.res_trans_id_val(depth); 52 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_shortest_paths.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | using namespace std; 11 | 12 | struct node { 13 | double dis;//distance from source vertex 14 | int u;//indicates vertex 15 | 16 | bool operator>(const node& a) const { return dis > a.dis; }//operator overload 17 | }; 18 | 19 | std::vector CPU_shortest_paths(std::vector>>& input_graph, int source) { 20 | //dijkstras-shortest-path-algorithm 21 | 22 | double inf = std::numeric_limits::max(); 23 | 24 | int N = input_graph.size(); 25 | 26 | std::vector distances; 27 | distances.resize(N, inf); // initial distance from source is inf 28 | 29 | if (source < 0 || source >= N) { 30 | std::cout << "Invalid source vertex" << std::endl;//Abnormal input judgment 31 | return distances; 32 | } 33 | 34 | distances[source] = 0;//Starting distance is 0 35 | std::vector vis(N, 0); 36 | 37 | std::priority_queue, greater > Q;//Using Heap Optimization Algorithm 38 | Q.push({0, source}); 39 | 40 | while (Q.size() > 0) { 41 | 42 | int u = Q.top().u; 43 | 44 | Q.pop();//remove vertex visited this round 45 | 46 | if (vis[u]) continue;//if vertex has already been visited,it shouldn't be pushed to queue again. 
47 | vis[u] = 1;//mark 48 | 49 | for (auto edge : input_graph[u]) { 50 | //Traverse all adjacent vertexs of a vertex 51 | int v = edge.first;//vertex pointed by edge 52 | double w = edge.second;//weight of edge 53 | //use v to update path cost 54 | if (distances[v] > distances[u] + w) { 55 | //If the path cost is smaller, update the new path cost 56 | distances[v] = distances[u] + w; 57 | Q.push({distances[v], v});//add new vertex to queue 58 | } 59 | } 60 | 61 | } 62 | 63 | return distances; 64 | } 65 | 66 | std::vector> CPU_SSSP(graph_structure& graph, std::string src_v) { 67 | std::vector ssspVec = CPU_shortest_paths(graph.OUTs, graph.vertex_str_to_id[src_v]); 68 | return graph.res_trans_id_val(ssspVec); 69 | } 70 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | project(Test LANGUAGES CXX) 3 | 4 | option(BUILD_CPU "Build the CPU version" ON) 5 | option(BUILD_GPU "Build the GPU version" OFF) 6 | 7 | set(CMAKE_CXX_STANDARD 17) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -O3") 10 | 11 | if(BUILD_CPU) 12 | set(EXECUTABLE_NAME_CPU "${PROJECT_NAME}_CPU") 13 | set(CPU_example "CPU_example") 14 | set(EXECUTABLE_OUTPUT_PATH_CPU ${PROJECT_SOURCE_DIR}/build/bin_cpu) 15 | 16 | add_executable(${EXECUTABLE_NAME_CPU} src/LDBC/LDBC_CPU_adj_list.cpp) 17 | add_executable(${CPU_example} src/CPU_adj_list/CPU_example.cpp) 18 | 19 | target_include_directories(${EXECUTABLE_NAME_CPU} PUBLIC include) 20 | target_include_directories(${CPU_example} PUBLIC include) 21 | 22 | set_target_properties(${EXECUTABLE_NAME_CPU} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_CPU}) 23 | set_target_properties(${CPU_example} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_CPU}) 24 | endif() 25 | 26 | if(BUILD_GPU) 27 | enable_language(CUDA) 28 | find_package(CUDA REQUIRED) 29 | 30 | set(EXECUTABLE_NAME_GPU "${PROJECT_NAME}_GPU") 31 | set(GPU_example "GPU_example") 32 | set(EXECUTABLE_OUTPUT_PATH_GPU ${PROJECT_SOURCE_DIR}/build/bin_gpu) 33 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3") 34 | 35 | add_executable(${EXECUTABLE_NAME_GPU} src/LDBC/LDBC_GPU_csr.cu) 36 | add_executable(${GPU_example} src/GPU_csr/GPU_example.cu) 37 | 38 | target_include_directories(${EXECUTABLE_NAME_GPU} PUBLIC ${CUDA_INCLUDE_DIRS}) 39 | target_include_directories(${GPU_example} PUBLIC ${CUDA_INCLUDE_DIRS}) 40 | 41 | target_link_libraries(${EXECUTABLE_NAME_GPU} ${CUDA_LIBRARIES}) 42 | target_link_libraries(${GPU_example} ${CUDA_LIBRARIES}) 43 | 44 | target_include_directories(${EXECUTABLE_NAME_GPU} PUBLIC include) 45 | target_include_directories(${GPU_example} PUBLIC include) 46 | 47 | set_target_properties(${EXECUTABLE_NAME_GPU} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_GPU}) 48 | set_target_properties(${EXECUTABLE_NAME_GPU} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 49 | set_target_properties(${GPU_example} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH_GPU}) 50 | set_target_properties(${GPU_example} PROPERTIES CUDA_SEPARABLE_COMPILATION ON) 51 | endif() -------------------------------------------------------------------------------- /include/CPU_adj_list/binary_save_read_vector_of_vectors.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | template 8 | void 
binary_save_vector_of_vectors(std::string path, const std::vector >& myVector); 9 | 10 | template 11 | void binary_read_vector_of_vectors(std::string path, std::vector>& myVector); 12 | 13 | template 14 | void binary_save_vector_of_vectors(std::string path, const std::vector >& myVector) 15 | { 16 | std::ofstream FILE(path, std::ios::out | std::ofstream::binary); 17 | 18 | // Store size of the outer vector 19 | int s1 = myVector.size(); 20 | FILE.write(reinterpret_cast(&s1), sizeof(s1)); 21 | 22 | // Now write each vector one by one 23 | for (auto& v : myVector) { 24 | // Store its size 25 | int size = v.size(); 26 | FILE.write(reinterpret_cast(&size), sizeof(size)); 27 | if (size == 0) 28 | { 29 | continue; 30 | } 31 | // Store its contents 32 | FILE.write(reinterpret_cast(&v[0]), v.size() * sizeof(T)); 33 | } 34 | FILE.close(); 35 | } 36 | 37 | template 38 | void binary_read_vector_of_vectors(std::string path, std::vector>& myVector) 39 | { 40 | std::vector>().swap(myVector); 41 | 42 | std::ifstream FILE(path, std::ios::in | std::ifstream::binary); 43 | 44 | int size = 0; 45 | FILE.read(reinterpret_cast(&size), sizeof(size)); 46 | if (!FILE) 47 | { 48 | std::cout << "Unable to open file " << path << std::endl << "Please check the file location or file name." << std::endl; // throw an error message 49 | exit(1); // end the program 50 | } 51 | myVector.resize(size); 52 | for (int n = 0; n < size; ++n) { 53 | int size2 = 0; 54 | FILE.read(reinterpret_cast(&size2), sizeof(size2)); 55 | T f; 56 | for (int k = 0; k < size2; ++k) { 57 | FILE.read(reinterpret_cast(&f), sizeof(f)); 58 | myVector[n].push_back(f); 59 | } 60 | std::vector(myVector[n]).swap(myVector[n]); 61 | } 62 | std::vector>(myVector).swap(myVector); 63 | } 64 | 65 | 66 | /* 67 | ---------an example main file------------- 68 | #include 69 | 70 | int main() 71 | { 72 | ; 73 | } 74 | ------------------- 75 | */ 76 | -------------------------------------------------------------------------------- /include/CPU_adj_list/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | ThreadPool(size_t); 17 | template 18 | auto enqueue(F&& f, Args&&... args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | inline ThreadPool::ThreadPool(size_t threads) 35 | : stop(false) 36 | { 37 | for(size_t i = 0;i task; 44 | 45 | { 46 | std::unique_lock lock(this->queue_mutex); 47 | this->condition.wait(lock, 48 | [this]{ return this->stop || !this->tasks.empty(); }); 49 | if(this->stop && this->tasks.empty()) 50 | return; 51 | task = std::move(this->tasks.front()); 52 | this->tasks.pop(); 53 | } 54 | 55 | task(); 56 | } 57 | } 58 | ); 59 | } 60 | 61 | // add new work item to the pool 62 | template 63 | auto ThreadPool::enqueue(F&& f, Args&&... 
args) 64 | -> std::future::type> 65 | { 66 | using return_type = typename std::result_of::type; 67 | 68 | auto task = std::make_shared< std::packaged_task >( 69 | std::bind(std::forward(f), std::forward(args)...) 70 | ); 71 | 72 | std::future res = task->get_future(); 73 | { 74 | std::unique_lock lock(queue_mutex); 75 | 76 | // don't allow enqueueing after stopping the pool 77 | if(stop) 78 | throw std::runtime_error("enqueue on stopped ThreadPool"); 79 | 80 | tasks.emplace([task](){ (*task)(); }); 81 | } 82 | condition.notify_one(); 83 | return res; 84 | } 85 | 86 | // the destructor joins all threads 87 | inline ThreadPool::~ThreadPool() 88 | { 89 | { 90 | std::unique_lock lock(queue_mutex); 91 | stop = true; 92 | } 93 | condition.notify_all(); 94 | for(std::thread &worker: workers) 95 | worker.join(); 96 | } 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /src/GPU_csr/GPU_example.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() { 11 | ios::sync_with_stdio(false); 12 | std::cin.tie(0), std::cout.tie(0); 13 | 14 | graph_structure graph; // directed graph 15 | 16 | // Add vertices and edges 17 | graph.add_vertice("one"); 18 | graph.add_vertice("two"); 19 | graph.add_vertice("three"); 20 | graph.add_vertice("four"); 21 | graph.add_vertice("five"); 22 | graph.add_vertice("R"); 23 | 24 | graph.add_edge("one", "two", 0.8); 25 | graph.add_edge("two", "three", 1); 26 | graph.add_edge("two", "R", 1); 27 | graph.add_edge("two", "four", 0.1); 28 | graph.add_edge("R", "three", 1); 29 | graph.add_edge("one", "three", 1); 30 | graph.add_edge("one", "four", 1); 31 | graph.add_edge("four", "three", 1); 32 | graph.add_edge("four", "five", 1); 33 | 34 | // Remove a vertex 35 | graph.remove_vertice("R"); 36 | 37 | // Add a vertex 38 | graph.add_vertice("six"); 39 | 40 | // Remove an edge 41 | graph.remove_edge("two", "four"); 42 | 43 | // Add an edge 44 | graph.add_edge("one", "six", 1); 45 | 46 | // Transform to CSR 47 | CSR_graph csr_graph = toCSR(graph); 48 | 49 | // BFS 50 | std::cout << "Running BFS..." << std::endl; 51 | std::vector> gpu_bfs_res = Cuda_Bfs(graph, csr_graph, "one"); 52 | std::cout << "BFS result: " << std::endl; 53 | for (auto& res : gpu_bfs_res) 54 | std::cout << res.first << " " << res.second << std::endl; 55 | 56 | // Connected Components 57 | std::cout << "Running Connected Components..." << std::endl; 58 | std::vector> gpu_wcc_res = Cuda_WCC(graph, csr_graph); 59 | std::cout << "Connected Components result: " << std::endl; 60 | for (auto& res : gpu_wcc_res) 61 | std::cout << res.first << " " << res.second << std::endl; 62 | 63 | // SSSP 64 | std::cout << "Running SSSP..." << std::endl; 65 | std::vector> gpu_sssp_res = Cuda_SSSP(graph, csr_graph, "one"); 66 | std::cout << "SSSP result: " << std::endl; 67 | for (auto& res : gpu_sssp_res) 68 | std::cout << res.first << " " << res.second << std::endl; 69 | 70 | // PageRank 71 | std::cout << "Running PageRank..." << std::endl; 72 | std::vector> gpu_pr_res = Cuda_PR(graph, csr_graph, 10, 0.85); 73 | std::cout << "PageRank result: " << std::endl; 74 | for (auto& res : gpu_pr_res) 75 | std::cout << res.first << " " << res.second << std::endl; 76 | 77 | // Community Detection 78 | std::cout << "Running Community Detection..." 
<< std::endl; 79 | std::vector> gpu_cd_res = Cuda_CDLP(graph, csr_graph, 10); 80 | std::cout << "Community Detection result: " << std::endl; 81 | for (auto& res : gpu_cd_res) 82 | std::cout << res.first << " " << res.second << std::endl; 83 | 84 | return 0; 85 | } -------------------------------------------------------------------------------- /src/CPU_adj_list/CPU_example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | int main() 11 | { 12 | ios::sync_with_stdio(false); 13 | std::cin.tie(0), std::cout.tie(0); 14 | 15 | graph_structure graph; // directed graph 16 | 17 | // Add vertices and edges 18 | graph.add_vertice("one"); 19 | graph.add_vertice("two"); 20 | graph.add_vertice("three"); 21 | graph.add_vertice("four"); 22 | graph.add_vertice("five"); 23 | graph.add_vertice("R"); 24 | 25 | graph.add_edge("one", "two", 0.8); 26 | graph.add_edge("two", "three", 1); 27 | graph.add_edge("two", "R", 1); 28 | graph.add_edge("two", "four", 0.1); 29 | graph.add_edge("R", "three", 1); 30 | graph.add_edge("one", "three", 1); 31 | graph.add_edge("one", "four", 1); 32 | graph.add_edge("four", "three", 1); 33 | graph.add_edge("four", "five", 1); 34 | 35 | // Remove a vertex 36 | graph.remove_vertice("R"); 37 | 38 | // Add a vertex 39 | graph.add_vertice("six"); 40 | 41 | // Remove an edge 42 | graph.remove_edge("two", "four"); 43 | 44 | // Add an edge 45 | graph.add_edge("one", "six", 1); 46 | 47 | // BFS 48 | std::cout << "Running BFS..." << std::endl; 49 | std::vector> cpu_bfs_result = CPU_Bfs(graph, "one"); 50 | std::cout << "BFS result: " << std::endl; 51 | for (auto& res : cpu_bfs_result) 52 | std::cout << res.first << " " << res.second << std::endl; 53 | 54 | // Connected Components 55 | std::cout << "Running Connected Components..." << std::endl; 56 | std::vector> cpu_connected_components_result = CPU_WCC(graph); 57 | std::cout << "Connected Components result: " << std::endl; 58 | for (auto& res : cpu_connected_components_result) 59 | std::cout << res.first << " " << res.second << std::endl; 60 | 61 | // SSSP 62 | std::cout << "Running SSSP..." << std::endl; 63 | std::vector> cpu_sssp_result = CPU_SSSP(graph, "one"); 64 | std::cout << "SSSP result: " << std::endl; 65 | for (auto& res : cpu_sssp_result) 66 | std::cout << res.first << " " << res.second << std::endl; 67 | 68 | // PageRank 69 | std::cout << "Running PageRank..." << std::endl; 70 | std::vector> cpu_pagerank_result = CPU_PR(graph, 10, 0.85); 71 | std::cout << "PageRank result: " << std::endl; 72 | for (auto& res : cpu_pagerank_result) 73 | std::cout << res.first << " " << res.second << std::endl; 74 | 75 | // Community Detection 76 | std::cout << "Running Community Detection..." 
<< std::endl; 77 | std::vector> cpu_community_detection_result = CPU_CDLP(graph, 10); 78 | std::cout << "Community Detection result: " << std::endl; 79 | for (auto& res : cpu_community_detection_result) 80 | std::cout << res.first << " " << res.second << std::endl; 81 | 82 | return 0; 83 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_PageRank.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // PageRank Algorithm 9 | // call this function like: ans_cpu = CDLP(graph.INs, graph.OUTs, damp, graph.cdlp_max_its); 10 | // used to show the relevance and importance of vertices in the graph 11 | // return the pagerank of each vertex based on the graph, damping factor and number of iterations. 12 | std::vector PageRank (std::vector>>& in_edge, 13 | std::vector>>& out_edge, double damp, int iters) { 14 | 15 | int N = in_edge.size(); // number of vertices in the graph 16 | 17 | std::vector rank(N, 1 / N); // The initial pagerank of each vertex is 1/|V| 18 | std::vector new_rank(N); // temporarily stores the updated pagerank 19 | 20 | double d = damp; // damping factor 21 | double teleport = (1 - damp) / N; // teleport mechanism 22 | 23 | std::vector sink; // the set of sink vertices 24 | for (int i = 0; i < N; i++) 25 | { 26 | if (out_edge[i].size() == 0) 27 | sink.push_back(i); // record the sink vertices 28 | } 29 | 30 | for (int i = 0; i < iters; i++) { // continue for a fixed number of iterations 31 | double sink_sum = 0; 32 | for (int i = 0; i < sink.size(); i++) // If the out-degree of the vertex is zero, it is a sink node 33 | { 34 | sink_sum += rank[sink[i]]; // calculate the sinksum, which is the sum of the pagerank value of all sink vertices 35 | } 36 | 37 | double x = sink_sum * d / N + teleport; // sum of sinksum and teleport 38 | 39 | ThreadPool pool_dynamic(100); 40 | std::vector> results_dynamic; 41 | for (int q = 0; q < 100; q++) 42 | { 43 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &rank, &out_edge, &new_rank, &x] 44 | { 45 | int start = (long long)q * N / 100, end = std::min((long long)N - 1, (long long)(q + 1) * N / 100); 46 | for (int i = start; i <= end; i++) { 47 | rank[i] /= out_edge[i].size(); 48 | new_rank[i] = x; // record redistributed from sinks and teleport value 49 | } 50 | 51 | return 1; })); 52 | } 53 | for (auto&& result : results_dynamic) 54 | { 55 | result.get(); 56 | } 57 | std::vector>().swap(results_dynamic); 58 | 59 | for (int q = 0; q < 100; q++) 60 | { 61 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &in_edge, &rank, &new_rank, &d] 62 | { 63 | int start = (long long)q * N / 100, end = std::min((long long)N - 1, (long long)(q + 1) * N / 100); 64 | for (int v = start; v <= end; v++) { 65 | double tmp = 0; // sum the rank and then multiply damping to improve running efficiency 66 | for (auto& y : in_edge[v]) { 67 | tmp = tmp + rank[y.first]; // calculate the importance value for each vertex 68 | } 69 | new_rank[v] += d * tmp; 70 | } 71 | return 1; })); 72 | } 73 | for (auto&& result : results_dynamic) 74 | { 75 | result.get(); 76 | } 77 | std::vector>().swap(results_dynamic); 78 | 79 | 80 | rank.swap(new_rank); // store the updated pagerank in the rank 81 | } 82 | return rank; // return the pagerank of each vertex 83 | } 84 | 85 | // PageRank Algorithm 86 | // return the pagerank of each vertex based on the graph, damping factor and number 
of iterations. 87 | // the type of the vertex and pagerank are string 88 | std::vector> CPU_PR (graph_structure& graph, int iterations, double damping) { 89 | std::vector prValueVec = PageRank(graph.INs, graph.OUTs, damping, iterations); // get the pagerank in double type 90 | return graph.res_trans_id_val(prValueVec); 91 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_sssp_pre.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | std::vector CPU_shortest_paths_pre(std::vector>>& input_graph, int source, std::vector& pre_v) { 13 | //dijkstras-shortest-path-algorithm 14 | 15 | double inf = std::numeric_limits::max(); 16 | 17 | int N = input_graph.size(); 18 | 19 | std::vector distances; 20 | distances.resize(N, inf); // initial distance from source is inf 21 | 22 | if (source < 0 || source >= N) { 23 | std::cout << "Invalid source vertex" << std::endl;//Abnormal input judgment 24 | return distances; 25 | } 26 | 27 | distances[source] = 0;//Starting distance is 0 28 | std::vector vis(N, 0); 29 | 30 | std::priority_queue, greater > Q;//Using Heap Optimization Algorithm 31 | Q.push({0, source}); 32 | 33 | while (Q.size() > 0) { 34 | 35 | int u = Q.top().u; 36 | 37 | Q.pop();//remove vertex visited this round 38 | 39 | if (vis[u]) continue;//if vertex has already been visited,it shouldn't be pushed to queue again. 40 | vis[u] = 1;//mark 41 | 42 | for (auto edge : input_graph[u]) { 43 | //Traverse all adjacent vertexs of a vertex 44 | int v = edge.first;//vertex pointed by edge 45 | double w = edge.second;//weight of edge 46 | //use v to update path cost 47 | if (distances[v] > distances[u] + w) { 48 | //If the path cost is smaller, update the new path cost 49 | distances[v] = distances[u] + w; 50 | pre_v[v] = u; 51 | Q.push({distances[v], v});//add new vertex to queue 52 | } 53 | } 54 | 55 | } 56 | 57 | return distances; 58 | } 59 | 60 | std::vector> CPU_SSSP_pre(graph_structure& graph, std::string src_v, std::vector& pre_v) { 61 | pre_v.resize(graph.V, -1); 62 | std::vector ssspVec = CPU_shortest_paths_pre(graph.OUTs, graph.vertex_str_to_id[src_v], pre_v); 63 | 64 | // check the pre_v 65 | /*for (int i = 0; i < graph.V; i++) { 66 | double dis = ssspVec[i]; 67 | int pre = pre_v[i]; 68 | int now = i; 69 | double sum = 0; 70 | while (now != graph.vertex_str_to_id[src_v]) { 71 | bool ff = false; 72 | //std::cout << "pre: " << pre << " now: " << now << std::endl; 73 | for (auto edge : graph.OUTs[pre]) { 74 | //std::cout << "there is an edge from " << pre << " to " << edge.first << " with weight " << edge.second << std::endl; 75 | if (edge.first == now) { 76 | sum += edge.second; 77 | now = pre; 78 | pre = pre_v[pre]; 79 | ff = true; 80 | break; 81 | } 82 | } 83 | if (!ff) { 84 | std::cout << "Not found!" << std::endl; 85 | break; 86 | } 87 | } 88 | if (fabs(dis - sum) > 1e-4) { 89 | std::cout << "Error: pre_v is wrong!" 
<< std::endl; 90 | std::cout << "dis: " << dis << " sum: " << sum << std::endl; 91 | } 92 | }*/ 93 | 94 | return graph.res_trans_id_val(ssspVec); 95 | } 96 | 97 | std::vector> path_query(graph_structure& graph, std::string src_v, std::string dst_v, std::vector& pre_v) { 98 | int dst_id = graph.vertex_str_to_id[dst_v]; 99 | int src_id = graph.vertex_str_to_id[src_v]; 100 | 101 | std::vector> path; 102 | 103 | if (src_id < 0 || src_id >= graph.V || dst_id < 0 || dst_id >= graph.V) { 104 | std::cout << "Invalid source or destination vertex" << std::endl; 105 | return path; 106 | } 107 | 108 | if (pre_v[dst_id] == -1) { 109 | std::cout << "No path from " << src_v << " to " << dst_v << std::endl; 110 | return path; 111 | } 112 | 113 | int now = dst_id; 114 | while (now != src_id) { 115 | path.push_back(std::make_pair(graph.vertex_id_to_str[pre_v[now]].first, graph.vertex_id_to_str[now].first)); 116 | if (pre_v[now] == -1 && now != src_id) { 117 | std::cout << "Error: pre_v is wrong!" << std::endl; 118 | break; 119 | } 120 | now = pre_v[now]; 121 | } 122 | 123 | std::reverse(path.begin(), path.end()); 124 | 125 | return path; 126 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_Community_Detection.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Community Detection Using Label Propagation 10 | // call this function like:ans_cpu = CDLP(graph.INs, graph.OUTs, graph.vertex_id_to_str, graph.cdlp_max_its); 11 | // Returns label of the graph based on the graph and number of iterations. 12 | std::vector CDLP(graph_structure& graph, int iters) 13 | { 14 | auto& in_edges = graph.INs; // incoming edges of each vertex in the graph 15 | auto& out_edges = graph.OUTs; // outgoing edges of each vertex in the graph 16 | 17 | int N = in_edges.size(); // number of vertices in the graph 18 | std::vector label(N); // record the label of the vertex 19 | std::iota(std::begin(label), std::end(label), 0); 20 | std::vector new_label(N); // temporarily stores the updated label 21 | 22 | ThreadPool pool_dynamic(100); 23 | std::vector> results_dynamic; 24 | 25 | for (int k = 0; k < iters; k++) // continue for a fixed number of iterations 26 | { 27 | for (int q = 0; q < 100; q++) 28 | { 29 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &in_edges, &out_edges, &label, &new_label] 30 | { 31 | int start = (long long)q * N / 100, end = std::min((long long)N - 1, (long long)(q + 1) * N / 100); 32 | for (int i = start; i <= end; i++) { 33 | 34 | std::unordered_map count; // record the label information of the neighbor vertex. 
the first keyword is the label and the second keyword is the number of occurrences 35 | for (auto& x : in_edges[i]) // traverse the incoming edges of vertex i 36 | { 37 | count[label[x.first]]++; // count the number of label occurrences of the neighbor vertices 38 | } 39 | for (auto& x : out_edges[i]) // traverse the outcoming edges of vertex i 40 | { 41 | count[label[x.first]]++; // count the number of label occurrences of the neighbor vertices 42 | } 43 | int maxcount = 0; // the maximum number of maxlabel occurrences, the initial value is set to 0, which means that all existing labels can be recorded 44 | int maxlabel = label[i]; // consider the possibility of isolated points, the initial label is label[i] instead of 0 45 | for (std::pair p : count) // traversal the label statistics protector of the neighbor node 46 | { 47 | if (p.second > maxcount) // the number of label occurrences currently traversed is greater than the recorded value 48 | { 49 | maxcount = p.second; // update the label 50 | maxlabel = p.first; 51 | } 52 | else if (p.second == maxcount) // take a smaller value when the number of label occurrences is the same 53 | { 54 | maxlabel = std::min(p.first, maxlabel); 55 | } 56 | } 57 | 58 | new_label[i] = maxlabel; // record the maxlabel 59 | 60 | } 61 | return 1; })); 62 | } 63 | for (auto&& result : results_dynamic) 64 | { 65 | result.get(); 66 | } 67 | std::vector>().swap(results_dynamic); // clear results dynamic 68 | 69 | std::swap(new_label, label); // store labels of type string 70 | } 71 | 72 | std::vectorres(N); 73 | for (int i = 0; i < N; i++) 74 | { 75 | res[i] = graph.vertex_id_to_str[label[i]].first; // convert the label to string and store it in res 76 | } 77 | 78 | return res; 79 | } 80 | 81 | // Community Detection Using Label Propagation 82 | // Returns label of the graph based on the graph and number of iterations. 83 | // the type of the vertex and label are string 84 | std::vector> CPU_CDLP(graph_structure& graph, int iterations) 85 | { 86 | std::vector cdlpVec = CDLP(graph, iterations); // get the labels of each vertex. 
vector index is the id of vertex 87 | 88 | std::vector> res; // store results, the first value in pair records the vertex id, and the second value records the label 89 | int size = cdlpVec.size(); 90 | for (int i = 0; i < size; i++) 91 | res.push_back(std::make_pair(graph.vertex_id_to_str[i].first, cdlpVec[i])); // for each vertex, get its string number and store it in res 92 | 93 | return res; // return the results 94 | } -------------------------------------------------------------------------------- /include/CPU_adj_list/algorithm/CPU_connected_components.hpp: -------------------------------------------------------------------------------- 1 | // #pragma once 2 | 3 | // #include 4 | // #include 5 | // #include 6 | 7 | // template // T is float or double 8 | // std::vector CPU_connected_components(std::vector>>& input_graph, std::vector>>& output_graph) { 9 | // //Using BFS method to find connectivity vectors starting from each node 10 | // /*this is to find connected_components using breadth first search; time complexity O(|V|+|E|); 11 | // related content: https://www.boost.org/doc/libs/1_68_0/boost/graph/connected_components.hpp 12 | // https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm*/ 13 | 14 | // std::vector parent; 15 | 16 | // /*time complexity: O(V)*/ 17 | // int N = input_graph.size(); 18 | // std::vector discovered(N, false); 19 | // parent.resize(N); 20 | // //Vector initialization 21 | // for (int i = 0; i < N; i++) { 22 | 23 | // if (discovered[i] == false) { 24 | // //If the node has not yet been added to the connected component, search for the connected component starting from the node 25 | // /*below is a depth first search; time complexity O(|V|+|E|)*/ 26 | // std::queue Q; // Queue is a data structure designed to operate in FIFO (First in First out) context. 
27 | // Q.push(i); 28 | // parent[i] = i; 29 | // discovered[i] = true; 30 | // while (Q.size() > 0) { 31 | // int v = Q.front(); 32 | // Q.pop(); //Removing that vertex from queue,whose neighbour will be visited now 33 | 34 | // for (auto& x : input_graph[v]) { 35 | // int adj_v = x.first; 36 | // if (discovered[adj_v] == false) { 37 | // Q.push(adj_v); 38 | // parent[adj_v] = parent[v]; 39 | // discovered[adj_v] = true; 40 | // } 41 | // } 42 | // for (auto& x : output_graph[v]) { 43 | // int adj_v = x.first; 44 | // if (discovered[adj_v] == false) { 45 | // Q.push(adj_v); 46 | // parent[adj_v] = parent[v]; 47 | // discovered[adj_v] = true; 48 | // } 49 | // } 50 | // } 51 | // } 52 | // } 53 | // return parent; 54 | // } 55 | 56 | // std::vector> CPU_WCC(graph_structure & graph){ 57 | // std::vector wccVec = CPU_connected_components(graph.OUTs, graph.INs); 58 | // return graph.res_trans_id_id(wccVec); 59 | // } 60 | 61 | #pragma once 62 | 63 | #include 64 | #include 65 | #include 66 | #include 67 | 68 | template // T is float or double 69 | std::vector CPU_connected_components(std::vector>>& input_graph, std::vector>>& output_graph) { 70 | //Using BFS method to find connectivity vectors starting from each node 71 | /*this is to find connected_components using breadth first search; time complexity O(|V|+|E|); 72 | related content: https://www.boost.org/doc/libs/1_68_0/boost/graph/connected_components.hpp 73 | https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm*/ 74 | 75 | /*time complexity: O(V)*/ 76 | 77 | int N = input_graph.size(); 78 | 79 | std::vector component; 80 | component.resize(N); 81 | for (int u = 0; u < N; u++) { 82 | component[u] = u; 83 | } 84 | 85 | int change = true; 86 | while (change) { 87 | change = false; 88 | 89 | ThreadPool pool_dynamic(100); 90 | std::vector> results_dynamic; 91 | for (long long q = 0; q < 100; q++) { 92 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &change, &input_graph, &component] 93 | { 94 | int start = q * N / 100, end = std::min(N - 1, (int)((q + 1) * N / 100)); 95 | for (int u = start; u <= end; u++) { 96 | for (auto& x : input_graph[u]) { 97 | int v = x.first; 98 | int comp_u = component[u]; 99 | int comp_v = component[v]; 100 | if (comp_u == comp_v) continue; 101 | int high_comp = comp_u > comp_v ? 
comp_u : comp_v; 102 | int low_comp = comp_u + (comp_v - high_comp); 103 | if (high_comp == component[high_comp]) { 104 | change = true; 105 | component[high_comp] = low_comp; 106 | } 107 | } 108 | } 109 | return 1; })); 110 | } 111 | for (auto&& result : results_dynamic) 112 | { 113 | result.get(); 114 | } 115 | std::vector>().swap(results_dynamic); 116 | 117 | for (long long q = 0; q < 100; q++) { 118 | results_dynamic.emplace_back(pool_dynamic.enqueue([q, N, &component] 119 | { 120 | int start = q * N / 100, end = std::min(N - 1, (int)((q + 1) * N / 100)); 121 | for (int u = start; u <= end; u++) { 122 | while (component[u] != component[component[u]]) { 123 | component[u] = component[component[u]]; 124 | } 125 | } 126 | return 1; })); 127 | } 128 | for (auto&& result : results_dynamic) 129 | { 130 | result.get(); 131 | } 132 | std::vector>().swap(results_dynamic); 133 | } 134 | 135 | return component; 136 | } 137 | 138 | std::vector> CPU_WCC(graph_structure & graph){ 139 | std::vector wccVec = CPU_connected_components(graph.OUTs, graph.INs); 140 | return graph.res_trans_id_id(wccVec); 141 | } 142 | -------------------------------------------------------------------------------- /include/CPU_adj_list/sorted_vector_binary_operations.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | for a sorted vector>, this dunction conducted binary (divide and conquer) operations on this vector; 7 | 8 | the int values are unique and sorted from small to large 9 | 10 | https://blog.csdn.net/EbowTang/article/details/50770315 11 | */ 12 | 13 | 14 | template 15 | bool sorted_vector_binary_operations_search(std::vector>& input_vector, int key); 16 | 17 | template 18 | T sorted_vector_binary_operations_search_weight(std::vector>& input_vector, int key); 19 | 20 | 21 | template 22 | int sorted_vector_binary_operations_search_position(std::vector>& input_vector, int key); 23 | 24 | template 25 | void sorted_vector_binary_operations_erase(std::vector>& input_vector, int key); 26 | 27 | template 28 | int sorted_vector_binary_operations_insert(std::vector>& input_vector, int key, T load); 29 | 30 | 31 | template 32 | bool sorted_vector_binary_operations_search(std::vector>& input_vector, int key) { 33 | 34 | /*return true if key is in vector; time complexity O(log n)*/ 35 | 36 | int left = 0, right = input_vector.size() - 1; 37 | 38 | while (left <= right) { 39 | int mid = left + ((right - left) / 2); // mid is between left and right (may be equal); 40 | if (input_vector[mid].first == key) { 41 | return true; 42 | } 43 | else if (input_vector[mid].first > key) { 44 | right = mid - 1; 45 | } 46 | else { 47 | left = mid + 1; 48 | } 49 | } 50 | 51 | return false; 52 | 53 | } 54 | 55 | template 56 | T sorted_vector_binary_operations_search_weight(std::vector>& input_vector, int key) { 57 | 58 | /*return std::numeric_limits::max() if key is not in vector; time complexity O(log n)*/ 59 | 60 | int left = 0, right = input_vector.size() - 1; 61 | 62 | while (left <= right) { 63 | int mid = left + ((right - left) / 2); // mid is between left and right (may be equal); 64 | if (input_vector[mid].first == key) { 65 | return input_vector[mid].second; 66 | } 67 | else if (input_vector[mid].first > key) { 68 | right = mid - 1; 69 | } 70 | else { 71 | left = mid + 1; 72 | } 73 | } 74 | 75 | return std::numeric_limits::max(); 76 | 77 | } 78 | 79 | template 80 | int sorted_vector_binary_operations_search_position(std::vector>& input_vector, int key) 
{ 81 | 82 | /*return -1 if key is not in vector; time complexity O(log n)*/ 83 | 84 | int left = 0, right = input_vector.size() - 1; 85 | 86 | while (left <= right) { 87 | int mid = left + ((right - left) / 2); 88 | if (input_vector[mid].first == key) { 89 | return mid; 90 | } 91 | else if (input_vector[mid].first > key) { 92 | right = mid - 1; 93 | } 94 | else { 95 | left = mid + 1; 96 | } 97 | } 98 | 99 | return -1; 100 | 101 | } 102 | 103 | template 104 | void sorted_vector_binary_operations_erase(std::vector>& input_vector, int key) { 105 | 106 | /*erase key from vector; time complexity O(log n + size()-position ), which is O(n) in the worst case, as 107 | the time complexity of erasing an element from a vector is the number of elements behind this element*/ 108 | 109 | if (input_vector.size() > 0) { 110 | int left = 0, right = input_vector.size() - 1; 111 | 112 | while (left <= right) { 113 | int mid = left + ((right - left) / 2); 114 | if (input_vector[mid].first == key) { 115 | input_vector.erase(input_vector.begin() + mid); 116 | break; 117 | } 118 | else if (input_vector[mid].first > key) { 119 | right = mid - 1; 120 | } 121 | else { 122 | left = mid + 1; 123 | } 124 | } 125 | } 126 | 127 | } 128 | 129 | template 130 | int sorted_vector_binary_operations_insert(std::vector>& input_vector, int key, T load) { 131 | 132 | /*return the inserted position; 133 | 134 | insert into vector, if key is already inside, then load is updated; time complexity O(log n + size()-position ), which is O(n) in the worst case, as 135 | the time complexity of inserting an element into a vector is the number of elements behind this element*/ 136 | 137 | int left = 0, right = input_vector.size() - 1; 138 | 139 | while (left <= right) // it will be skept when input_vector.size() == 0 140 | { 141 | int mid = left + ((right - left) / 2); // mid is between left and right (may be equal); 142 | if (input_vector[mid].first == key) { 143 | input_vector[mid].second = load; 144 | return mid; 145 | } 146 | else if (input_vector[mid].first > key) { 147 | right = mid - 1; // the elements after right are always either empty, or have larger keys than input key 148 | } 149 | else { 150 | left = mid + 1; // the elements before left are always either empty, or have smaller keys than input key 151 | } 152 | } 153 | 154 | /*the following code is used when key is not in vector, i.e., left > right, specifically, left = right + 1; 155 | the elements before left are always either empty, or have smaller keys than input key; 156 | the elements after right are always either empty, or have larger keys than input key; 157 | so, the input key should be insert between right and left at this moment*/ 158 | input_vector.insert(input_vector.begin() + left, { key,load }); 159 | return left; 160 | } -------------------------------------------------------------------------------- /include/GPU_csr/GPU_csr.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cuda_runtime.h" 3 | #include 4 | #include 5 | #include 6 | /*for GPU*/ 7 | template 8 | class CSR_graph 9 | { 10 | //CSR has space efficiency and is easy to use by GPUs. 11 | public: 12 | CSR_graph() {} 13 | ~CSR_graph(); 14 | std::vector INs_Neighbor_start_pointers, OUTs_Neighbor_start_pointers, ALL_start_pointers; // Neighbor_start_pointers[i] is the start point of neighbor information of vertex i in Edges and Edge_weights 15 | /* 16 | Now, Neighbor_sizes[i] = Neighbor_start_pointers[i + 1] - Neighbor_start_pointers[i]. 
17 | And Neighbor_start_pointers[V] = Edges.size() = Edge_weights.size() = the total number of edges. 18 | */ 19 | std::vector INs_Edges, OUTs_Edges,all_Edges; // Edges[Neighbor_start_pointers[i]] is the start of Neighbor_sizes[i] neighbor IDs 20 | std::vector INs_Edge_weights, OUTs_Edge_weights; // Edge_weights[Neighbor_start_pointers[i]] is the start of Neighbor_sizes[i] edge weights 21 | int *in_pointer, *out_pointer, *in_edge, *out_edge, *all_pointer, *all_edge;//All_edge has merged in_edge and out_edge, mainly used on CDLP 22 | double *in_edge_weight, *out_edge_weight; 23 | size_t E_all; 24 | }; 25 | 26 | template 27 | // CSR_graph toCSR(graph_structure& graph) 28 | CSR_graph toCSR(graph_structure& graph) 29 | { 30 | 31 | CSR_graph ARRAY; 32 | 33 | int V = graph.size(); 34 | ARRAY.INs_Neighbor_start_pointers.resize(V + 1); // Neighbor_start_pointers[V] = Edges.size() = Edge_weights.size() = the total number of edges. 35 | ARRAY.OUTs_Neighbor_start_pointers.resize(V + 1); 36 | ARRAY.ALL_start_pointers.resize(V + 1); 37 | 38 | int pointer = 0; 39 | for (int i = 0; i < V; i++) 40 | { 41 | ARRAY.INs_Neighbor_start_pointers[i] = pointer; 42 | for (auto &xx : graph.INs[i]) 43 | { 44 | ARRAY.INs_Edges.push_back(xx.first); 45 | ARRAY.INs_Edge_weights.push_back(xx.second); 46 | } 47 | pointer += graph.INs[i].size(); 48 | } 49 | ARRAY.INs_Neighbor_start_pointers[V] = pointer; 50 | 51 | pointer = 0; 52 | for (int i = 0; i < V; i++) 53 | { 54 | ARRAY.OUTs_Neighbor_start_pointers[i] = pointer; 55 | for (auto &xx : graph.OUTs[i]) 56 | { 57 | ARRAY.OUTs_Edges.push_back(xx.first); 58 | ARRAY.OUTs_Edge_weights.push_back(xx.second); 59 | } 60 | pointer += graph.OUTs[i].size(); 61 | } 62 | ARRAY.OUTs_Neighbor_start_pointers[V] = pointer; 63 | 64 | pointer = 0; 65 | for (int i = 0; i < V; i++) 66 | { 67 | ARRAY.ALL_start_pointers[i] = pointer; 68 | for (auto &xx : graph.INs[i]) 69 | { 70 | ARRAY.all_Edges.push_back(xx.first); 71 | } 72 | for (auto &xx : graph.OUTs[i]) 73 | { 74 | ARRAY.all_Edges.push_back(xx.first); 75 | } 76 | pointer += graph.INs[i].size() + graph.OUTs[i].size(); 77 | } 78 | ARRAY.ALL_start_pointers[V] = pointer; 79 | 80 | size_t E_in = ARRAY.INs_Edges.size(); 81 | size_t E_out = ARRAY.OUTs_Edges.size(); 82 | size_t E_all = E_in+E_out; 83 | ARRAY.E_all = E_all; 84 | cudaMallocManaged((void**)&ARRAY.in_pointer, (V + 1) * sizeof(int)); 85 | cudaMallocManaged((void**)&ARRAY.out_pointer, (V + 1) * sizeof(int)); 86 | cudaMallocManaged((void**)&ARRAY.all_pointer, (V + 1) * sizeof(int)); 87 | cudaMallocManaged((void**)&ARRAY.in_edge, E_in * sizeof(int)); 88 | cudaMallocManaged((void**)&ARRAY.out_edge, E_out * sizeof(int)); 89 | cudaMallocManaged((void**)&ARRAY.all_edge, E_all * sizeof(int)); 90 | cudaMallocManaged((void**)&ARRAY.in_edge_weight, E_in * sizeof(double)); 91 | cudaMallocManaged((void**)&ARRAY.out_edge_weight, E_out * sizeof(double)); 92 | 93 | cudaDeviceSynchronize(); 94 | cudaError_t error = cudaGetLastError(); 95 | if (error != cudaSuccess) { 96 | printf("CUDA error: %s\n", cudaGetErrorString(error)); 97 | } 98 | 99 | cudaMemcpy(ARRAY.in_pointer, ARRAY.INs_Neighbor_start_pointers.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice); 100 | cudaMemcpy(ARRAY.out_pointer, ARRAY.OUTs_Neighbor_start_pointers.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice); 101 | cudaMemcpy(ARRAY.all_pointer, ARRAY.ALL_start_pointers.data(), (V + 1) * sizeof(int), cudaMemcpyHostToDevice); 102 | cudaMemcpy(ARRAY.in_edge, ARRAY.INs_Edges.data(), E_in * sizeof(int), cudaMemcpyHostToDevice); 103 
| cudaMemcpy(ARRAY.out_edge, ARRAY.OUTs_Edges.data(), E_out * sizeof(int), cudaMemcpyHostToDevice); 104 | cudaMemcpy(ARRAY.all_edge, ARRAY.all_Edges.data(), E_all * sizeof(int), cudaMemcpyHostToDevice); 105 | cudaMemcpy(ARRAY.in_edge_weight, ARRAY.INs_Edge_weights.data(), E_in * sizeof(double), cudaMemcpyHostToDevice); 106 | cudaMemcpy(ARRAY.out_edge_weight, ARRAY.OUTs_Edge_weights.data(), E_out * sizeof(double), cudaMemcpyHostToDevice); 107 | 108 | return ARRAY; 109 | } 110 | 111 | template 112 | CSR_graph::~CSR_graph() 113 | { 114 | cudaFree(in_pointer); 115 | cudaFree(out_pointer); 116 | cudaFree(all_pointer); 117 | cudaFree(in_edge); 118 | cudaFree(out_edge); 119 | cudaFree(all_edge); 120 | cudaFree(in_edge_weight); 121 | cudaFree(out_edge_weight); 122 | } -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_connected_components.cuh: -------------------------------------------------------------------------------- 1 | #ifndef WCCG 2 | #define WCCG 3 | #include "cuda_runtime.h" 4 | #include 5 | #include "device_launch_parameters.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | using namespace std; 13 | #define WCCG_THREAD_PER_BLOCK 512 14 | 15 | __global__ void parent_init(int *parent, int N) 16 | { 17 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 18 | 19 | if (tid < N) // tid decides process which vertex 20 | { 21 | parent[tid] = tid; // each vertex is initially labeled by itself 22 | } 23 | } 24 | __global__ void compress(int *parent, int N) 25 | { 26 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 27 | 28 | if (tid < N) // tid decides process which vertex 29 | { 30 | while (parent[tid] != parent[parent[tid]]) 31 | { 32 | parent[tid] = parent[parent[tid]]; 33 | } 34 | } 35 | } 36 | __global__ void get_freq(int *parent, int *freq, int N) 37 | { 38 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 39 | 40 | if (tid < N) // tid decides process which vertex 41 | atomicAdd(&freq[parent[tid]], 1); 42 | } 43 | 44 | 45 | __global__ void sampling(int *all_pointer, int *all_edge, int *parent, int N, int neighbor_round) 46 | { 47 | 48 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 49 | int u = tid; 50 | if (tid < N) // tid decides process which vertex 51 | { 52 | 53 | int i = all_pointer[u] + neighbor_round; 54 | if (i < all_pointer[u + 1]) 55 | { 56 | int v = all_edge[i]; 57 | int p1 = parent[u], p2 = parent[v]; 58 | while (p1 != p2) 59 | { // link 60 | int h = p1 > p2 ? p1 : p2; 61 | int l = p2 >= p1 ? p1 : p2; 62 | int check = atomicCAS(&parent[h], h, l); 63 | if (check == h) 64 | { 65 | break; 66 | } 67 | p1 = parent[parent[h]]; 68 | p2 = parent[l]; 69 | } 70 | } 71 | } 72 | } 73 | __global__ void full_link(int *all_pointer, int *all_edge, int *parent, int most, int N, int neighbor_round) 74 | { 75 | 76 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 77 | int u = tid; 78 | if (tid < N) // tid decides process which vertex 79 | { 80 | if (parent[tid] == most) 81 | { 82 | return; 83 | } 84 | 85 | for (int i = all_pointer[u] + neighbor_round; i < all_pointer[u + 1]; i++) 86 | { 87 | int v = all_edge[i]; 88 | int p1 = parent[u], p2 = parent[v]; 89 | while (p1 != p2) 90 | { // link 91 | int h = p1 > p2 ? p1 : p2; 92 | int l = p2 >= p1 ? 
p1 : p2; 93 | int check = atomicCAS(&parent[h], h, l); 94 | if (check == h) 95 | { 96 | break; 97 | } 98 | p1 = parent[parent[h]]; 99 | p2 = parent[l]; 100 | } 101 | } 102 | } 103 | } 104 | std::vector WCC_GPU(graph_structure &graph, CSR_graph &input_graph) 105 | { 106 | int N = graph.size(); // number of vertices in the graph 107 | 108 | dim3 init_label_block((N + WCCG_THREAD_PER_BLOCK - 1) / WCCG_THREAD_PER_BLOCK, 1, 1); // the number of blocks used in the gpu 109 | dim3 init_label_thread(WCCG_THREAD_PER_BLOCK, 1, 1); // the number of threads used in the gpu 110 | 111 | int *all_edge = input_graph.in_edge; // graph stored in csr format 112 | int *all_pointer = input_graph.in_pointer; 113 | int *parent = nullptr; 114 | int *freq = nullptr; 115 | cudaMallocManaged((void **)&parent, N * sizeof(int)); 116 | cudaMallocManaged((void **)&freq, N * sizeof(int)); 117 | cudaMemset(freq, 0, N * sizeof(int)); 118 | parent_init<<>>(parent, N); 119 | cudaDeviceSynchronize(); 120 | int it = 0, ITERATION = 2; // number of iterations 121 | while (it < ITERATION) // continue for a fixed number of iterations 122 | { 123 | sampling<<>>(all_pointer, all_edge, parent, N, it); 124 | cudaDeviceSynchronize(); 125 | compress<<>>(parent, N); 126 | cudaDeviceSynchronize(); 127 | it++; 128 | } 129 | get_freq<<>>(parent, freq, N); 130 | int *c = thrust::max_element(thrust::device, freq, freq + N); 131 | int most_f_element = *c; 132 | full_link<<>>(all_pointer, all_edge, parent, most_f_element, N, ITERATION); 133 | cudaDeviceSynchronize(); 134 | compress<<>>(parent, N); 135 | cudaDeviceSynchronize(); 136 | 137 | std::vector result(N); 138 | cudaMemcpy(result.data(), parent, N * sizeof(int), cudaMemcpyDeviceToHost); 139 | cudaFree(parent); 140 | cudaFree(freq); 141 | return result; 142 | } 143 | std::vector> Cuda_WCC(graph_structure &graph, CSR_graph &csr_graph) 144 | { 145 | std::vector wccVecGPU = WCC_GPU(graph, csr_graph); 146 | return graph.res_trans_id_id(wccVecGPU); 147 | } 148 | #endif -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_BFS.cuh: -------------------------------------------------------------------------------- 1 | #ifndef GPU_BFS 2 | #define GPU_BFS 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | 14 | // template 15 | std::vector cuda_bfs(CSR_graph &input_graph, int source_vertex, int max_depth = INT_MAX); 16 | 17 | std::vector> Cuda_Bfs(graph_structure &graph, CSR_graph &csr_graph, std::string src_v, int min_depth = 0, int max_depth = INT_MAX); 18 | 19 | __global__ void bfs_Relax(int *start, int *edge, int *depth, int *visited, int *queue, int *queue_size) 20 | { 21 | //Relax is performed on each queue node, which traverses all neighboring nodes of that round and relaxes the corresponding distance 22 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 23 | 24 | if (idx < *queue_size) 25 | { 26 | int v = queue[idx]; 27 | 28 | for (int i = start[v]; i < start[v + 1]; i++) 29 | { 30 | // Traverse adjacent edges 31 | int new_v = edge[i]; 32 | 33 | int new_depth = depth[v] + 1; 34 | 35 | int old = atomicMin(&depth[new_v], new_depth);//Update distance using atomic operations to avoid conflict 36 | if (new_depth < old) 37 | { 38 | visited[new_v] = 1; 39 | } 40 | } 41 | } 42 | } 43 | 44 | __global__ void bfs_CompactQueue(int V, int *next_queue, int *next_queue_size, int *visited) 45 | { 46 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 47 | if (idx < V && visited[idx]) 48 | { 49 | //If the node has 
been accessed in this round, it will be added to the queue for the next round 50 | int pos = atomicAdd(next_queue_size, 1); 51 | next_queue[pos] = idx; 52 | visited[idx] = 0; 53 | } 54 | } 55 | 56 | 57 | 58 | // template 59 | std::vector cuda_bfs(CSR_graph &input_graph, int source, int max_depth) 60 | { 61 | /* The GPU code for breadth first search uses queues to traverse the graph and record depth, 62 | which is also used to prevent duplicate traversal */ 63 | int V = input_graph.OUTs_Neighbor_start_pointers.size() - 1; 64 | int E = input_graph.OUTs_Edges.size(); 65 | 66 | int *depth; 67 | int *edge = input_graph.out_edge; 68 | 69 | int *start = input_graph.out_pointer; 70 | int *visited; 71 | 72 | int *queue, *next_queue; 73 | int *queue_size, *next_queue_size; 74 | 75 | cudaMallocManaged((void **)&depth, V * sizeof(int)); 76 | cudaMallocManaged((void **)&visited, V * sizeof(int)); 77 | cudaMallocManaged((void **)&queue, V * sizeof(int)); 78 | cudaMallocManaged((void **)&next_queue, V * sizeof(int)); 79 | cudaMallocManaged((void **)&queue_size, sizeof(int)); 80 | cudaMallocManaged((void **)&next_queue_size, sizeof(int)); 81 | 82 | cudaDeviceSynchronize(); 83 | 84 | cudaError_t cuda_status = cudaGetLastError(); 85 | if (cuda_status != cudaSuccess) 86 | { 87 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 88 | return std::vector(); 89 | } 90 | 91 | for (int i = 0; i < V; i++) 92 | { 93 | depth[i] = max_depth; 94 | visited[i] = 0; 95 | } 96 | depth[source] = 0; 97 | 98 | *queue_size = 1; // At first, there was only the root node in the queue 99 | queue[0] = source; 100 | *next_queue_size = 0; 101 | 102 | int threadsPerBlock = 1024; 103 | int numBlocks = 0; 104 | int QBlocks = (V + threadsPerBlock - 1) / threadsPerBlock; 105 | std::vector res(V, max_depth); 106 | 107 | while (*queue_size > 0) 108 | { 109 | numBlocks = (*queue_size + threadsPerBlock - 1) / threadsPerBlock; 110 | bfs_Relax<<>>(start, edge, depth, visited, queue, queue_size); 111 | cudaDeviceSynchronize(); 112 | 113 | cudaError_t cuda_status = cudaGetLastError(); 114 | if (cuda_status != cudaSuccess) 115 | { 116 | fprintf(stderr, "Relax kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 117 | return res; 118 | } 119 | 120 | bfs_CompactQueue<<>>(V, next_queue, next_queue_size, visited); 121 | cudaDeviceSynchronize(); 122 | 123 | cuda_status = cudaGetLastError(); 124 | if (cuda_status != cudaSuccess) 125 | { 126 | fprintf(stderr, "CompactQueue kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 127 | return res; 128 | } 129 | 130 | std::swap(queue, next_queue); 131 | *queue_size = *next_queue_size; 132 | *next_queue_size = 0; 133 | /* After each round of updates, exchange pointers between the new and old queues, 134 | using the new queue as the traversal queue for the next round and the old queue as the new queue for the next round */ 135 | } 136 | 137 | cudaMemcpy(res.data(), depth, V * sizeof(int), cudaMemcpyDeviceToHost); 138 | 139 | cudaFree(depth); 140 | cudaFree(visited); 141 | cudaFree(queue); 142 | cudaFree(next_queue); 143 | cudaFree(queue_size); 144 | cudaFree(next_queue_size); 145 | 146 | return res; 147 | } 148 | 149 | std::vector> Cuda_Bfs(graph_structure &graph, CSR_graph &csr_graph, std::string src_v, int min_depth, int max_depth) 150 | { 151 | int src_v_id = graph.vertex_str_to_id[src_v]; 152 | std::vector gpuBfsVec = cuda_bfs(csr_graph, src_v_id, max_depth); 153 | 154 | return graph.res_trans_id_val(gpuBfsVec); 155 | } 156 | 157 | #endif 
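/*
---------an example main file (a minimal sketch, not part of the original header)-------------
The kernels above are driven through cuda_bfs / Cuda_Bfs; this sketch shows the typical host-side
call pattern, mirroring src/GPU_csr/GPU_example.cu. The include paths and the graph_structure<double>
template argument are assumptions inferred from the directory tree and CMakeLists.txt, not verbatim
code from this repository.

#include <GPU_csr/GPU_csr.hpp>
#include <GPU_csr/algorithm/GPU_BFS.cuh>
#include <iostream>

int main() {
    graph_structure<double> graph;               // directed, weighted graph keyed by string vertex names
    graph.add_vertice("one");
    graph.add_vertice("two");
    graph.add_edge("one", "two", 1);

    CSR_graph<double> csr_graph = toCSR(graph);  // copy the graph into CSR arrays used by the GPU kernels

    // run BFS from "one"; each result pair is (vertex name, depth from the source)
    auto res = Cuda_Bfs(graph, csr_graph, "one");
    for (auto &p : res)
        std::cout << p.first << " " << p.second << std::endl;

    return 0;
}
-------------------
*/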
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RucGraph - a fast graph database system on CPU/GPU platforms 2 | 3 | RucGraph is a lightweight graph database system that uses both CPUs and GPUs to efficiently perform graph analyses such as Shortest Path, PageRank, and Community Detection. 4 | 5 | 6 | - "Ruc" is the abbreviation of "[Renmin University of China](https://www.ruc.edu.cn/)". 7 | 8 | 9 | - RucGraph works efficiently on large graphs with billions of vertices and edges. In particular, on the [LDBC Graphalytics Benchmarks](https://ldbcouncil.org/benchmarks/graphalytics/), RucGraph is 10 times faster than [neo4j](https://neo4j.com) on CPUs, and 50 times faster than [neo4j](https://neo4j.com) on GPUs. 10 | 11 | 12 | ## Graph data structures & algorithms 13 | 14 | RucGraph currently uses [Adjacency Lists](https://www.geeksforgeeks.org/adjacency-list-meaning-definition-in-dsa/) to store graphs in CPU memory, and [Sparse Matrix Representations](https://www.geeksforgeeks.org/sparse-matrix-representations-set-3-csr/) (CSRs) to store graphs in GPU memory. 15 | 16 | More features, such as Adjacency Lists in GPU memory, are now under development. 17 | 18 | 19 | We have implemented 5 graph analysis algorithms on both CPUs and GPUs to date: Breadth-First Search (BFS), PageRank (PR), Weakly Connected Components (WCC), Community Detection using Label Propagation (CDLP), and Single-Source Shortest Paths (SSSP). Pseudocode for these algorithms can be found at the end of [the LDBC Graphalytics Benchmark handbook](https://arxiv.org/pdf/2011.15028). Nevertheless, our implementations are optimized for parallel computation and may differ considerably from that pseudocode. 20 | 21 | 22 | ## Code file structure 23 | 24 | - `include/`: header files 25 | 26 | - `include/CPU_adj_list/`: header files for operating Adjacency Lists on CPUs 27 | 28 | - `include/CPU_adj_list/CPU_adj_list.hpp`: an Adjacency List on CPUs 29 | 30 | - `include/CPU_adj_list/algorithm/`: header files for graph analysis operators on CPUs, such as the Shortest Path, PageRank, and Community Detection operators; these operators have passed the LDBC Graphalytics Benchmark test 31 | 32 | 33 | 34 | - `include/GPU_csr/`: header files for operating CSRs on GPUs 35 | 36 | - `include/GPU_csr/GPU_csr.hpp`: a CSR on GPUs 37 | 38 | - `include/GPU_csr/algorithm/`: header files for graph analysis operators on GPUs, such as the Shortest Path, PageRank, and Community Detection operators; these operators have also passed the LDBC Graphalytics Benchmark test 39 | 40 | 41 | - `include/LDBC/`: header files for performing the LDBC Graphalytics Benchmark test 42 | 43 | 44 | 45 |
46 | 47 | 48 | - `src/`: source files 49 | - `src/CPU_adj_list/CPU_example.cpp`: an example of performing graph analysis operators on CPUs 50 | - `src/GPU_csr/GPU_example.cu`: an example of performing graph analysis operators on GPUs 51 | - `src/LDBC/LDBC_CPU_adj_list.cpp`: the source code for running the LDBC Graphalytics Benchmark test on CPUs 52 | - `src/LDBC/LDBC_GPU_csr.cu`: the source code for running the LDBC Graphalytics Benchmark test on GPUs 53 | 54 | 55 | 56 | ## Copy & Run 57 | 58 | Here, we show how to build and run RucGraph on a Linux server running Ubuntu 20.04, with 2 Intel(R) Xeon(R) Gold 5218 CPUs and an NVIDIA GeForce RTX 3090 GPU. The environment is as follows. 59 | 60 | - `cmake --version`: cmake version 3.27.9 61 | - `g++ --version`: g++ (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 62 | - `nvidia-smi`: NVIDIA-SMI 550.54.14 / Driver Version: 550.54.14 / CUDA Version: 12.4 63 | 64 | 65 | First, download the files onto the server, e.g., to the path `/home/username/RucGraph`. Then, enter the following commands in a terminal at this path: 66 | 67 | ```shell 68 | username@server:~/RucGraph$ mkdir build 69 | username@server:~/RucGraph$ cd build 70 | username@server:~/RucGraph/build$ cmake .. -DBUILD_CPU=ON -DBUILD_GPU=ON 71 | username@server:~/RucGraph/build$ make 72 | username@server:~/RucGraph/build$ ./bin_cpu/CPU_example 73 | username@server:~/RucGraph/build$ ./bin_gpu/GPU_example 74 | username@server:~/RucGraph/build$ ./bin_cpu/Test_CPU 75 | username@server:~/RucGraph/build$ ./bin_gpu/Test_GPU 76 | ``` 77 | 78 | Some explanations of the above commands: 79 | 80 | - `-DBUILD_CPU=ON -DBUILD_GPU=ON` compiles both the CPU and GPU code. If no GPU is available, change `-DBUILD_GPU=ON` to `-DBUILD_GPU=OFF`. 81 | 82 | 83 | - `./bin_cpu/CPU_example` runs the program built from `src/CPU_adj_list/CPU_example.cpp` 84 | 85 | - `./bin_gpu/GPU_example` runs the program built from `src/GPU_csr/GPU_example.cu` 86 | 87 | - `./bin_cpu/Test_CPU` runs the program built from `src/LDBC/LDBC_CPU_adj_list.cpp` 88 | 89 | - `./bin_gpu/Test_GPU` runs the program built from `src/LDBC/LDBC_GPU_csr.cu` 90 | 91 | We can run "CPU_example" and "GPU_example" without any graph dataset. The outputs of the graph analysis operators will be printed on the terminal (a minimal code sketch of this workflow is given at the end of this README). 92 | 93 | Before running "Test_CPU" and "Test_GPU", however, we first need to download the [LDBC Graphalytics datasets](https://repository.surfsara.nl/datasets/cwi/graphalytics). When running "Test_CPU" and "Test_GPU", the program asks us to input the data directory and the graph name in turn. 94 | ```shell 95 | Please input the data directory: # The program asks 96 | /home/username/data # Input the data path 97 | Please input the graph name: # The program asks 98 | datagen-7_5-fb # Input a data name 99 | ``` 100 | 101 | After the data directory and graph name are entered, the program performs the LDBC Graphalytics Benchmark test on this dataset. Specifically, it prints some parameters of the test, as well as the time consumed by each graph analysis operator on this dataset. 102 | 103 | 104 | ## License 105 | 106 | RucGraph is released under the [Apache 2.0 license](LICENSE.txt). 107 | 108 | ## Contact 109 | 110 | Please contact yahuisun@ruc.edu.cn for any enquiry.
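## A minimal CPU usage sketch

For orientation, the snippet below sketches a typical use of the CPU side: build an adjacency-list graph in memory, then call operators from `include/CPU_adj_list/algorithm/`. It is a simplified illustration based on the headers in `include/CPU_adj_list/`; the include paths assume `include/` is on the compiler's include path (as for the shipped example targets), and `src/CPU_adj_list/CPU_example.cpp` remains the authoritative example.

```cpp
#include <CPU_adj_list/CPU_adj_list.hpp>                 // graph_structure (adjacency lists)
#include <CPU_adj_list/algorithm/CPU_BFS.hpp>            // CPU_Bfs
#include <CPU_adj_list/algorithm/CPU_shortest_paths.hpp> // CPU_shortest_paths (Dijkstra)
#include <iostream>
#include <vector>

int main() {
    graph_structure<double> graph;      // edge weights stored as double
    graph.add_edge("A", "B", 1.0);      // vertices are created on demand from their string names
    graph.add_edge("B", "C", 2.0);
    graph.add_edge("A", "C", 5.0);

    // BFS depth of every vertex from source "A", reported as (vertex name, depth) pairs
    for (const auto &p : CPU_Bfs(graph, "A"))
        std::cout << p.first << " is at depth " << p.second << std::endl;

    // Dijkstra distances from "A", indexed by the internal vertex id
    std::vector<double> dist = CPU_shortest_paths(graph.OUTs, graph.vertex_str_to_id["A"]);
    std::cout << "distance A -> C = " << dist[graph.vertex_str_to_id["C"]] << std::endl;
    return 0;
}
```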
111 | 112 | 113 | -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_sssp_pre.cuh: -------------------------------------------------------------------------------- 1 | #ifndef SSSP_PRE_H 2 | #define SSSP_PRE_H 3 | 4 | #include 5 | 6 | #include "cuda_runtime.h" 7 | #include "device_launch_parameters.h" 8 | 9 | #include 10 | #include 11 | 12 | __global__ void Relax_pre(int* out_pointer, int* out_edge, double* out_edge_weight, double* dis, int* queue, int* queue_size, int* visited, int* pre, int* mutex) { 13 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 14 | 15 | if (idx < *queue_size) { 16 | int v = queue[idx]; 17 | 18 | for (int i = out_pointer[v]; i < out_pointer[v + 1]; i++) { 19 | int new_v = out_edge[i]; 20 | double weight = out_edge_weight[i]; 21 | 22 | double new_w = dis[v] + weight; 23 | 24 | // try doing relaxation 25 | // mutex is used 26 | while (atomicCAS(&mutex[new_v], 0, 1) != 0); 27 | 28 | if (new_w < dis[new_v]) { 29 | dis[new_v] = new_w; 30 | // update the previous vertex 31 | pre[new_v] = v; 32 | atomicExch(&visited[new_v], 1); 33 | // share the updated distance with other threads in different blocks 34 | __threadfence(); 35 | } 36 | 37 | atomicExch(&mutex[new_v], 0); 38 | } 39 | } 40 | } 41 | 42 | void gpu_sssp_pre(CSR_graph& input_graph, int source, std::vector& distance, std::vector& pre_v, double max_dis) { 43 | // the only difference between this function and the previous one is that we need to record the previous vertex 44 | int V = input_graph.OUTs_Neighbor_start_pointers.size() - 1; 45 | int E = input_graph.OUTs_Edges.size(); 46 | 47 | double* dis; 48 | int* out_edge = input_graph.out_edge; 49 | double* out_edge_weight = input_graph.out_edge_weight; 50 | int* out_pointer = input_graph.out_pointer; 51 | int* visited; 52 | 53 | int* queue, * next_queue; 54 | int* queue_size, * next_queue_size; 55 | int* mutex; 56 | int* pre; 57 | 58 | cudaMallocManaged((void**)&dis, V * sizeof(double)); 59 | cudaMallocManaged((void**)&visited, V * sizeof(int)); 60 | cudaMallocManaged((void**)&queue, V * sizeof(int)); 61 | cudaMallocManaged((void**)&next_queue, V * sizeof(int)); 62 | cudaMallocManaged((void**)&queue_size, sizeof(int)); 63 | cudaMallocManaged((void**)&next_queue_size, sizeof(int)); 64 | 65 | cudaMallocManaged((void**)&mutex, V * sizeof(int)); 66 | cudaMallocManaged((void**)&pre, V * sizeof(int)); 67 | 68 | cudaDeviceSynchronize(); 69 | cudaError_t cuda_status = cudaGetLastError(); 70 | if (cuda_status != cudaSuccess) { 71 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 72 | return; 73 | } 74 | 75 | for (int i = 0; i < V; i++) { 76 | dis[i] = max_dis; 77 | visited[i] = 0; 78 | mutex[i] = 0; 79 | pre[i] = -1; 80 | } 81 | dis[source] = 0; 82 | 83 | 84 | *queue_size = 1; 85 | queue[0] = source; 86 | *next_queue_size = 0; 87 | 88 | int threadsPerBlock = 1024; 89 | int numBlocks = 0; 90 | 91 | while (*queue_size > 0) { 92 | numBlocks = (*queue_size + threadsPerBlock - 1) / threadsPerBlock; 93 | Relax_pre <<< numBlocks, threadsPerBlock >>> (out_pointer, out_edge, out_edge_weight, dis, queue, queue_size, visited, pre, mutex); 94 | cudaDeviceSynchronize(); 95 | 96 | cudaError_t cuda_status = cudaGetLastError(); 97 | if (cuda_status != cudaSuccess) { 98 | fprintf(stderr, "Relax kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 99 | return; 100 | } 101 | 102 | numBlocks = (V + threadsPerBlock - 1) / threadsPerBlock; 103 | CompactQueue <<< numBlocks, threadsPerBlock >>> 
(V, next_queue, next_queue_size, visited); 104 | cudaDeviceSynchronize(); 105 | 106 | cuda_status = cudaGetLastError(); 107 | if (cuda_status != cudaSuccess) { 108 | fprintf(stderr, "CompactQueue kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 109 | return; 110 | } 111 | 112 | std::swap(queue, next_queue); 113 | 114 | *queue_size = *next_queue_size; 115 | *next_queue_size = 0; 116 | } 117 | 118 | cudaMemcpy(distance.data(), dis, V * sizeof(double), cudaMemcpyDeviceToHost); 119 | cudaMemcpy(pre_v.data(), pre, V * sizeof(int), cudaMemcpyDeviceToHost); 120 | 121 | cudaFree(dis); 122 | cudaFree(visited); 123 | cudaFree(queue); 124 | cudaFree(next_queue); 125 | cudaFree(queue_size); 126 | cudaFree(next_queue_size); 127 | cudaFree(mutex); 128 | cudaFree(pre); 129 | 130 | return; 131 | } 132 | 133 | std::vector> Cuda_SSSP_pre(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, std::vector& pre_v, double max_dis) { 134 | int src_v_id = graph.vertex_str_to_id[src_v]; 135 | std::vector gpuSSSPvec(graph.V, 0); 136 | pre_v.resize(graph.V); 137 | gpu_sssp_pre(csr_graph, src_v_id, gpuSSSPvec, pre_v, max_dis); 138 | 139 | // check the correctness of the previous vertex 140 | /*for (int i = 0; i < graph.V; i++) { 141 | double dis = gpuSSSPvec[i]; 142 | int pre = pre_v[i]; 143 | int now = i; 144 | double sum = 0; 145 | while (pre != -1) { 146 | bool ff = false; 147 | for (auto edge : graph.OUTs[pre]) { 148 | if (edge.first == now) { 149 | sum += edge.second; 150 | now = pre; 151 | pre = pre_v[pre]; 152 | ff = true; 153 | break; 154 | } 155 | } 156 | if (!ff) { 157 | std::cout << "Not found!" << std::endl; 158 | break; 159 | } 160 | } 161 | if (fabs(sum - dis) > 1e-4) { 162 | std::cout << "Error: pre_v is wrong!" << std::endl; 163 | std::cout << "sum: " << sum << " dis: " << dis << std::endl; 164 | } 165 | }*/ 166 | 167 | return graph.res_trans_id_val(gpuSSSPvec); 168 | } 169 | 170 | #endif -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_shortest_paths.cuh: -------------------------------------------------------------------------------- 1 | #ifndef WS_SSSP_H 2 | #define WS_SSSP_H 3 | 4 | #include 5 | 6 | #include "cuda_runtime.h" 7 | #include "device_launch_parameters.h" 8 | 9 | #include 10 | 11 | __device__ __forceinline__ double atomicMinDouble (double * addr, double value); 12 | 13 | __global__ void Relax(int* offsets, int* edges, double* weights, double* dis, int* queue, int* queue_size, int* visited); 14 | __global__ void CompactQueue(int V, int* next_queue, int* next_queue_size, int* visited); 15 | void gpu_shortest_paths(CSR_graph& input_graph, int source, std::vector& distance, double max_dis = 10000000000); 16 | void gpu_sssp_pre(CSR_graph& input_graph, int source, std::vector& distance, std::vector& pre_v, double max_dis = 10000000000); 17 | 18 | std::vector> Cuda_SSSP(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, double max_dis = 10000000000); 19 | std::vector> Cuda_SSSP_pre(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, std::vector& pre_v, double max_dis = 10000000000); 20 | 21 | // this function is used to get the minimum value of double type atomically 22 | __device__ __forceinline__ double atomicMinDouble (double * addr, double value) { 23 | double old; 24 | old = __longlong_as_double(atomicMin((long long *)addr, __double_as_longlong(value))); 25 | return old; 26 | } 27 | 28 | __global__ void Relax(int* out_pointer, int* out_edge, double* out_edge_weight, double* 
dis, int* queue, int* queue_size, int* visited) { 29 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 30 | 31 | if (idx < *queue_size) { 32 | int v = queue[idx]; 33 | 34 | // for all adjacent vertices 35 | for (int i = out_pointer[v]; i < out_pointer[v + 1]; i++) { 36 | int new_v = out_edge[i]; 37 | double weight = out_edge_weight[i]; 38 | 39 | double new_w = dis[v] + weight; 40 | 41 | // try doing relaxation 42 | double old = atomicMinDouble(&dis[new_v], new_w); 43 | 44 | if (old <= new_w) 45 | continue; 46 | 47 | // if the distance is updated, set the vertex as visited 48 | atomicExch(&visited[new_v], 1); 49 | } 50 | } 51 | } 52 | 53 | __global__ void CompactQueue(int V, int* next_queue, int* next_queue_size, int* visited) { 54 | // this function is used to ensure that each necessary vertex is only pushed into the queue once 55 | int idx = blockIdx.x * blockDim.x + threadIdx.x; 56 | if (idx < V && visited[idx]) { 57 | int pos = atomicAdd(next_queue_size, 1); 58 | next_queue[pos] = idx; 59 | // reset the visited flag 60 | visited[idx] = 0; 61 | } 62 | } 63 | 64 | void gpu_shortest_paths(CSR_graph& input_graph, int source, std::vector& distance, double max_dis) { 65 | int V = input_graph.OUTs_Neighbor_start_pointers.size() - 1; 66 | int E = input_graph.OUTs_Edges.size(); 67 | 68 | double* dis; 69 | int* out_edge = input_graph.out_edge; 70 | double* out_edge_weight = input_graph.out_edge_weight; 71 | int* out_pointer = input_graph.out_pointer; 72 | int* visited; 73 | 74 | int* queue, * next_queue; 75 | int* queue_size, * next_queue_size; 76 | 77 | // allocate memory on GPU 78 | cudaMallocManaged((void**)&dis, V * sizeof(double)); 79 | cudaMallocManaged((void**)&visited, V * sizeof(int)); 80 | cudaMallocManaged((void**)&queue, V * sizeof(int)); 81 | cudaMallocManaged((void**)&next_queue, V * sizeof(int)); 82 | cudaMallocManaged((void**)&queue_size, sizeof(int)); 83 | cudaMallocManaged((void**)&next_queue_size, sizeof(int)); 84 | 85 | // synchronize the device 86 | cudaDeviceSynchronize(); 87 | cudaError_t cuda_status = cudaGetLastError(); 88 | if (cuda_status != cudaSuccess) { 89 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 90 | return; 91 | } 92 | 93 | for (int i = 0; i < V; i++) { 94 | // initialize the distance array and visited array 95 | dis[i] = max_dis; 96 | visited[i] = 0; 97 | } 98 | dis[source] = 0; 99 | 100 | 101 | *queue_size = 1; 102 | queue[0] = source; 103 | *next_queue_size = 0; 104 | 105 | int threadsPerBlock = 1024; 106 | int numBlocks = 0; 107 | 108 | while (*queue_size > 0) { 109 | numBlocks = (*queue_size + threadsPerBlock - 1) / threadsPerBlock; 110 | // launch the kernel function to relax the edges 111 | Relax <<< numBlocks, threadsPerBlock >>> (out_pointer, out_edge, out_edge_weight, dis, queue, queue_size, visited); 112 | cudaDeviceSynchronize(); 113 | 114 | cudaError_t cuda_status = cudaGetLastError(); 115 | if (cuda_status != cudaSuccess) { 116 | fprintf(stderr, "Relax kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 117 | return; 118 | } 119 | 120 | numBlocks = (V + threadsPerBlock - 1) / threadsPerBlock; 121 | // do the compact operation 122 | CompactQueue <<< numBlocks, threadsPerBlock >>> (V, next_queue, next_queue_size, visited); 123 | cudaDeviceSynchronize(); 124 | 125 | cuda_status = cudaGetLastError(); 126 | if (cuda_status != cudaSuccess) { 127 | fprintf(stderr, "CompactQueue kernel launch failed: %s\n", cudaGetErrorString(cuda_status)); 128 | return; 129 | } 130 | 131 | // swap the queue and next_queue 
132 | std::swap(queue, next_queue); 133 | 134 | *queue_size = *next_queue_size; 135 | *next_queue_size = 0; 136 | } 137 | 138 | cudaMemcpy(distance.data(), dis, V * sizeof(double), cudaMemcpyDeviceToHost); 139 | 140 | // free the memory 141 | cudaFree(dis); 142 | cudaFree(visited); 143 | cudaFree(queue); 144 | cudaFree(next_queue); 145 | cudaFree(queue_size); 146 | cudaFree(next_queue_size); 147 | 148 | return; 149 | } 150 | 151 | std::vector> Cuda_SSSP(graph_structure& graph, CSR_graph& csr_graph, std::string src_v, double max_dis) { 152 | int src_v_id = graph.vertex_str_to_id[src_v]; 153 | std::vector gpuSSSPvec(graph.V, 0); 154 | gpu_shortest_paths(csr_graph, src_v_id, gpuSSSPvec, max_dis); 155 | 156 | // transfer the vertex id to vertex name 157 | return graph.res_trans_id_val(gpuSSSPvec); 158 | } 159 | 160 | #endif -------------------------------------------------------------------------------- /src/LDBC/LDBC_CPU_adj_list.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | int main() 14 | { 15 | ios::sync_with_stdio(false); 16 | std::cin.tie(0); 17 | std::cout.tie(0); 18 | 19 | //freopen("../input.txt", "r", stdin); 20 | 21 | std::string directory; 22 | std::cout << "Please input the data directory: " << std::endl; 23 | std::cin >> directory; 24 | 25 | if (directory.back() != '/') 26 | directory += "/"; 27 | 28 | std::string graph_name; 29 | std::cout << "Please input the graph name: " << std::endl; 30 | std::cin >> graph_name; 31 | 32 | std::string config_file_path = directory + graph_name + ".properties"; 33 | 34 | LDBC graph(directory, graph_name); 35 | graph.read_config(config_file_path); //Read the ldbc configuration file to obtain key parameter information in the file 36 | 37 | auto begin = std::chrono::high_resolution_clock::now(); 38 | graph.load_graph(); //Read the vertex and edge files corresponding to the configuration file, // The vertex information in graph is converted to csr format for storage 39 | auto end = std::chrono::high_resolution_clock::now(); 40 | double load_ldbc_time = std::chrono::duration_cast(end - begin).count() / 1e9; // s 41 | printf("load_ldbc_time cost time: %f s\n", load_ldbc_time); 42 | 43 | std::vector> result_all; 44 | 45 | if (1) { 46 | if (graph.sup_bfs) { 47 | double cpu_bfs_time = 0; 48 | 49 | try{ 50 | std::vector> cpu_bfs_result; 51 | begin = std::chrono::high_resolution_clock::now(); 52 | cpu_bfs_result = CPU_Bfs(graph, graph.bfs_src_name); 53 | end = std::chrono::high_resolution_clock::now(); 54 | cpu_bfs_time = std::chrono::duration_cast(end - begin).count() / 1e9; 55 | printf("CPU BFS cost time: %f s\n", cpu_bfs_time); 56 | 57 | if(Bfs_checker(graph, cpu_bfs_result, graph.base_path + "-BFS")) 58 | result_all.push_back(std::make_pair("BFS", std::to_string(cpu_bfs_time))); 59 | else 60 | result_all.push_back(std::make_pair("BFS", "wrong")); 61 | } 62 | catch(...) 
{ 63 | result_all.push_back(std::make_pair("BFS", "failed!")); 64 | } 65 | } 66 | else 67 | result_all.push_back(std::make_pair("BFS", "N/A")); 68 | } 69 | 70 | if (1) { 71 | if (graph.sup_sssp) { 72 | double cpu_sssp_time = 0; 73 | 74 | try { 75 | //std::vector pre_v; 76 | begin = std::chrono::high_resolution_clock::now(); 77 | std::vector> cpu_sssp_result = CPU_SSSP(graph, graph.sssp_src_name); 78 | //std::vector> cpu_sssp_result = CPU_SSSP_pre(graph, graph.sssp_src_name, pre_v); 79 | end = std::chrono::high_resolution_clock::now(); 80 | /*std::vector> path = path_query(graph, graph.sssp_src_name, "338", pre_v); 81 | for (auto p : path) 82 | std::cout << p.first << " -> " << p.second << std::endl;*/ 83 | cpu_sssp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 84 | printf("CPU SSSP cost time: %f s\n", cpu_sssp_time); 85 | 86 | if (SSSP_checker(graph, cpu_sssp_result, graph.base_path + "-SSSP")) 87 | result_all.push_back(std::make_pair("SSSP", std::to_string(cpu_sssp_time))); 88 | else 89 | result_all.push_back(std::make_pair("SSSP", "wrong")); 90 | } 91 | catch(...) { 92 | result_all.push_back(std::make_pair("SSSP", "failed!")); 93 | } 94 | } 95 | else 96 | result_all.push_back(std::make_pair("SSSP", "N/A")); 97 | } 98 | 99 | if (1) { 100 | if (graph.sup_wcc) { 101 | double cpu_wcc_time = 0; 102 | 103 | try { 104 | begin = std::chrono::high_resolution_clock::now(); 105 | std::vector> cpu_wcc_result = CPU_WCC(graph); 106 | end = std::chrono::high_resolution_clock::now(); 107 | cpu_wcc_time = std::chrono::duration_cast(end - begin).count() / 1e9; 108 | printf("CPU WCC cost time: %f s\n", cpu_wcc_time); 109 | 110 | if (WCC_checker(graph, cpu_wcc_result, graph.base_path + "-WCC")) 111 | result_all.push_back(std::make_pair("WCC", std::to_string(cpu_wcc_time))); 112 | else 113 | result_all.push_back(std::make_pair("WCC", "wrong")); 114 | } 115 | catch(...) { 116 | result_all.push_back(std::make_pair("WCC", "failed!")); 117 | } 118 | } 119 | else 120 | result_all.push_back(std::make_pair("WCC", "N/A")); 121 | } 122 | 123 | if (1) { 124 | if (graph.sup_pr) { 125 | double cpu_pr_time = 0; 126 | 127 | try { 128 | begin = std::chrono::high_resolution_clock::now(); 129 | std::vector> cpu_pr_result = CPU_PR(graph, graph.pr_its, graph.pr_damping); 130 | end = std::chrono::high_resolution_clock::now(); 131 | cpu_pr_time = std::chrono::duration_cast(end - begin).count() / 1e9; 132 | printf("CPU PageRank cost time: %f s\n", cpu_pr_time); 133 | 134 | if (PR_checker(graph, cpu_pr_result, graph.base_path + "-PR")) 135 | result_all.push_back(std::make_pair("PageRank", std::to_string(cpu_pr_time))); 136 | else 137 | result_all.push_back(std::make_pair("PageRank", "wrong")); 138 | } 139 | catch(...) 
{ 140 | result_all.push_back(std::make_pair("PageRank", "failed!")); 141 | } 142 | } 143 | else 144 | result_all.push_back(std::make_pair("PageRank", "N/A")); 145 | } 146 | 147 | if (1) { 148 | if (graph.sup_cdlp) { 149 | double cpu_cdlp_time = 0; 150 | 151 | try { 152 | begin = std::chrono::high_resolution_clock::now(); 153 | std::vector> cpu_cdlp_result = CPU_CDLP(graph, graph.cdlp_max_its); 154 | end = std::chrono::high_resolution_clock::now(); 155 | cpu_cdlp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 156 | printf("CPU Community Detection cost time: %f s\n", cpu_cdlp_time); 157 | 158 | if (CDLP_checker(graph, cpu_cdlp_result, graph.base_path + "-CDLP")) 159 | result_all.push_back(std::make_pair("CommunityDetection", std::to_string(cpu_cdlp_time))); 160 | else 161 | result_all.push_back(std::make_pair("CommunityDetection", "wrong")); 162 | } 163 | catch(...) { 164 | result_all.push_back(std::make_pair("CommunityDetection", "failed!")); 165 | } 166 | } 167 | else 168 | result_all.push_back(std::make_pair("CommunityDetection", "N/A")); 169 | } 170 | 171 | std::cout << "Result: " << std::endl; 172 | int res_size = result_all.size(); 173 | for (int i = 0; i < res_size; i++) { 174 | std::cout << result_all[i].second; 175 | if (i != res_size - 1) 176 | std::cout << ","; 177 | } 178 | std::cout << std::endl; 179 | 180 | graph.save_to_CSV(result_all, "./result-cpu.csv"); 181 | 182 | //freopen("/dev/tty", "r", stdin); 183 | 184 | return 0; 185 | } 186 | -------------------------------------------------------------------------------- /include/CPU_adj_list/CPU_adj_list.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | template // weight_type may be int, long long int, float, double... 17 | class graph_structure 18 | { 19 | //The data structure used on the CPU provides common operations such as adding nodes and edges on the graph 20 | 21 | /* Special note that the actual labels of nodes on the graph may be unordered. 22 | In the end, we provide some functions for converting from continuous labels (used in operators) to actual labels (original data) */ 23 | public: 24 | int V = 0; // the number of vertices 25 | long long E = 0; // the number of edges 26 | 27 | // OUTs[u] = v means there is an edge starting from u to v 28 | std::vector>> OUTs; 29 | // INs is transpose of OUTs. 
INs[u] = v means there is an edge starting from v to u 30 | std::vector>> INs; 31 | 32 | /*constructors*/ 33 | graph_structure() {} 34 | graph_structure(int n) 35 | { 36 | V = n; 37 | OUTs.resize(n); // initialize n vertices 38 | INs.resize(n); 39 | } 40 | int size() 41 | { 42 | return V; 43 | } 44 | 45 | /*class member functions*/ 46 | inline void add_edge(int, int, weight_type); // this function can change edge weights 47 | 48 | inline void remove_edge(int, int);//Remove any edge that connects two vertices 49 | inline void remove_edge(std::string, std::string);//Remove any edge that connects two vertices 50 | inline void remove_all_adjacent_edges(int);//Remove all edges, the input params is vertex numbers 51 | 52 | inline bool contain_edge(int, int); // whether there is an edge 53 | inline weight_type edge_weight(int, int); //get edge weight 54 | inline long long int edge_number(); // the total number of edges 55 | 56 | inline void print();//print graph 57 | inline void clear();// clear graph 58 | inline int out_degree(int);//get graph out degree 59 | inline int in_degree(int);//get graph in degree 60 | 61 | std::unordered_map vertex_str_to_id; // vertex_str_to_id[vertex_name] = vertex_id 62 | std::vector> vertex_id_to_str; // vertex_id_to_str[vertex_id].first = vertex_name, vertex_id_to_str[vertex_id].second = whether the vertex is valid 63 | 64 | std::queue invalid_vertex_id; // store the invalid vertex id 65 | 66 | int add_vertice(std::string); // Read the vertex information as a string 67 | void remove_vertice(std::string); // Remove the vertex information as a string 68 | void add_edge(std::string, std::string, weight_type); 69 | 70 | template 71 | std::vector> res_trans_id_val(std::vector &res); 72 | std::vector> res_trans_id_id(std::vector &wcc_res); 73 | }; 74 | 75 | /*class member functions*/ 76 | 77 | template 78 | int graph_structure::add_vertice(std::string vertex) 79 | { 80 | if (vertex_str_to_id.find(vertex) == vertex_str_to_id.end()) 81 | { 82 | if (invalid_vertex_id.empty()) { 83 | vertex_id_to_str.push_back(std::make_pair(vertex, true)); 84 | vertex_str_to_id[vertex] = V++; 85 | std::vector> x; 86 | OUTs.push_back(x); 87 | INs.push_back(x); 88 | } 89 | else { 90 | int v = invalid_vertex_id.front(); 91 | invalid_vertex_id.pop(); 92 | vertex_id_to_str[v].first = vertex; 93 | vertex_id_to_str[v].second = true; 94 | vertex_str_to_id[vertex] = v; 95 | 96 | std::cout << "Recover vertex id " << v << std::endl; 97 | } 98 | } 99 | return vertex_str_to_id[vertex]; 100 | } 101 | 102 | template 103 | void graph_structure::remove_vertice(std::string vertex) { 104 | // if the vertex is not exist, return 105 | if (vertex_str_to_id.find(vertex) == vertex_str_to_id.end()) { 106 | std::cerr << "vertex " << vertex << " not exist!" 
<< std::endl; 107 | return; 108 | } 109 | int v = vertex_str_to_id[vertex]; 110 | remove_all_adjacent_edges(v); 111 | vertex_str_to_id.erase(vertex); 112 | vertex_id_to_str[v].second = false; 113 | invalid_vertex_id.push(v); 114 | } 115 | 116 | template 117 | void graph_structure::add_edge(int e1, int e2, weight_type ec) 118 | { 119 | sorted_vector_binary_operations_insert(OUTs[e1], e2, ec); 120 | sorted_vector_binary_operations_insert(INs[e2], e1, ec); 121 | } 122 | 123 | template 124 | void graph_structure::add_edge(std::string e1, std::string e2, weight_type ec) 125 | { 126 | E++; 127 | int v1 = add_vertice(e1); 128 | int v2 = add_vertice(e2); 129 | add_edge(v1, v2, ec); 130 | } 131 | 132 | template 133 | void graph_structure::remove_edge(int e1, int e2) 134 | { 135 | sorted_vector_binary_operations_erase(OUTs[e1], e2); 136 | sorted_vector_binary_operations_erase(INs[e2], e1); 137 | } 138 | 139 | template 140 | void graph_structure::remove_edge(std::string e1, std::string e2) 141 | { 142 | if (vertex_str_to_id.find(e1) == vertex_str_to_id.end()) { 143 | std::cerr << "vertex " << e1 << " not exist!" << std::endl; 144 | return; 145 | } 146 | if (vertex_str_to_id.find(e2) == vertex_str_to_id.end()) { 147 | std::cerr << "vertex " << e2 << " not exist!" << std::endl; 148 | return; 149 | } 150 | int v1 = vertex_str_to_id[e1]; 151 | int v2 = vertex_str_to_id[e2]; 152 | remove_edge(v1, v2); 153 | } 154 | 155 | template 156 | void graph_structure::remove_all_adjacent_edges(int v) 157 | { 158 | for (auto it = OUTs[v].begin(); it != OUTs[v].end(); it++) 159 | sorted_vector_binary_operations_erase(INs[it->first], v); 160 | 161 | for (auto it = INs[v].begin(); it != INs[v].end(); it++) 162 | sorted_vector_binary_operations_erase(OUTs[it->first], v); 163 | 164 | std::vector>().swap(OUTs[v]); 165 | std::vector>().swap(INs[v]); 166 | } 167 | 168 | template 169 | bool graph_structure::contain_edge(int e1, int e2) 170 | { 171 | return sorted_vector_binary_operations_search(OUTs[e1], e2); 172 | } 173 | template 174 | weight_type graph_structure::edge_weight(int e1, int e2) 175 | { 176 | return sorted_vector_binary_operations_search_weight(OUTs[e1], e2); 177 | } 178 | template 179 | long long int graph_structure::edge_number() 180 | { 181 | long long int num = 0; 182 | for (auto it : OUTs) 183 | num = num + it.size(); 184 | 185 | return num; 186 | } 187 | template 188 | void graph_structure::clear() 189 | { 190 | std::vector>>().swap(OUTs); 191 | std::vector>>().swap(INs); 192 | } 193 | template 194 | int graph_structure::out_degree(int v) 195 | { 196 | return OUTs[v].size(); 197 | } 198 | template 199 | int graph_structure::in_degree(int v) 200 | { 201 | return INs[v].size(); 202 | } 203 | template 204 | void graph_structure::print() 205 | { 206 | 207 | std::cout << "graph_structure_print:" << std::endl; 208 | 209 | for (int i = 0; i < V; i++) 210 | { 211 | std::cout << "Vertex " << i << " OUTs List: "; 212 | int v_size = OUTs[i].size(); 213 | for (int j = 0; j < v_size; j++) 214 | { 215 | std::cout << "<" << OUTs[i][j].first << "," << OUTs[i][j].second << "> "; 216 | } 217 | std::cout << std::endl; 218 | } 219 | std::cout << "graph_structure_print END" << std::endl; 220 | } 221 | 222 | template 223 | template 224 | std::vector> graph_structure::res_trans_id_val(std::vector &res) 225 | { 226 | std::vector> res_str; 227 | int res_size = res.size(); 228 | for (int i = 0; i < res_size; i++) { 229 | if (vertex_id_to_str[i].second) 230 | res_str.push_back(std::make_pair(vertex_id_to_str[i].first, res[i])); 231 
| } 232 | 233 | return res_str; 234 | } 235 | 236 | template 237 | std::vector> graph_structure::res_trans_id_id(std::vector &wcc_res) 238 | { 239 | std::vector> res_str; 240 | int res_size = wcc_res.size(); 241 | for (int i = 0; i < res_size; i++) { 242 | if (vertex_id_to_str[i].second) { 243 | if (vertex_id_to_str[wcc_res[i]].second) 244 | res_str.push_back(std::make_pair(vertex_id_to_str[i].first, vertex_id_to_str[wcc_res[i]].first)); 245 | else 246 | std::cerr << "vertex " << vertex_id_to_str[wcc_res[i]].first << " not exist!" << std::endl; 247 | } 248 | } 249 | 250 | return res_str; 251 | } 252 | -------------------------------------------------------------------------------- /src/LDBC/LDBC_GPU_csr.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | int main() 16 | { 17 | std::ios::sync_with_stdio(false); 18 | std::cin.tie(0); 19 | std::cout.tie(0); 20 | 21 | //freopen("../input.txt", "r", stdin); 22 | 23 | std::vector> result_all; 24 | 25 | std::string directory; 26 | std::cout << "Please input the data directory: " << std::endl; 27 | std::cin >> directory; 28 | 29 | if (directory.back() != '/') 30 | directory += "/"; 31 | 32 | std::string graph_name; 33 | std::cout << "Please input the graph name: " << std::endl; 34 | std::cin >> graph_name; 35 | 36 | std::string config_file_path = directory + graph_name + ".properties"; 37 | 38 | LDBC graph(directory, graph_name); 39 | graph.read_config(config_file_path); //Read the ldbc configuration file to obtain key parameter information in the file 40 | 41 | auto begin = std::chrono::high_resolution_clock::now(); 42 | graph.load_graph(); //Read the vertex and edge files corresponding to the configuration file, // The vertex information in graph is converted to csr format for storage 43 | auto end = std::chrono::high_resolution_clock::now(); 44 | double load_ldbc_time = std::chrono::duration_cast(end - begin).count() / 1e9; // s 45 | printf("load_ldbc_time cost time: %f s\n", load_ldbc_time); 46 | 47 | begin = std::chrono::high_resolution_clock::now(); 48 | CSR_graph csr_graph = toCSR(graph); 49 | end = std::chrono::high_resolution_clock::now(); 50 | double graph_to_csr_time = std::chrono::duration_cast(end - begin).count() / 1e9; // s 51 | std::cout << "Number of vertices: " << csr_graph.OUTs_Neighbor_start_pointers.size()-1 << std::endl; 52 | std::cout << "Number of edges: " << csr_graph.OUTs_Edges.size() << std::endl; 53 | printf("graph_to_csr_time cost time: %f s\n", graph_to_csr_time); 54 | 55 | if (1) { 56 | if (graph.sup_bfs) { 57 | double gpu_bfs_time = 0; 58 | 59 | try { 60 | std::vector> bfs_result; 61 | begin = std::chrono::high_resolution_clock::now(); 62 | bfs_result = Cuda_Bfs(graph, csr_graph, graph.bfs_src_name); 63 | end = std::chrono::high_resolution_clock::now(); 64 | gpu_bfs_time = std::chrono::duration_cast(end - begin).count() / 1e9; 65 | printf("GPU BFS cost time: %f s\n", gpu_bfs_time); 66 | 67 | if (Bfs_checker(graph, bfs_result, graph.base_path + "-BFS")) 68 | result_all.push_back(std::make_pair("BFS", std::to_string(gpu_bfs_time))); 69 | else 70 | result_all.push_back(std::make_pair("BFS", "wrong")); 71 | } 72 | catch (...) 
{ 73 | result_all.push_back(std::make_pair("BFS", "failed!")); 74 | } 75 | } 76 | else 77 | result_all.push_back(std::make_pair("BFS", "N/A")); 78 | 79 | } 80 | 81 | if (1) { 82 | if (graph.sup_sssp) { 83 | double gpu_sssp_time = 0; 84 | 85 | try { 86 | std::vector> sssp_result; 87 | //std::vector pre_v; 88 | begin = std::chrono::high_resolution_clock::now(); 89 | sssp_result = Cuda_SSSP(graph, csr_graph, graph.sssp_src_name, std::numeric_limits::max()); 90 | //sssp_result = Cuda_SSSP_pre(graph, csr_graph, graph.sssp_src_name, pre_v, std::numeric_limits::max()); 91 | end = std::chrono::high_resolution_clock::now(); 92 | gpu_sssp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 93 | printf("GPU SSSP cost time: %f s\n", gpu_sssp_time); 94 | if (SSSP_checker(graph, sssp_result, graph.base_path + "-SSSP")) 95 | result_all.push_back(std::make_pair("SSSP", std::to_string(gpu_sssp_time))); 96 | else 97 | result_all.push_back(std::make_pair("SSSP", "wrong")); 98 | 99 | /*std::vector> path = path_query(graph, graph.sssp_src_name, "338", pre_v); 100 | for (auto p : path) { 101 | std::cout << p.first << "->" << p.second << std::endl; 102 | }*/ 103 | } 104 | catch (...) { 105 | result_all.push_back(std::make_pair("SSSP", "failed!")); 106 | } 107 | } 108 | else 109 | result_all.push_back(std::make_pair("SSSP", "N/A")); 110 | } 111 | 112 | if (1) { 113 | if (graph.sup_wcc) { 114 | double gpu_wcc_time = 0; 115 | 116 | try { 117 | std::vector> wcc_result; 118 | begin = std::chrono::high_resolution_clock::now(); 119 | wcc_result = Cuda_WCC(graph, csr_graph); 120 | end = std::chrono::high_resolution_clock::now(); 121 | gpu_wcc_time = std::chrono::duration_cast(end - begin).count() / 1e9; 122 | printf("GPU WCC cost time: %f s\n", gpu_wcc_time); 123 | if (WCC_checker(graph, wcc_result, graph.base_path + "-WCC")) 124 | result_all.push_back(std::make_pair("WCC", std::to_string(gpu_wcc_time))); 125 | else 126 | result_all.push_back(std::make_pair("WCC", "wrong")); 127 | } 128 | catch (...) { 129 | result_all.push_back(std::make_pair("WCC", "failed!")); 130 | } 131 | } 132 | else 133 | result_all.push_back(std::make_pair("WCC", "N/A")); 134 | } 135 | 136 | if (1) { 137 | if (graph.sup_pr) { 138 | double gpu_pr_time = 0; 139 | 140 | try { 141 | std::vector> pr_result; 142 | begin = std::chrono::high_resolution_clock::now(); 143 | pr_result = Cuda_PR(graph, csr_graph, graph.pr_its, graph.pr_damping); 144 | end = std::chrono::high_resolution_clock::now(); 145 | gpu_pr_time = std::chrono::duration_cast(end - begin).count() / 1e9; 146 | printf("GPU PageRank cost time: %f s\n", gpu_pr_time); 147 | if (PR_checker(graph, pr_result, graph.base_path + "-PR")) 148 | result_all.push_back(std::make_pair("PR", std::to_string(gpu_pr_time))); 149 | else 150 | result_all.push_back(std::make_pair("PR", "wrong")); 151 | } 152 | catch (...) 
{ 153 | result_all.push_back(std::make_pair("PR", "failed!")); 154 | } 155 | } 156 | else 157 | result_all.push_back(std::make_pair("PR", "N/A")); 158 | } 159 | 160 | if (1) { 161 | if (graph.sup_cdlp) { 162 | double gpu_cdlp_time = 0; 163 | 164 | try { 165 | std::vector> cdlp_result; 166 | begin = std::chrono::high_resolution_clock::now(); 167 | cdlp_result = Cuda_CDLP(graph, csr_graph, graph.cdlp_max_its); 168 | end = std::chrono::high_resolution_clock::now(); 169 | gpu_cdlp_time = std::chrono::duration_cast(end - begin).count() / 1e9; 170 | printf("GPU Community Detection cost time: %f s\n", gpu_cdlp_time); 171 | if (CDLP_checker(graph, cdlp_result, graph.base_path + "-CDLP")) 172 | result_all.push_back(std::make_pair("CDLP", std::to_string(gpu_cdlp_time))); 173 | else 174 | result_all.push_back(std::make_pair("CDLP", "wrong")); 175 | } 176 | catch (...) { 177 | result_all.push_back(std::make_pair("CDLP", "failed!")); 178 | } 179 | } 180 | else 181 | result_all.push_back(std::make_pair("CDLP", "N/A")); 182 | } 183 | 184 | std::cout << "Result: " << std::endl; 185 | int res_size = result_all.size(); 186 | for (int i = 0; i < res_size; i++) { 187 | std::cout << result_all[i].second; 188 | if (i != res_size - 1) 189 | std::cout << ","; 190 | } 191 | std::cout << std::endl; 192 | 193 | graph.save_to_CSV(result_all, "./result-gpu.csv"); 194 | 195 | //freopen("/dev/tty", "r", stdin); 196 | 197 | return 0; 198 | } 199 | -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_Community_Detection.cuh: -------------------------------------------------------------------------------- 1 | #ifndef CDLPGPU 2 | #define CDLPGPU 3 | 4 | #include "cuda_runtime.h" 5 | #include 6 | #include "device_launch_parameters.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | using namespace std; 17 | #define CD_THREAD_PER_BLOCK 512 18 | 19 | __global__ void Label_init(int *labels, int *all_pointer, int N); 20 | __global__ void LabelPropagation(int *all_pointer, int *prop_labels, int *labels, int *all_edge, int N); 21 | __global__ void Get_New_Label(int *all_pointer, int *prop_labels, int *new_labels, int N); 22 | void checkCudaError(cudaError_t err, const char* msg); 23 | void checkDeviceProperties(); 24 | 25 | void CDLP_GPU(graph_structure& graph, CSR_graph& input_graph, std::vector& res, int max_iterations); 26 | 27 | std::vector> Cuda_CDLP(graph_structure& graph, CSR_graph& input_graph, int max_iterations); 28 | 29 | // propagate the label, the label of the neighbor vertex is propagated in parallel 30 | __global__ void LabelPropagation(int *all_pointer, int *prop_labels, int *labels, int *all_edge, int N) 31 | { 32 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 33 | 34 | if (tid >= 0 && tid < N) 35 | { 36 | for (int c = all_pointer[tid]; c < all_pointer[tid + 1]; c++) // traverse the neighbor of the tid vertex 37 | { 38 | prop_labels[c] = labels[all_edge[c]]; // record the label of the neighbor vertex 39 | } 40 | } 41 | } 42 | 43 | // Initialize all labels at once with GPU.Initially 44 | // each vertex v is assigned a unique label which matches its identifier. 
45 | __global__ void Label_init(int *labels, int *all_pointer, int N) 46 | { 47 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 48 | 49 | if (tid >= 0 && tid < N) // tid decides process which vertex 50 | { 51 | labels[tid] = tid; // each vertex is initially labeled by itself 52 | } 53 | } 54 | 55 | // each thread is responsible for one vertex 56 | // every segmentation are sorted 57 | // count Frequency from the start in the global_space_for_label to the end in the global_space_for_label 58 | // the new labels are stroed in the new_labels 59 | __global__ void Get_New_Label(int *all_pointer, int *prop_labels, int *new_labels, int N) 60 | { 61 | // Use GPU to propagate all labels at the same time. 62 | int tid = blockDim.x * blockIdx.x + threadIdx.x; // tid decides process which vertex 63 | if (tid >= 0 && tid < N) { 64 | int maxlabel = prop_labels[all_pointer[tid]], maxcount = 0; // the label that appears the most times and its number of occurrences 65 | for (int c = all_pointer[tid], last_label = prop_labels[all_pointer[tid]], last_count = 0; c < all_pointer[tid + 1]; c++) // traverse the neighbor vertex label data in order 66 | { 67 | if (prop_labels[c] == last_label) 68 | { 69 | last_count++; // add up the number of label occurrences 70 | if (last_count > maxcount) // the number of label occurrences currently traversed is greater than the recorded value 71 | { 72 | maxcount = last_count; // update maxcount and maxlabel 73 | maxlabel = last_label; 74 | } 75 | } 76 | else 77 | { 78 | last_label = prop_labels[c]; // a new label appears, updates the label and number of occurrences 79 | last_count = 1; 80 | } 81 | } 82 | new_labels[tid] = maxlabel; // record the maxlabel 83 | } 84 | } 85 | 86 | // Community Detection Using Label Propagation on GPU 87 | // Returns label of the graph based on the graph and number of iterations. 
88 | void CDLP_GPU(graph_structure& graph, CSR_graph& input_graph, std::vector& res, int max_iterations) 89 | { 90 | int N = graph.size(); // number of vertices in the graph 91 | dim3 init_label_block((N + CD_THREAD_PER_BLOCK - 1) / CD_THREAD_PER_BLOCK, 1, 1); // the number of blocks used in the gpu 92 | dim3 init_label_thread(CD_THREAD_PER_BLOCK, 1, 1); // the number of threads used in the gpu 93 | 94 | int* all_edge = input_graph.all_edge; // graph stored in csr format 95 | int* all_pointer = input_graph.all_pointer; 96 | 97 | int* prop_labels = nullptr; 98 | int* new_prop_labels = nullptr; 99 | int* new_labels = nullptr; 100 | int* labels = nullptr; 101 | 102 | int CD_ITERATION = max_iterations; // fixed number of iterations 103 | long long E = input_graph.E_all; // number of edges in the graph 104 | cudaMallocManaged((void**)&new_labels, N * sizeof(int)); 105 | cudaMallocManaged((void**)&labels, N * sizeof(int)); 106 | cudaMallocManaged((void**)&prop_labels, E * sizeof(int)); 107 | cudaMallocManaged((void**)&new_prop_labels, E * sizeof(int)); 108 | 109 | cudaDeviceSynchronize(); // synchronize, ensure the cudaMalloc is complete 110 | cudaError_t cuda_status = cudaGetLastError(); 111 | if (cuda_status != cudaSuccess) // use the cudaGetLastError to check for possible cudaMalloc errors 112 | { 113 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 114 | return; 115 | } 116 | 117 | Label_init<<>>(labels, all_pointer, N); // initialize all labels at once with GPU 118 | 119 | cudaDeviceSynchronize(); // synchronize, ensure the label initialization is complete 120 | cuda_status = cudaGetLastError(); 121 | if (cuda_status != cudaSuccess) // use the cudaGetLastError to check for possible label initialization errors 122 | { 123 | fprintf(stderr, "Label init failed: %s\n", cudaGetErrorString(cuda_status)); 124 | return; 125 | } 126 | 127 | int it = 0; // number of iterations 128 | // Determine temporary device storage requirements 129 | void *d_temp_storage = NULL; 130 | size_t temp_storage_bytes = 0; 131 | cub::DeviceSegmentedSort::SortKeys( 132 | d_temp_storage, temp_storage_bytes, prop_labels, new_prop_labels, 133 | E, N, all_pointer, all_pointer + 1); // sort the labels of each vertex's neighbors 134 | 135 | cudaDeviceSynchronize(); 136 | cuda_status = cudaGetLastError(); 137 | if (cuda_status != cudaSuccess) 138 | { 139 | fprintf(stderr, "Sort failed: %s\n", cudaGetErrorString(cuda_status)); 140 | return; 141 | } 142 | 143 | cudaError_t err = cudaMalloc(&d_temp_storage, temp_storage_bytes); 144 | if (err != cudaSuccess) 145 | { 146 | cerr << "Error: " << "Malloc failed" << " (" << cudaGetErrorString(err) << ")" << endl; 147 | return; 148 | } 149 | 150 | while (it < CD_ITERATION) // continue for a fixed number of iterations 151 | { 152 | LabelPropagation<<>>(all_pointer, prop_labels, labels, all_edge, N); // calculate the neighbor label array for each vertex 153 | cudaDeviceSynchronize(); // synchronize, ensure the label propagation is complete 154 | 155 | cuda_status = cudaGetLastError(); // check for errors 156 | if (cuda_status != cudaSuccess) { 157 | fprintf(stderr, "LabelPropagation failed: %s\n", cudaGetErrorString(cuda_status)); 158 | return; 159 | } 160 | 161 | // Run sorting operation 162 | cub::DeviceSegmentedSort::SortKeys( 163 | d_temp_storage, temp_storage_bytes, prop_labels, new_prop_labels, 164 | E, N, all_pointer, all_pointer + 1); // sort the labels of each vertex's neighbors 165 | cudaDeviceSynchronize(); 166 | 167 | cuda_status = 
cudaGetLastError(); // check for errors 168 | if (cuda_status != cudaSuccess) { 169 | fprintf(stderr, "Sort failed: %s\n", cudaGetErrorString(cuda_status)); 170 | return; 171 | } 172 | 173 | Get_New_Label<<>>(all_pointer, new_prop_labels, new_labels, N); // generate a new vertex label by label propagation information 174 | 175 | cudaDeviceSynchronize(); 176 | 177 | cuda_status = cudaGetLastError(); // check for errors 178 | if (cuda_status != cudaSuccess) { 179 | fprintf(stderr, "Get_New_Label failed: %s\n", cudaGetErrorString(cuda_status)); 180 | return; 181 | } 182 | 183 | it++; // record number of iterations 184 | std::swap(labels, new_labels); // store the updated label in the labels 185 | } 186 | cudaFree(prop_labels); // free memory 187 | cudaFree(new_prop_labels); 188 | cudaFree(new_labels); 189 | cudaFree(d_temp_storage); 190 | 191 | res.resize(N); 192 | 193 | for (int i = 0; i < N; i++) 194 | { 195 | res[i] = graph.vertex_id_to_str[labels[i]].first; // convert the label to string and store it in res 196 | } 197 | 198 | cudaFree(labels); 199 | } 200 | 201 | // check whether cuda errors occur and output error information 202 | void checkCudaError(cudaError_t err, const char *msg) 203 | { 204 | if (err != cudaSuccess) 205 | { 206 | cerr << "Error: " << msg << " (" << cudaGetErrorString(err) << ")" << endl; // output error message 207 | exit(EXIT_FAILURE); 208 | } 209 | } 210 | 211 | // Community Detection Using Label Propagation on GPU 212 | // Returns label of the graph based on the graph and number of iterations. 213 | // the type of the vertex and label are string 214 | std::vector> Cuda_CDLP(graph_structure& graph, CSR_graph& input_graph, int max_iterations) { 215 | std::vector result; 216 | CDLP_GPU(graph, input_graph, result, max_iterations); // get the labels of each vertex. 
vector index is the id of vertex 217 | 218 | std::vector> res; 219 | int size = result.size(); 220 | for (int i = 0; i < size; i++) 221 | res.push_back(std::make_pair(graph.vertex_id_to_str[i].first, result[i])); // for each vertex, get its string number and store it in res 222 | 223 | return res; // return the results 224 | } 225 | 226 | #endif -------------------------------------------------------------------------------- /include/LDBC/ldbc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // the LDBC class records the graph operation parameters and graph structure 5 | // defines functions for reading configuration files and graph structure files 6 | template 7 | class LDBC : public graph_structure { 8 | public: 9 | // class initializer 10 | LDBC() : graph_structure() {} 11 | LDBC(int n) : graph_structure(n) {} 12 | LDBC(std::string directory, std::string name) : graph_structure() { 13 | this->base_path = directory + name; 14 | } 15 | 16 | bool is_directed = true; // direct graph or undirect graph 17 | bool is_weight = false; // weight graph or no weight graph 18 | bool is_sssp_weight = true; // the weight of sssp 19 | 20 | bool sup_bfs = false; // records the graph operator to be computed 21 | bool sup_cdlp = false; 22 | bool sup_pr = false; 23 | bool sup_wcc = false; 24 | bool sup_sssp = false; 25 | std::string bfs_src_name; // get bfs vertex source 26 | std::string sssp_src_name; // get sssp vertex source 27 | std::string base_path; // graph structure file storage path 28 | int bfs_src = 0; // define bfs vertex source is 0 29 | int cdlp_max_its = 10; // cdlp algo max iterator num 30 | int pr_its = 10; // pr algo iterator num 31 | int sssp_src = 0; // define sssp vertex source is 0 32 | double pr_damping = 0.85; // pr algorithm damping coefficient 33 | 34 | void load_graph(); 35 | void read_config(std::string config_path); 36 | 37 | void save_to_CSV(std::vector>& res, std::string file_path); 38 | }; 39 | 40 | // read config file 41 | // the specific information includes the number of edges, the number of vertices, the parameters of graph operators, etc 42 | template 43 | void LDBC::read_config(std::string config_path) { 44 | std::ifstream file(config_path); 45 | std::string line; 46 | 47 | if (!file.is_open()) { // unable to open file 48 | std::cerr << "Unable to open file: " << config_path << std::endl; 49 | return; 50 | } 51 | 52 | std::cout << "Reading config file..." << std::endl; 53 | 54 | while (getline(file, line)) { // read the config file line-by-line 55 | if (line.empty() || line[0] == '#') // invalid information, blank lines or comment lines 56 | continue; 57 | 58 | auto split_str = parse_string(line, " = "); // read configuration entries and their configuration values 59 | 60 | if (split_str.size() != 2) { 61 | std::cerr << "Invalid line: " << line << std::endl; 62 | continue; 63 | } 64 | 65 | auto key = split_str[0]; // configuration entry 66 | auto value = split_str[1]; // configuration value 67 | 68 | auto parts = parse_string(key, "."); 69 | if (parts.size() >= 2) { 70 | if (parts.back() == "vertex-file") // Reading *.properties file to get vertex file. eg. 
datagen-7_5-fb.v 71 | std::cout << "vertex_file: " << value << std::endl; 72 | else if (parts.back() == "edge-file") // Reading *.properties file to get edge file 73 | std::cout << "edge_file: " << value << std::endl; 74 | else if (parts.back() == "vertices") // Reading *.properties file to get the number of vertices 75 | std::cout << "V: " << value << std::endl; 76 | else if (parts.back() == "edges") // Reading *.properties file to get the number of edges 77 | std::cout << "E: " << value << std::endl; 78 | else if (parts.back() == "directed") { // Reading *.properties file to knows whether the graph is directed or undirected 79 | if (value == "false") 80 | this->is_directed = false; 81 | else 82 | this->is_directed = true; 83 | std::cout << "is_directed: " << this->is_directed << std::endl; 84 | } 85 | else if (parts.back() == "names") {//eg. graph.datagen-7_5-fb.edge-properties.names = weight 86 | if (value == "weight") 87 | this->is_weight = true; 88 | else 89 | this->is_weight = false; 90 | std::cout << "is_weight: " << this->is_weight << std::endl; 91 | } 92 | else if (parts.back() == "algorithms") { // gets the type of algorithm contained in the configuration file 93 | auto algorithms = parse_string(value, ", "); 94 | for (auto& algorithm : algorithms) { 95 | if (algorithm == "bfs") 96 | sup_bfs = true; 97 | else if (algorithm == "cdlp") 98 | sup_cdlp = true; 99 | else if (algorithm == "pr") 100 | sup_pr = true; 101 | else if (algorithm == "sssp") 102 | sup_sssp = true; 103 | else if (algorithm == "wcc") 104 | sup_wcc = true; 105 | } 106 | std::cout << "bfs: " << sup_bfs << std::endl; 107 | std::cout << "cdlp: " << sup_cdlp << std::endl; 108 | std::cout << "pr: " << sup_pr << std::endl; 109 | std::cout << "sssp: " << sup_sssp << std::endl; 110 | std::cout << "wcc: " << sup_wcc << std::endl; 111 | } 112 | else if (parts.back() == "cdlp-max-iterations") { // iteration parameters in Community Detection 113 | cdlp_max_its = stoi(value); 114 | std::cout << "cdlp_max_its: " << cdlp_max_its << std::endl; 115 | } 116 | else if (parts.back() == "pr-damping-factor") { // damping factor in PageRank 117 | pr_damping = stod(value); 118 | std::cout << "pr_damping: " << pr_damping << std::endl; 119 | } 120 | else if (parts.back() == "pr-num-iterations") { // number of iterations in PageRank 121 | pr_its = stoi(value); 122 | std::cout << "pr_its: " << pr_its << std::endl; 123 | } 124 | else if (parts.back() == "sssp-weight-property") { // weight property in sssp 125 | if (value == "weight") 126 | this->is_sssp_weight = true; 127 | else 128 | this->is_sssp_weight = false; 129 | std::cout << "is_sssp_weight: " << this->is_sssp_weight << std::endl; 130 | } 131 | else if (parts.back() == "max-iterations") { 132 | cdlp_max_its = stoi(value); 133 | std::cout << "cdlp_max_its: " << cdlp_max_its << std::endl; 134 | } 135 | else if (parts.back() == "damping-factor") { 136 | pr_damping = stod(value); 137 | std::cout << "pr_damping: " << pr_damping << std::endl; 138 | } 139 | else if (parts.back() == "num-iterations") { 140 | pr_its = stoi(value); 141 | std::cout << "pr_its: " << pr_its << std::endl; 142 | } 143 | else if (parts.back() == "weight-property") { 144 | if (value == "weight") 145 | this->is_sssp_weight = true; 146 | else 147 | this->is_sssp_weight = false; 148 | std::cout << "is_sssp_weight: " << this->is_sssp_weight << std::endl; 149 | } 150 | else if (parts.back() == "source-vertex") { 151 | if (parts[parts.size() - 2] == "bfs") { 152 | bfs_src_name = value; // get bfs source vertex; eg. 
graph.datagen-7_5-fb.bfs.source-vertex = 6 153 | std::cout << "bfs_source_vertex: " << value << std::endl; 154 | } 155 | else { 156 | sssp_src_name = value; // get sssp source vertex; eg. graph.datagen-7_5-fb.sssp.source-vertex = 6 157 | std::cout << "sssp_source_vertex: " << value << std::endl; 158 | } 159 | } 160 | } 161 | } 162 | std::cout << "Done." << std::endl; // read complete 163 | file.close(); 164 | } 165 | 166 | // read the structure of the graph, including vertices and edges 167 | template 168 | void LDBC::load_graph() { 169 | 170 | std::string vertex_file_path; 171 | vertex_file_path = this->base_path + ".v"; // file with ".v" suffix stores vertices information 172 | 173 | std::cout << "Loading vertices..." << std::endl; 174 | std::string line_content; 175 | std::ifstream myfile(vertex_file_path); // open the vertex data file 176 | 177 | if (myfile.is_open()) { 178 | while (getline(myfile, line_content)) // read data line by line 179 | this->add_vertice(line_content); // Parsed the read data 180 | myfile.close(); 181 | } 182 | else { // Unable to open file 183 | std::cout << "Unable to open file " << vertex_file_path << std::endl 184 | << "Please check the file location or file name." << std::endl; 185 | getchar(); 186 | exit(1); 187 | } 188 | 189 | std::cout << "Done." << std::endl; 190 | if (sup_bfs) { 191 | if (this->vertex_str_to_id.find(bfs_src_name) == this->vertex_str_to_id.end()) { // bfs_src_name from read_configure 192 | std::cout << "Invalid source vertex for BFS" << std::endl; 193 | getchar(); 194 | exit(1); 195 | } 196 | else 197 | bfs_src = this->vertex_str_to_id[bfs_src_name]; 198 | } 199 | 200 | if (sup_sssp) { 201 | if (this->vertex_str_to_id.find(sssp_src_name) == this->vertex_str_to_id.end()) { // sssp_src_name from read_configure 202 | std::cout << "Invalid source vertex for SSSP" << std::endl; 203 | getchar(); 204 | exit(1); 205 | } 206 | else 207 | sssp_src = this->vertex_str_to_id[sssp_src_name]; 208 | } 209 | 210 | std::string edge_file_path; 211 | edge_file_path = this->base_path + ".e"; // file with ".e" suffix stores edges information 212 | 213 | std::cout << "Loading edges..." << std::endl; 214 | myfile.open(edge_file_path); // open the edge data file 215 | 216 | if (myfile.is_open()) { 217 | while (getline(myfile, line_content)) { // read data line by line 218 | std::vector Parsed_content = parse_string(line_content, " "); 219 | if (Parsed_content.size() < 2) { 220 | std::cerr << "Invalid edge input!" << std::endl; 221 | continue; 222 | } 223 | weight_type ec = Parsed_content.size() > 2 ? std::stod(Parsed_content[2]) : 1; // get weight 224 | this->add_edge(Parsed_content[0], Parsed_content[1], ec); // add edge 225 | if (!is_directed) { // undirected graphs require additional opposite edges 226 | this->add_edge(Parsed_content[1], Parsed_content[0], ec); 227 | } 228 | } 229 | myfile.close(); 230 | } 231 | else { // Unable to open file 232 | std::cout << "Unable to open file " << edge_file_path << std::endl 233 | << "Please check the file location or file name." << std::endl; 234 | getchar(); 235 | exit(1); 236 | } 237 | std::cout << "Done." 
<< std::endl; 238 | } 239 | 240 | // save the results in csv format to the given path 241 | template 242 | void LDBC::save_to_CSV(std::vector>& res, std::string file_path) { 243 | std::ofstream out(file_path, std::ios::app); 244 | 245 | int res_size = res.size(); 246 | for (int i = 0; i < res_size; i++) { 247 | out << res[i].second; 248 | if (i != res_size - 1) 249 | out << ","; 250 | } 251 | out << std::endl; 252 | 253 | out.close(); 254 | } 255 | -------------------------------------------------------------------------------- /include/GPU_csr/algorithm/GPU_PageRank.cuh: -------------------------------------------------------------------------------- 1 | // PageRank_update.cuh 2 | #ifndef PAGERANK_CUH_ 3 | #define PAGERANK_CUH_ 4 | 5 | #include "cuda_runtime.h" 6 | #include 7 | #include "device_launch_parameters.h" 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | // Constants 15 | #define SMALL_BOUND 6 16 | #define NORMAL_BOUND 96 17 | #define THREAD_PER_BLOCK 512 18 | 19 | // Function prototypes 20 | 21 | // CUDA kernels 22 | __device__ double _atomicAdd(double* address, double val); 23 | __global__ void importance(double *npr, double *pr, double damp, int *in_edge, int *in_pointer, int GRAPHSIZE); 24 | __global__ void calculate_sink(double *pr, int *N_out_zero_gpu, int out_zero_size, double *sink_sum); 25 | __global__ void initialization(double *pr, double *outs, int *out_pointer, int N); 26 | __global__ void calculate_acc(double *pr,int *in_edge, int begin,int end,double *acc); 27 | __global__ void Antecedent_division(double *pr,double *npr, double *outs,double redi_tele, int N); 28 | 29 | void GPU_PR(graph_structure &graph, CSR_graph& csr_graph, vector &result, int iterations, double damping); 30 | 31 | std::vector> Cuda_PR(graph_structure &graph, CSR_graph &csr_graph, int iterations, double damping); 32 | 33 | // The gpu version of the pagerank algorithm 34 | // return the pagerank of each vertex based on the graph, damping factor and number of iterations. 
35 | // the pagerank value is stored in the results 36 | void GPU_PR (graph_structure &graph, CSR_graph& csr_graph, vector& result, int iterations, double damping) 37 | { 38 | int N = graph.V; // number of vertices in the graph 39 | double teleport = (1 - damping) / N; // teleport mechanism 40 | 41 | int* in_pointer = csr_graph.in_pointer; 42 | int* out_pointer = csr_graph.out_pointer; 43 | int* in_edge = csr_graph.in_edge; 44 | int* sink_vertex_gpu = nullptr; 45 | double* sink_sum = nullptr; 46 | double* pr = nullptr; 47 | double* npr = nullptr; 48 | double* outs = nullptr; 49 | 50 | dim3 blockPerGrid, threadPerGrid; 51 | 52 | vector sink_vertexs; 53 | 54 | cudaMallocManaged(&outs, N * sizeof(double)); 55 | cudaMallocManaged(&sink_sum, sizeof(double)); 56 | cudaMallocManaged(&npr, N * sizeof(double)); 57 | cudaMallocManaged(&pr, N * sizeof(double)); 58 | 59 | cudaDeviceSynchronize(); // synchronize, ensure the cudaMalloc is complete 60 | 61 | cudaError_t cuda_status = cudaGetLastError(); 62 | if (cuda_status != cudaSuccess) // use the cudaGetLastError to check for possible cudaMalloc errors 63 | { 64 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 65 | return; 66 | } 67 | 68 | for (int i = 0; i < N; i++) // traverse all vertices 69 | { 70 | if (graph.OUTs[i].size()==0) // this means that the vertex has no edges 71 | { 72 | sink_vertexs.push_back(i); // record the sink vertices 73 | } 74 | } 75 | int out_zero_size = sink_vertexs.size(); // the number of sink vertices 76 | cudaMallocManaged(&sink_vertex_gpu, sink_vertexs.size() * sizeof(int)); 77 | cudaDeviceSynchronize(); 78 | cudaMemcpy(sink_vertex_gpu, sink_vertexs.data(), sink_vertexs.size() * sizeof(int), cudaMemcpyHostToDevice); 79 | 80 | cuda_status = cudaGetLastError(); 81 | if (cuda_status != cudaSuccess) 82 | { 83 | fprintf(stderr, "Cuda malloc failed: %s\n", cudaGetErrorString(cuda_status)); 84 | return; 85 | } 86 | 87 | blockPerGrid.x = (N + THREAD_PER_BLOCK - 1) / THREAD_PER_BLOCK; // the number of blocks used in the gpu 88 | threadPerGrid.x = THREAD_PER_BLOCK; // the number of threads used in the gpu 89 | 90 | int iteration = 0; // number of iterations 91 | 92 | initialization<<>>(pr, outs, out_pointer, N); // initializes the pagerank and calculates the reciprocal of the out-degree 93 | cudaDeviceSynchronize(); 94 | while (iteration < iterations) // continue for a fixed number of iterations 95 | { 96 | *sink_sum = 0; 97 | calculate_sink<<>>(pr, sink_vertex_gpu, out_zero_size, sink_sum); // calculate the sinksum 98 | cudaDeviceSynchronize(); 99 | *sink_sum = (*sink_sum) * damping / N; // the redistributed value of sink vertices 100 | Antecedent_division<<>>(pr, npr, outs, teleport + (*sink_sum), N); 101 | cudaDeviceSynchronize(); 102 | importance<<>>(npr, pr, damping, in_edge, in_pointer, N); // calculate importance 103 | cudaDeviceSynchronize(); 104 | 105 | std::swap(pr, npr); // store the updated pagerank in the rank 106 | iteration++; 107 | } 108 | 109 | result.resize(N); 110 | cudaMemcpy(result.data(), pr, N * sizeof(double), cudaMemcpyDeviceToHost); // get gpu PR algorithm result 111 | 112 | cudaFree(pr); // free memory 113 | cudaFree(npr); 114 | cudaFree(outs); 115 | cudaFree(sink_vertex_gpu); 116 | cudaFree(sink_sum); 117 | } 118 | 119 | // initialization of the pagerank state 120 | __global__ void initialization(double *pr, double *outs, int *out_pointer, int N) 121 | { 122 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides process which vertex 123 | if (tid >= 0 && tid < 
N) 124 | { 125 | pr[tid] = 1.0 / N; // the initial pagerank is 1/N (a double literal keeps the division in floating point instead of truncating to 0) 126 | if (out_pointer[tid + 1] - out_pointer[tid]) // determine whether the vertex has any out-edge 127 | outs[tid] = 1.0 / (out_pointer[tid + 1] - out_pointer[tid]); // reciprocal of the out-degree, computed in floating point to avoid integer truncation 128 | else 129 | outs[tid] = 0; // consider importance value to be 0 for sink vertices 130 | } 131 | } 132 | 133 | // compute division in advance, pr(u)/Nout(u), which is used to calculate the importance value 134 | __global__ void Antecedent_division(double *pr,double *npr, double *outs,double redi_tele, int N) 135 | { 136 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides which vertex this thread processes 137 | if (tid >= 0 && tid < N) 138 | { 139 | pr[tid] *= outs[tid]; 140 | npr[tid] = redi_tele; // the sum of redistributed value and teleport value 141 | } 142 | } 143 | 144 | // calculate importance 145 | __global__ void importance(double *npr, double *pr, double damp, int *in_edge, int *in_pointer, int GRAPHSIZE) 146 | { 147 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides which vertex this thread processes 148 | 149 | if (tid >= 0 && tid < GRAPHSIZE) 150 | { 151 | // begin and end of in edges 152 | double acc = 0; // accumulates PR(u)/Nout(u) over all u in Nin(v) 153 | for (int c = in_pointer[tid]; c < in_pointer[tid + 1]; c++) 154 | { // in_edge[c] is an in-neighbour u; pr[u] already holds PR(u)/Nout(u), pre-divided in Antecedent_division 155 | acc += pr[in_edge[c]]; 156 | } 157 | npr[tid] += acc * damp; // add the damped importance to the teleport and sink share already stored in npr 158 | } 159 | return; 160 | } 161 | 162 | // A block-level reduction that sums up 163 | // the pagerank values of the incoming edges in the range [begin, end) 164 | __global__ void calculate_acc(double *pr,int *in_edge, int begin,int end,double *acc){ 165 | __shared__ double temp[THREAD_PER_BLOCK]; // shared memory sized to the block, since the kernel launches pass no dynamic shared-memory size 166 | int tid = blockIdx.x * blockDim.x + threadIdx.x; // tid decides which vertex this thread processes 167 | int stid = threadIdx.x; 168 | 169 | if (tid < end) 170 | { 171 | temp[stid] = pr[in_edge[tid+begin]]; // the pagerank value of the incoming edge 172 | } 173 | else 174 | { 175 | temp[stid] = 0; 176 | } 177 | __syncthreads(); // wait until all threads finish loading data into shared memory 178 | 179 | for (int i = blockDim.x / 2; i > 0; i >>= 1) // tree reduction over the block's shared memory 180 | { 181 | if (stid < i) 182 | { 183 | temp[stid] += temp[stid + i]; 184 | } 185 | __syncthreads(); // Synchronize again to ensure that each step of the reduction operation is completed 186 | } 187 | if (stid == 0) 188 | { 189 | _atomicAdd(acc, temp[0]); // Write the result of each thread block into the output array 190 | } 191 | } 192 | 193 | // A block-level reduction that sums the pagerank of the sink (out-degree-zero) vertices 194 | __global__ void calculate_sink(double *pr, int *N_out_zero_gpu, int out_zero_size, double *sink_sum) 195 | { 196 | __shared__ double sink[THREAD_PER_BLOCK]; // shared memory sized to the block, since the kernel launch passes no dynamic shared-memory size 197 | int tid = blockIdx.x * blockDim.x + threadIdx.x; 198 | int stid = threadIdx.x; 199 | 200 | if (tid < out_zero_size) 201 | { 202 | sink[stid] = pr[N_out_zero_gpu[tid]]; // get PR(w) 203 | } 204 | else 205 | { 206 | sink[stid] = 0; // threads beyond the sink-vertex list contribute 0 207 | } 208 | __syncthreads(); // wait until all threads finish loading data into shared memory 209 | 210 | for (int i = blockDim.x / 2; i > 0; i >>= 1) // tree reduction to accumulate the sink values 211 | { 212 | if (stid < i) 213 | { 214 | sink[stid] += sink[stid + i]; 215 | } 216 | __syncthreads(); // Synchronize again to ensure that each step of the reduction
operation is completed 217 | } 218 | if (stid == 0) 219 | { 220 | _atomicAdd(sink_sum, sink[0]); // Write the result of each thread block into the output array 221 | } 222 | } 223 | 224 | // Implementing atomic operations, 225 | // that is, ensuring that adding operations to a specific 226 | // memory location in a multi-threaded environment are thread safe. 227 | __device__ double _atomicAdd(double *address, double val) 228 | { 229 | unsigned long long int *address_as_ull = (unsigned long long int *)address; 230 | unsigned long long int old = *address_as_ull, assumed; 231 | do 232 | { 233 | assumed = old; 234 | old = atomicCAS(address_as_ull, assumed, 235 | __double_as_longlong(val + __longlong_as_double(assumed))); 236 | } while (assumed != old); 237 | return __longlong_as_double(old); 238 | } 239 | 240 | // PageRank Algorithm on GPU 241 | // return the pagerank of each vertex based on the graph, damping factor and number of iterations. 242 | // the type of the vertex and pagerank are string 243 | std::vector> Cuda_PR(graph_structure &graph, CSR_graph &csr_graph, int iterations, double damping){ 244 | std::vector result; 245 | GPU_PR(graph, csr_graph, result, iterations, damping); // get the pagerank in double type 246 | return graph.res_trans_id_val(result); // return the results in string type 247 | } 248 | 249 | #endif // PAGERANK_CUH_ 250 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /include/LDBC/checker.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | bool compare(std::vector& a, std::vector& b) { 10 | return a[0] < b[0]; 11 | } 12 | 13 | // checker for the bfs graph operator 14 | // return check results(true or false) that based on graphs, results, and baseline. 
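// each baseline line is expected to hold two space-separated fields, "<vertex-id> <depth>" (e.g. a hypothetical line "42 3" meaning vertex "42" is at BFS depth 3); unreachable vertices carry a sentinel value, LLONG_MAX in the baseline, matched against INT_MAX in the result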
15 | bool Bfs_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 16 | 17 | int size = res.size(); // get the result size 18 | 19 | if (size != graph.V) { // the result size does not match the graph size 20 | std::cout << "Size of BFS results is not equal to the number of vertices!" << std::endl; 21 | return false; 22 | } 23 | 24 | std::ifstream base_line(base_line_file); // read the baseline file 25 | 26 | if (!base_line.is_open()) { // failed to open the baseline file 27 | std::cout << "Baseline file not found!" << std::endl; 28 | return false; 29 | } 30 | 31 | std::vector id_res(graph.V, -1); 32 | for (auto &p : res) 33 | id_res[graph.vertex_str_to_id[p.first]] = p.second; // convert vertex id of string type to integer and stores in id-res 34 | 35 | int id = 0; 36 | std::string line; 37 | while (std::getline(base_line, line)) { // check each item in the baseline file 38 | std::vector tokens; 39 | tokens = parse_string(line, " "); 40 | if (tokens.size() != 2) { // Baseline file format error 41 | std::cout << "Baseline file format error!" << std::endl; 42 | base_line.close(); 43 | return false; 44 | } 45 | if (id >= size) { // id >= size means that more files are read from baseline than in results 46 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 47 | base_line.close(); 48 | return false; 49 | } 50 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in results 51 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 52 | base_line.close(); 53 | return false; 54 | } 55 | int v_id = graph.vertex_str_to_id[tokens[0]]; 56 | if (id_res[v_id] != std::stol(tokens[1])) { // the results are different from the baseline 57 | if (!(id_res[v_id] == INT_MAX && std::stol(tokens[1]) == LLONG_MAX)) { // make sure it's not different because of the maximum value 58 | std::cout << "Baseline file and GPU BFS results are not the same!" << std::endl; 59 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 60 | std::cout << "BFS result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 61 | base_line.close(); 62 | return false; 63 | } 64 | } 65 | id++; 66 | } 67 | if (id != size) { // id != size means that more reults item than baseline 68 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 69 | base_line.close(); 70 | return false; 71 | } 72 | 73 | std::cout << "BFS results are correct!" << std::endl; 74 | base_line.close(); 75 | return true; // BFS results are correct, return true 76 | } 77 | 78 | void set_root(std::vector& parent, int v) { 79 | if (parent[v] == v) 80 | return; 81 | set_root(parent, parent[v]); 82 | parent[v] = parent[parent[v]]; 83 | } 84 | 85 | // checker for the WCC graph operator 86 | // return check results(true or false) that based on graphs, results, and baseline. 
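// the comparison below canonicalizes both sides: result rows are grouped by their reported component id, the baseline's per-vertex component ids are collapsed to a root via set_root(), every component's vertex list is sorted, the component lists are ordered by smallest vertex (compare()), and the two sides are then matched element-wise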
87 | bool WCC_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 88 | std::vector> temp; // record the connection components for each vertex in the results 89 | temp.resize(graph.V); 90 | for (auto &p : res) 91 | temp[graph.vertex_str_to_id[p.second]].push_back(graph.vertex_str_to_id[p.first]); // add the vertex to its appropriate Weakly Connected Components 92 | std::vector> components; // vector components[i] indicate that vertices in the i-th connection components in results 93 | for (int i = 0; i < graph.V; i++) { 94 | if (temp[i].size() > 0) 95 | components.push_back(temp[i]); // extract every Weakly Connected Components from temp 96 | } 97 | 98 | int size = components.size(); 99 | for (auto &v : components) { 100 | if (!v.size()) { 101 | std::cout << "One of WCC results is empty!" << std::endl; 102 | return false; 103 | } 104 | std::sort(v.begin(), v.end()); // sort the vertices of the same connected components 105 | } 106 | 107 | std::sort(components.begin(), components.end(), compare); 108 | 109 | std::ifstream base_line(base_line_file); 110 | 111 | if (!base_line.is_open()) { // failed to open the baseline file 112 | std::cout << "Baseline file not found!" << std::endl; 113 | return false; 114 | } 115 | 116 | std::vector> base_res; // vector base_res[i] indicate that vertices in the i-th connection components in baseline 117 | std::vector base_components; // record the connection components for each vertex in the baseline 118 | 119 | base_components.resize(graph.V, 0); 120 | 121 | std::string line; 122 | 123 | while (std::getline(base_line, line)) { // read the baseline line by line 124 | std::vector tokens; 125 | tokens = parse_string(line, " "); 126 | if (tokens.size() != 2) { // Baseline file format error 127 | std::cout << "Baseline file format error!" << std::endl; 128 | base_line.close(); 129 | return false; 130 | } 131 | base_components[graph.vertex_str_to_id[tokens[0]]] = graph.vertex_str_to_id[tokens[1]]; // store baseline file per row value to component 132 | } 133 | 134 | for (int i = 0; i < graph.V; i++) 135 | set_root(base_components, i); 136 | 137 | std::vector> componentLists(graph.V); 138 | 139 | // the following operations are the same as the results operations, but work with baseline data 140 | for (int i = 0; i < graph.V; i++) { 141 | componentLists[base_components[i]].push_back(i); 142 | } 143 | 144 | for (int i = 0; i < graph.V; i++) { 145 | if (componentLists[i].size() > 0) 146 | base_res.push_back(componentLists[i]); 147 | } 148 | 149 | for (auto &v : base_res) { 150 | if (!v.size()) { 151 | std::cout << "One of baseline WCC results is empty!" << std::endl; 152 | base_line.close(); 153 | return false; 154 | } 155 | std::sort(v.begin(), v.end()); 156 | } 157 | 158 | std::sort(base_res.begin(), base_res.end(), compare); 159 | 160 | if (size != base_res.size()) { 161 | std::cout << "Baseline file and WCC results are not the same!" << std::endl; 162 | std::cout << "Baseline total component is " << base_res.size() << std::endl; 163 | std::cout << "WCC result total component is " << components.size() << std::endl; 164 | return false; 165 | } 166 | 167 | for (int i = 0; i < size; i++) { // compare each Weakly Connected Component 168 | if (base_res[i].size() != components[i].size()) { // different sizes mean different results and baseline 169 | std::cout << "Baseline file and WCC results are not the same!" 
<< std::endl; 170 | std::cout << "Baseline component size is " << base_res[i].size() << std::endl; 171 | std::cout << "WCC result component size is " << components[i].size() << std::endl; 172 | return false; 173 | } 174 | for (int j = 0; j < base_res[i].size(); j++) { 175 | if (base_res[i][j] != components[i][j]) { // since both baseline and results are ordered, simply compare the elements in order 176 | std::cout << "Baseline file and WCC results are not the same!" << std::endl; 177 | std::cout << "Difference at: " << graph.vertex_id_to_str[base_res[i][j]].first << " " << graph.vertex_id_to_str[components[i][j]].first << std::endl; 178 | base_line.close(); 179 | return false; 180 | } 181 | } 182 | } 183 | 184 | std::cout << "WCC results are correct!" << std::endl; 185 | base_line.close(); 186 | return true; // WCC results are correct, return true 187 | } 188 | 189 | // checker for the SSSP graph operator 190 | // return check results(true or false) that based on graphs, results, and baseline. 191 | bool SSSP_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 192 | 193 | int size = res.size(); // get the result size 194 | 195 | if (size != graph.V) { // the result size does not match the graph size 196 | std::cout << "Size of SSSP results is not equal to the number of vertices!" << std::endl; 197 | return false; 198 | } 199 | 200 | std::ifstream base_line(base_line_file); // read the baseline file 201 | 202 | if (!base_line.is_open()) { // failed to open the baseline file 203 | std::cout << "Baseline file not found!" << std::endl; 204 | return false; 205 | } 206 | 207 | std::vector id_res(graph.V, INT_MAX); 208 | 209 | for (auto &p : res) 210 | id_res[graph.vertex_str_to_id[p.first]] = p.second; // convert vertex id of string type to integer and stores in id-res 211 | 212 | int id = 0; 213 | std::string line; 214 | while (std::getline(base_line, line)) { // check each item in the baseline file 215 | std::vector tokens; 216 | tokens = parse_string(line, " "); 217 | if (tokens.size() != 2) { // Baseline file format error 218 | std::cout << "Baseline file format error!" << std::endl; 219 | base_line.close(); 220 | return false; 221 | } 222 | if (id >= size) { // id >= size means that more files are read from baseline than in results 223 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 224 | base_line.close(); 225 | return false; 226 | } 227 | 228 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in results 229 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 230 | base_line.close(); 231 | return false; 232 | } 233 | int v_id = graph.vertex_str_to_id[tokens[0]]; 234 | 235 | if (tokens[1] == "infinity" || tokens[1] == "inf") { // "infinity" in baseline, so check wether the results is max 236 | if (id_res[v_id] != std::numeric_limits::max()) { 237 | std::cout << "Baseline file and SSSP results are not the same!" << std::endl; 238 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 239 | std::cout << "SSSP result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 240 | base_line.close(); 241 | return false; 242 | } 243 | } 244 | else if (fabs(id_res[v_id] - std::stod(tokens[1])) > 1e-4) { // set the error range to 1e-4, and answers within the range are considered to be correct 245 | std::cout << "Baseline file and SSSP results are not the same!" 
<< std::endl; 246 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 247 | std::cout << "SSSP result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 248 | base_line.close(); 249 | return false; 250 | } 251 | id++; 252 | } 253 | if (id != size) { // id != size means that more reults item than baseline 254 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 255 | base_line.close(); 256 | return false; 257 | } 258 | 259 | std::cout << "SSSP results are correct!" << std::endl; 260 | base_line.close(); 261 | return true; // SSSP results are correct, return true 262 | } 263 | 264 | // checker for the PageRank graph operator 265 | // return check results(true or false) that based on graphs, results, and baseline. 266 | bool PR_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 267 | 268 | int size = res.size(); // get the result size 269 | 270 | std::vector id_res(graph.V, 0); 271 | 272 | for (auto &p : res) 273 | id_res[graph.vertex_str_to_id[p.first]] = p.second; // convert vertex id of string type to integer and stores in id-res 274 | 275 | if (size != graph.V) { // the result size does not match the graph size 276 | std::cout << "Size of PageRank results is not equal to the number of vertices!" << std::endl; 277 | return false; 278 | } 279 | 280 | std::ifstream base_line(base_line_file); 281 | 282 | if (!base_line.is_open()) { // failed to open the baseline file 283 | std::cout << "Baseline file not found!" << std::endl; 284 | return false; 285 | } 286 | 287 | int id = 0; 288 | std::string line; 289 | while (std::getline(base_line, line)) { // check each item in the baseline file 290 | std::vector tokens; 291 | tokens = parse_string(line, " "); 292 | if (tokens.size() != 2) { // Baseline file format error 293 | std::cout << "Baseline file format error!" << std::endl; 294 | base_line.close(); 295 | return false; 296 | } 297 | if (id >= size) { // id >= size means that more files are read from baseline than in results 298 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 299 | base_line.close(); 300 | return false; 301 | } 302 | 303 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in results 304 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 305 | base_line.close(); 306 | return false; 307 | } 308 | int v_id = graph.vertex_str_to_id[tokens[0]]; 309 | 310 | if (fabs(id_res[v_id] - std::stod(tokens[1])) > 1e-2) { // set the error range to 1e-2, and answers within the range are considered to be correct 311 | std::cout << "Baseline file and PageRank results are not the same!" << std::endl; 312 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 313 | std::cout << "PageRank result: " << graph.vertex_id_to_str[v_id].first << " " << id_res[v_id] << std::endl; 314 | base_line.close(); 315 | return false; 316 | } 317 | id++; 318 | } 319 | if (id != size) { // id != size means that more reults item than baseline 320 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 321 | base_line.close(); 322 | return false; 323 | } 324 | 325 | std::cout << "PageRank results are correct!" 
<< std::endl; 326 | base_line.close(); 327 | return true; // PageRank results are correct, return true 328 | } 329 | 330 | // checker for the CDLP (community detection by label propagation) graph operator 331 | // returns the check result (true or false) based on the graph, the results, and the baseline. 332 | bool CDLP_checker(graph_structure& graph, std::vector>& res, std::string base_line_file) { 333 | int size = res.size(); // get the result size 334 | 335 | std::vector id_res; 336 | 337 | for (auto &p : res) 338 | id_res.push_back(p.second); // store the result labels into id_res in order (indexed later by vertex id, so res is expected to be ordered by vertex id) 339 | 340 | if (size != graph.V) { // the result size does not match the graph size 341 | std::cout << "Size of CDLP results is not equal to the number of vertices!" << std::endl; 342 | return false; 343 | } 344 | 345 | std::ifstream base_line(base_line_file); 346 | 347 | if (!base_line.is_open()) { // failed to open the baseline file 348 | std::cout << "Baseline file not found!" << std::endl; 349 | return false; 350 | } 351 | 352 | int id = 0; 353 | std::string line; 354 | while (std::getline(base_line, line)) { // check each item in the baseline file 355 | std::vector tokens; 356 | tokens = parse_string(line, " "); 357 | if (tokens.size() != 2) { // Baseline file format error 358 | std::cout << "Baseline file format error!" << std::endl; 359 | base_line.close(); 360 | return false; 361 | } 362 | if (id >= size) { // id >= size means the baseline has more lines than the result has entries 363 | std::cout << "Size of baseline file is larger than the result!" << std::endl; 364 | base_line.close(); 365 | return false; 366 | } 367 | 368 | if (graph.vertex_str_to_id.find(tokens[0]) == graph.vertex_str_to_id.end()) { // the vertex cannot be found in the graph 369 | std::cout << "Baseline file contains a vertex that is not in the graph!" << std::endl; 370 | base_line.close(); 371 | return false; 372 | } 373 | int v_id = graph.vertex_str_to_id[tokens[0]]; 374 | 375 | if (id_res[v_id] != tokens[1]) { // the results are different from the baseline 376 | std::cout << "Baseline file and CDLP results are not the same!" << std::endl; 377 | std::cout << "Baseline file: " << tokens[0] << " " << tokens[1] << std::endl; 378 | std::cout << "CDLP result: " << id_res[v_id] << std::endl; 379 | base_line.close(); 380 | return false; 381 | } 382 | id++; 383 | } 384 | if (id != size) { // id != size means the result has more entries than the baseline has lines 385 | std::cout << "Size of baseline file is smaller than the result!" << std::endl; 386 | base_line.close(); 387 | return false; 388 | } 389 | 390 | std::cout << "CDLP results are correct!" << std::endl; 391 | base_line.close(); 392 | return true; // CDLP results are correct, return true 393 | } 394 | --------------------------------------------------------------------------------
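For orientation, the fragment below sketches how the GPU PageRank entry point and its checker are meant to be wired together, loosely in the spirit of src/LDBC/LDBC_GPU_csr.cu (not reproduced here). It is a hypothetical driver, not repository code: it assumes `graph` is an already-loaded LDBC<double> instance accepted by these functions, `csr` is its prebuilt CSR_graph form (the construction call is not shown in this excerpt), and the iteration count, damping factor, and file paths are placeholders.

    // Hypothetical wiring sketch -- values and paths below are placeholders, not repository defaults.
    auto pr_res = Cuda_PR(graph, csr, 100, 0.85);                // GPU PageRank, as defined in GPU_PageRank.cuh above
    if (PR_checker(graph, pr_res, "path/to/pagerank-baseline"))  // compare against an LDBC reference file
        graph.save_to_CSV(pr_res, "path/to/pagerank.csv");       // append one CSV row of PageRank values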