├── LICENSE.md ├── Makefile ├── README.md ├── shared ├── Makefile ├── argument_parsing.cu ├── argument_parsing.cuh ├── globals.hpp ├── gpu_error_check.cuh ├── gpu_kernels.cu ├── gpu_kernels.cuh ├── graph.cu ├── graph.cuh ├── partitioner.cu ├── partitioner.cuh ├── subgraph.cu ├── subgraph.cuh ├── subgraph_generator.cu ├── subgraph_generator.cuh ├── subway_utilities.cpp ├── subway_utilities.hpp ├── test.cu ├── test.cuh ├── timer.cpp └── timer.hpp ├── subway ├── Makefile ├── bfs-async.cu ├── bfs-sync.cu ├── cc-async.cu ├── cc-sync.cu ├── pr-async.cu ├── pr-sync.cu ├── sssp-async.cu ├── sssp-sync.cu ├── sswp-async.cu └── sswp-sync.cu └── tools ├── Makefile └── converter.cpp /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 AutomataLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | CC=g++ 4 | NC=nvcc 5 | CFLAGS=-std=c++11 -O3 6 | NFLAGS=-arch=sm_60 7 | 8 | SHARED=shared 9 | SUBWAY=subway 10 | TOOLS=tools 11 | 12 | DEP=$(SHARED)/timer.o $(SHARED)/argument_parsing.o $(SHARED)/graph.o $(SHARED)/subgraph.o $(SHARED)/partitioner.o $(SHARED)/subgraph_generator.o $(SHARED)/gpu_kernels.o $(SHARED)/subway_utilities.o $(SHARED)/test.o 13 | 14 | all: make1 make2 make3 bfs-sync cc-sync sssp-sync sswp-sync pr-sync bfs-async cc-async sssp-async sswp-async pr-async 15 | 16 | make1: 17 | make -C $(SHARED) 18 | 19 | make2: 20 | make -C $(SUBWAY) 21 | 22 | make3: 23 | make -C $(TOOLS) 24 | 25 | 26 | bfs-sync: $(SUBWAY)/bfs-sync.o $(DEP) 27 | $(NC) $(SUBWAY)/bfs-sync.o $(DEP) -o bfs-sync $(CFLAGS) $(NFLAGS) 28 | 29 | cc-sync: $(SUBWAY)/cc-sync.o $(DEP) 30 | $(NC) $(SUBWAY)/cc-sync.o $(DEP) -o cc-sync $(CFLAGS) $(NFLAGS) 31 | 32 | sssp-sync: $(SUBWAY)/sssp-sync.o $(DEP) 33 | $(NC) $(SUBWAY)/sssp-sync.o $(DEP) -o sssp-sync $(CFLAGS) $(NFLAGS) 34 | 35 | sswp-sync: $(SUBWAY)/sswp-sync.o $(DEP) 36 | $(NC) $(SUBWAY)/sswp-sync.o $(DEP) -o sswp-sync $(CFLAGS) $(NFLAGS) 37 | 38 | pr-sync: $(SUBWAY)/pr-sync.o $(DEP) 39 | $(NC) $(SUBWAY)/pr-sync.o $(DEP) -o pr-sync $(CFLAGS) $(NFLAGS) 40 | 41 | bfs-async: $(SUBWAY)/bfs-async.o $(DEP) 42 | $(NC) $(SUBWAY)/bfs-async.o $(DEP) -o bfs-async $(CFLAGS) $(NFLAGS) 43 | 44 | cc-async: $(SUBWAY)/cc-async.o $(DEP) 45 | $(NC) $(SUBWAY)/cc-async.o $(DEP) -o cc-async $(CFLAGS) $(NFLAGS) 46 | 47 | sssp-async: $(SUBWAY)/sssp-async.o $(DEP) 48 | $(NC) $(SUBWAY)/sssp-async.o $(DEP) -o sssp-async $(CFLAGS) $(NFLAGS) 49 | 50 | sswp-async: $(SUBWAY)/sswp-async.o $(DEP) 51 | $(NC) $(SUBWAY)/sswp-async.o $(DEP) -o sswp-async $(CFLAGS) $(NFLAGS) 52 | 53 | pr-async: $(SUBWAY)/pr-async.o $(DEP) 54 | $(NC) $(SUBWAY)/pr-async.o $(DEP) -o pr-async $(CFLAGS) 
$(NFLAGS) 55 | 56 | clean: 57 | make -C $(SHARED) clean 58 | make -C $(SUBWAY) clean 59 | make -C $(TOOLS) clean 60 | rm -f bfs-sync cc-sync sssp-sync sswp-sync pr-sync bfs-async cc-async sssp-async sswp-async pr-async 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Subway 2 | Subway is an out-of-GPU-memory graph processing framework. 3 | 4 | Subway provides a highly cost-effective solution to extracting a subgraph that only consists of the edges of active vertices. This allows it to transfer only the active parts of the graph from CPU to GPU, thus dramatically reduces the volume of data transfer. The benefits from the data transfer reduction outweigh the costs of subgraph generation in (almost) all iterations of graph processing, bringing in substantial overall performance improvements. Moreover, it supports asynchronous processing between the loaded subgraph in GPU and the rest of the graph in host memory, which tends to decrease the number of global iterations, thus can further reduce the data transfer. 5 | 6 | #### Compilation 7 | 8 | To compile Subway, just run make in the root directory. The only requrements are g++ and CUDA toolkit. 9 | 10 | #### Input graph formats 11 | 12 | Subway accepts edge-list (.el) and weighted edge-list (.wel) graph formats, as well as the binary serialized pre-built CSR graph representation (.bcsr and .bwcsr). It is highly recommended to convert edge-list format graph files to the binary format (using tools/converter). Reading binary formats is faster and more space efficient. 13 | 14 | Subway is sensitive to graph file extension. A weighted edge-list graph file has to end with .wel. The followings are two graph file examples. 
15 | 16 | Graph.el ("SOURCE DESTINATION" for each edge in each line): 17 | ``` 18 | 0 1 19 | 0 3 20 | 2 3 21 | 1 2 22 | ``` 23 | 24 | Graph.wel ("SOURCE DESTINATION WEIGHT" for each edge in each line): 25 | ``` 26 | 0 1 26 27 | 0 3 33 28 | 2 3 40 29 | 1 2 10 30 | ``` 31 | 32 | To convert these graph files to the binary format, run the following commands in the root folder: 33 | ``` 34 | tools/converter path_to_Graph.el 35 | tools/converter path_to_Graph.wel 36 | ``` 37 | 38 | The first command converts Graph.el to the binary CSR format and generates a binary graph file with .bcsr extension under the same directory as the original file. The second command converts Graph.wel to a weighted binary graph file with .bwcsr extension. 39 | 40 | #### Running applications in Subway 41 | The applications take a graph as input as well as some optional arguments. For example: 42 | 43 | ``` 44 | $ ./sssp-async --input path-to-input-graph 45 | $ ./sssp-async --input path-to-input-graph --source 10 46 | ``` 47 | 48 | For applications that run on weighted graphs, like SSSP, the input must be weighted (.bwcsr or .wel) and for applications that run on unweighted graphs, like BFS, the input must be unweighted (.bcsr or .el). 49 | 50 | #### Publications: 51 | 52 | [EUROSYS'20] Amir Hossein Nodehi Sabet, Zhijia Zhao, and Rajiv Gupta. [Subway: minimizing data transfer during out-of-GPU-memory graph processing](https://dl.acm.org/doi/abs/10.1145/3342195.3387537). In Proceedings of the Fifteenth European Conference on Computer Systems. 53 | 54 | [ASPLOS'18] Amir Hossein Nodehi Sabet, Junqiao Qiu, and Zhijia Zhao. [Tigr: Transforming Irregular Graphs for GPU-Friendly Graph Processing](https://dl.acm.org/doi/10.1145/3173162.3173180). In Proceedings of the Twenty-Third International Conference on Architectural Support for Programming Languages and Operating Systems. 
55 | 56 | 57 | -------------------------------------------------------------------------------- /shared/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | NC=nvcc 3 | CFLAGS=-std=c++11 -O3 4 | NFLAGS=-arch=sm_60 5 | 6 | 7 | all: timer.o argument_parsing.o graph.o subgraph.o partitioner.o subgraph_generator.o gpu_kernels.o subway_utilities.o test.o 8 | 9 | 10 | timer.o: timer.cpp 11 | $(CC) -c timer.cpp -o timer.o $(CFLAGS) 12 | 13 | argument_parsing.o: argument_parsing.cu 14 | $(NC) -c argument_parsing.cu -o argument_parsing.o $(CFLAGS) $(NFLAGS) 15 | 16 | graph.o: graph.cu 17 | $(NC) -c graph.cu -o graph.o $(CFLAGS) $(NFLAGS) 18 | 19 | subgraph.o: subgraph.cu 20 | $(NC) -c subgraph.cu -o subgraph.o $(CFLAGS) $(NFLAGS) 21 | 22 | partitioner.o: partitioner.cu 23 | $(NC) -c partitioner.cu -o partitioner.o $(CFLAGS) $(NFLAGS) 24 | 25 | subgraph_generator.o: subgraph_generator.cu 26 | $(NC) -c subgraph_generator.cu -o subgraph_generator.o $(CFLAGS) $(NFLAGS) 27 | 28 | gpu_kernels.o: gpu_kernels.cu 29 | $(NC) -c gpu_kernels.cu -o gpu_kernels.o $(CFLAGS) $(NFLAGS) 30 | 31 | subway_utilities.o: subway_utilities.cpp 32 | $(CC) -c subway_utilities.cpp -o subway_utilities.o $(CFLAGS) 33 | 34 | test.o: test.cu 35 | $(NC) -c test.cu -o test.o $(CFLAGS) $(NFLAGS) 36 | 37 | clean: 38 | rm *.o 39 | -------------------------------------------------------------------------------- /shared/argument_parsing.cu: -------------------------------------------------------------------------------- 1 | #include "argument_parsing.cuh" 2 | 3 | 4 | 5 | ArgumentParser::ArgumentParser(int argc, char **argv, bool canHaveSource, bool canHaveItrs) 6 | { 7 | this->argc = argc; 8 | this->argv = argv; 9 | this->canHaveSource = canHaveSource; 10 | this->canHaveItrs = canHaveItrs; 11 | 12 | this->sourceNode = 0; 13 | this->deviceID = 0; 14 | this->numberOfItrs = 1; 15 | 16 | hasInput = false; 17 | hasSourceNode = false; 18 | hasOutput = false; 19 | 
hasDeviceID = false; 20 | hasNumberOfItrs = false; 21 | 22 | Parse(); 23 | } 24 | 25 | bool ArgumentParser::Parse() 26 | { 27 | try 28 | { 29 | if(argc == 1) 30 | { 31 | cout << GenerateHelpString(); 32 | exit(0); 33 | } 34 | 35 | if(argc == 2) 36 | if ((strcmp(argv[1], "--help") == 0) || 37 | (strcmp(argv[1], "-help") == 0) || 38 | (strcmp(argv[1], "--h") == 0) || 39 | (strcmp(argv[1], "-h") == 0)) 40 | { 41 | cout << GenerateHelpString(); 42 | exit(0); 43 | } 44 | 45 | if(argc%2 == 0) 46 | { 47 | cout << "\nThere was an error parsing command line arguments\n"; 48 | cout << GenerateHelpString(); 49 | exit(0); 50 | } 51 | 52 | 53 | for(int i=1; i\n"; 81 | cout << GenerateHelpString(); 82 | exit(0); 83 | } 84 | } 85 | 86 | if(hasInput) 87 | return true; 88 | else 89 | { 90 | cout << "\nInput graph file argument is required.\n"; 91 | cout << GenerateHelpString(); 92 | exit(0); 93 | } 94 | } 95 | catch( const std::exception& strException ) { 96 | std::cerr << strException.what() << "\n"; 97 | GenerateHelpString(); 98 | exit(0); 99 | } 100 | catch(...) { 101 | std::cerr << "An exception has occurred.\n"; 102 | GenerateHelpString(); 103 | exit(0); 104 | } 105 | } 106 | 107 | string ArgumentParser::GenerateHelpString(){ 108 | string str = "\nRequired arguments:"; 109 | str += "\n [--input]: Input graph file. E.g., --input FacebookGraph.txt"; 110 | str += "\nOptional arguments"; 111 | if(canHaveSource) 112 | str += "\n [--source]: Begins from the source (Default: 0). E.g., --source 10"; 113 | str += "\n [--output]: Output file for results. E.g., --output results.txt"; 114 | str += "\n [--device]: Select GPU device (default: 0). E.g., --device 1"; 115 | if(canHaveItrs) 116 | str += "\n [--iteration]: Number of iterations (default: 1). 
E.g., --iterations 10"; 117 | str += "\n\n"; 118 | return str; 119 | } 120 | 121 | -------------------------------------------------------------------------------- /shared/argument_parsing.cuh: -------------------------------------------------------------------------------- 1 | #ifndef ARGUMENT_PARSING_HPP 2 | #define ARGUMENT_PARSING_HPP 3 | 4 | #include "globals.hpp" 5 | 6 | 7 | class ArgumentParser 8 | { 9 | private: 10 | 11 | public: 12 | int argc; 13 | char** argv; 14 | 15 | bool canHaveSource; 16 | bool canHaveItrs; 17 | 18 | bool hasInput; 19 | bool hasSourceNode; 20 | bool hasOutput; 21 | bool hasDeviceID; 22 | bool hasNumberOfItrs; 23 | string input; 24 | int sourceNode; 25 | string output; 26 | int deviceID; 27 | int numberOfItrs; 28 | 29 | 30 | ArgumentParser(int argc, char **argv, bool canHaveSource, bool canHaveItrs); 31 | 32 | bool Parse(); 33 | 34 | string GenerateHelpString(); 35 | 36 | }; 37 | 38 | 39 | #endif // ARGUMENT_PARSING_HPP 40 | -------------------------------------------------------------------------------- /shared/globals.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GLOBALS_HPP 2 | #define GLOBALS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | const unsigned int DIST_INFINITY = std::numeric_limits::max() - 1; 28 | 29 | typedef unsigned int uint; 30 | typedef unsigned long long ull; 31 | 32 | 33 | struct OutEdge{ 34 | uint end; 35 | }; 36 | 37 | struct OutEdgeWeighted{ 38 | uint end; 39 | uint w8; 40 | }; 41 | 42 | struct Edge{ 43 | uint source; 44 | uint end; 45 | }; 46 | 47 | struct EdgeWeighted{ 48 | uint source; 49 | uint end; 50 | uint w8; 51 | }; 52 | 53 | 54 | 55 | 56 | #endif // GLOBALS_HPP 57 | 
-------------------------------------------------------------------------------- /shared/gpu_error_check.cuh: -------------------------------------------------------------------------------- 1 | #ifndef GPU_ERROR_CHECK_CUH 2 | #define GPU_ERROR_CHECK_CUH 3 | 4 | //#include 5 | //#include 6 | //#include 7 | 8 | #define gpuErrorcheck(ans) { gpuAssert((ans), __FILE__, __LINE__); } 9 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 10 | { 11 | if (code != cudaSuccess) 12 | { 13 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 14 | if (abort) exit(code); 15 | } 16 | } 17 | 18 | #endif // GPU_ERROR_CHECK_CUH 19 | -------------------------------------------------------------------------------- /shared/gpu_kernels.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "gpu_kernels.cuh" 3 | #include "globals.hpp" 4 | #include "gpu_error_check.cuh" 5 | #include "graph.cuh" 6 | #include "subgraph.cuh" 7 | 8 | 9 | __global__ void bfs_kernel(unsigned int numNodes, 10 | unsigned int from, 11 | unsigned int numPartitionedEdges, 12 | unsigned int *activeNodes, 13 | unsigned int *activeNodesPointer, 14 | OutEdge *edgeList, 15 | unsigned int *outDegree, 16 | unsigned int *value, 17 | //bool *finished, 18 | bool *label1, 19 | bool *label2) 20 | { 21 | unsigned int tId = blockDim.x * blockIdx.x + threadIdx.x; 22 | 23 | if(tId < numNodes) 24 | { 25 | unsigned int id = activeNodes[from + tId]; 26 | 27 | if(label1[id] == false) 28 | return; 29 | 30 | label1[id] = false; 31 | 32 | unsigned int sourceWeight = value[id]; 33 | 34 | unsigned int thisFrom = activeNodesPointer[from+tId]-numPartitionedEdges; 35 | unsigned int degree = outDegree[id]; 36 | unsigned int thisTo = thisFrom + degree; 37 | 38 | //printf("******* %i\n", thisFrom); 39 | 40 | unsigned int finalDist; 41 | 42 | for(unsigned int i=thisFrom; i dist[edgeList[i].end]) 198 | { 199 | 
atomicMax(&dist[edgeList[i].end] , finalDist); 200 | 201 | //*finished = false; 202 | 203 | //label1[edgeList[i].end] = true; 204 | 205 | label2[edgeList[i].end] = true; 206 | } 207 | } 208 | } 209 | } 210 | 211 | __global__ void pr_kernel(unsigned int numNodes, 212 | unsigned int from, 213 | unsigned int numPartitionedEdges, 214 | unsigned int *activeNodes, 215 | unsigned int *activeNodesPointer, 216 | OutEdge *edgeList, 217 | unsigned int *outDegree, 218 | float *dist, 219 | float *delta, 220 | //bool *finished, 221 | float acc) 222 | { 223 | unsigned int tId = blockDim.x * blockIdx.x + threadIdx.x; 224 | 225 | if(tId < numNodes) 226 | { 227 | unsigned int id = activeNodes[from + tId]; 228 | unsigned int degree = outDegree[id]; 229 | float thisDelta = delta[id]; 230 | 231 | if(thisDelta > acc) 232 | { 233 | dist[id] += thisDelta; 234 | 235 | if(degree != 0) 236 | { 237 | //*finished = false; 238 | 239 | float sourcePR = ((float) thisDelta / degree) * 0.85; 240 | 241 | unsigned int thisfrom = activeNodesPointer[from+tId]-numPartitionedEdges; 242 | unsigned int thisto = thisfrom + degree; 243 | 244 | for(unsigned int i=thisfrom; i dist[edgeList[i].end]) 394 | { 395 | atomicMax(&dist[edgeList[i].end] , finalDist); 396 | 397 | *finished = false; 398 | 399 | //label1[edgeList[i].end] = true; 400 | 401 | label2[edgeList[i].end] = true; 402 | } 403 | } 404 | } 405 | } 406 | 407 | 408 | __global__ void cc_async(unsigned int numNodes, 409 | unsigned int from, 410 | unsigned int numPartitionedEdges, 411 | unsigned int *activeNodes, 412 | unsigned int *activeNodesPointer, 413 | OutEdge *edgeList, 414 | unsigned int *outDegree, 415 | unsigned int *dist, 416 | bool *finished, 417 | bool *label1, 418 | bool *label2) 419 | { 420 | unsigned int tId = blockDim.x * blockIdx.x + threadIdx.x; 421 | 422 | if(tId < numNodes) 423 | { 424 | unsigned int id = activeNodes[from + tId]; 425 | 426 | if(label1[id] == false) 427 | return; 428 | 429 | label1[id] = false; 430 | 431 | unsigned 
int sourceWeight = dist[id]; 432 | 433 | unsigned int thisFrom = activeNodesPointer[from+tId]-numPartitionedEdges; 434 | unsigned int degree = outDegree[id]; 435 | unsigned int thisTo = thisFrom + degree; 436 | 437 | //printf("******* %i\n", thisFrom); 438 | 439 | //unsigned int finalDist; 440 | 441 | for(unsigned int i=thisFrom; i acc) 480 | { 481 | dist[id] += thisDelta; 482 | 483 | if(degree != 0) 484 | { 485 | *finished = false; 486 | 487 | float sourcePR = ((float) thisDelta / degree) * 0.85; 488 | 489 | unsigned int thisfrom = activeNodesPointer[from+tId]-numPartitionedEdges; 490 | unsigned int thisto = thisfrom + degree; 491 | 492 | for(unsigned int i=thisfrom; i 5 | Graph::Graph(string graphFilePath, bool isWeighted) 6 | { 7 | this->graphFilePath = graphFilePath; 8 | this->isWeighted = isWeighted; 9 | } 10 | 11 | template 12 | string Graph::GetFileExtension(string fileName) 13 | { 14 | if(fileName.find_last_of(".") != string::npos) 15 | return fileName.substr(fileName.find_last_of(".")+1); 16 | return ""; 17 | } 18 | 19 | template <> 20 | void Graph::AssignW8(uint w8, uint index) 21 | { 22 | edgeList[index].w8 = w8; 23 | } 24 | 25 | template <> 26 | void Graph::AssignW8(uint w8, uint index) 27 | { 28 | edgeList[index].end = edgeList[index].end; // do nothing 29 | } 30 | 31 | template 32 | void Graph::ReadGraph() 33 | { 34 | 35 | cout << "Reading the input graph from the following file:\n>> " << graphFilePath << endl; 36 | 37 | this->graphFormat = GetFileExtension(graphFilePath); 38 | 39 | if(graphFormat == "bcsr" || graphFormat == "bwcsr") 40 | { 41 | ifstream infile (graphFilePath, ios::in | ios::binary); 42 | 43 | infile.read ((char*)&num_nodes, sizeof(uint)); 44 | infile.read ((char*)&num_edges, sizeof(uint)); 45 | 46 | nodePointer = new uint[num_nodes+1]; 47 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 48 | 49 | infile.read ((char*)nodePointer, sizeof(uint)*num_nodes); 50 | infile.read ((char*)edgeList, sizeof(E)*num_edges); 51 | 
nodePointer[num_nodes] = num_edges; 52 | } 53 | else if(graphFormat == "el" || graphFormat == "wel") 54 | { 55 | ifstream infile; 56 | infile.open(graphFilePath); 57 | stringstream ss; 58 | uint max = 0; 59 | string line; 60 | uint edgeCounter = 0; 61 | if(isWeighted) 62 | { 63 | vector edges; 64 | EdgeWeighted newEdge; 65 | while(getline( infile, line )) 66 | { 67 | ss.str(""); 68 | ss.clear(); 69 | ss << line; 70 | 71 | ss >> newEdge.source; 72 | ss >> newEdge.end; 73 | ss >> newEdge.w8; 74 | 75 | edges.push_back(newEdge); 76 | edgeCounter++; 77 | 78 | if(max < newEdge.source) 79 | max = newEdge.source; 80 | if(max < newEdge.end) 81 | max = newEdge.end; 82 | } 83 | infile.close(); 84 | num_nodes = max + 1; 85 | num_edges = edgeCounter; 86 | nodePointer = new uint[num_nodes+1]; 87 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 88 | uint *degree = new uint[num_nodes]; 89 | for(uint i=0; i edges; 120 | Edge newEdge; 121 | while(getline( infile, line )) 122 | { 123 | ss.str(""); 124 | ss.clear(); 125 | ss << line; 126 | 127 | ss >> newEdge.source; 128 | ss >> newEdge.end; 129 | 130 | edges.push_back(newEdge); 131 | edgeCounter++; 132 | 133 | if(max < newEdge.source) 134 | max = newEdge.source; 135 | if(max < newEdge.end) 136 | max = newEdge.end; 137 | } 138 | infile.close(); 139 | num_nodes = max + 1; 140 | num_edges = edgeCounter; 141 | nodePointer = new uint[num_nodes+1]; 142 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 143 | uint *degree = new uint[num_nodes]; 144 | for(uint i=0; i 217 | void GraphPR::AssignW8(uint w8, uint index) 218 | { 219 | edgeList[index].w8 = w8; 220 | } 221 | 222 | template <> 223 | void GraphPR::AssignW8(uint w8, uint index) 224 | { 225 | edgeList[index].end = edgeList[index].end; // do nothing 226 | } 227 | 228 | template 229 | void GraphPR::ReadGraph() 230 | { 231 | 232 | cout << "Reading the input graph from the following file:\n>> " << graphFilePath << endl; 233 | 234 | this->graphFormat = 
GetFileExtension(graphFilePath); 235 | 236 | if(graphFormat == "bcsr" || graphFormat == "bwcsr") 237 | { 238 | ifstream infile (graphFilePath, ios::in | ios::binary); 239 | 240 | infile.read ((char*)&num_nodes, sizeof(uint)); 241 | infile.read ((char*)&num_edges, sizeof(uint)); 242 | 243 | nodePointer = new uint[num_nodes+1]; 244 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 245 | 246 | infile.read ((char*)nodePointer, sizeof(uint)*num_nodes); 247 | infile.read ((char*)edgeList, sizeof(E)*num_edges); 248 | nodePointer[num_nodes] = num_edges; 249 | } 250 | else if(graphFormat == "el" || graphFormat == "wel") 251 | { 252 | ifstream infile; 253 | infile.open(graphFilePath); 254 | stringstream ss; 255 | uint max = 0; 256 | string line; 257 | uint edgeCounter = 0; 258 | if(isWeighted) 259 | { 260 | vector edges; 261 | EdgeWeighted newEdge; 262 | while(getline( infile, line )) 263 | { 264 | ss.str(""); 265 | ss.clear(); 266 | ss << line; 267 | 268 | ss >> newEdge.source; 269 | ss >> newEdge.end; 270 | ss >> newEdge.w8; 271 | 272 | edges.push_back(newEdge); 273 | edgeCounter++; 274 | 275 | if(max < newEdge.source) 276 | max = newEdge.source; 277 | if(max < newEdge.end) 278 | max = newEdge.end; 279 | } 280 | infile.close(); 281 | num_nodes = max + 1; 282 | num_edges = edgeCounter; 283 | nodePointer = new uint[num_nodes+1]; 284 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 285 | uint *degree = new uint[num_nodes]; 286 | for(uint i=0; i edges; 317 | Edge newEdge; 318 | while(getline( infile, line )) 319 | { 320 | ss.str(""); 321 | ss.clear(); 322 | ss << line; 323 | 324 | ss >> newEdge.source; 325 | ss >> newEdge.end; 326 | 327 | edges.push_back(newEdge); 328 | edgeCounter++; 329 | 330 | if(max < newEdge.source) 331 | max = newEdge.source; 332 | if(max < newEdge.end) 333 | max = newEdge.end; 334 | } 335 | infile.close(); 336 | num_nodes = max + 1; 337 | num_edges = edgeCounter; 338 | nodePointer = new uint[num_nodes+1]; 339 | 
gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 340 | uint *degree = new uint[num_nodes]; 341 | for(uint i=0; i 8 | class Graph 9 | { 10 | private: 11 | 12 | public: 13 | string graphFilePath; 14 | bool isWeighted; 15 | bool isLarge; 16 | uint num_nodes; 17 | uint num_edges; 18 | uint *nodePointer; 19 | E *edgeList; 20 | uint *outDegree; 21 | bool *label1; 22 | bool *label2; 23 | uint *value; 24 | uint *d_outDegree; 25 | uint *d_value; 26 | bool *d_label1; 27 | bool *d_label2; 28 | string graphFormat; 29 | Graph(string graphFilePath, bool isWeighted); 30 | string GetFileExtension(string fileName); 31 | void AssignW8(uint w8, uint index); 32 | void ReadGraph(); 33 | }; 34 | 35 | template 36 | class GraphPR 37 | { 38 | private: 39 | 40 | public: 41 | string graphFilePath; 42 | bool isWeighted; 43 | bool isLarge; 44 | uint num_nodes; 45 | uint num_edges; 46 | uint *nodePointer; 47 | E *edgeList; 48 | uint *outDegree; 49 | float *value; 50 | float *delta; 51 | uint *d_outDegree; 52 | float *d_value; 53 | float *d_delta; 54 | string graphFormat; 55 | GraphPR(string graphFilePath, bool isWeighted); 56 | string GetFileExtension(string fileName); 57 | void AssignW8(uint w8, uint index); 58 | void ReadGraph(); 59 | }; 60 | 61 | #endif // GRAPH_CUH 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /shared/partitioner.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "partitioner.cuh" 3 | #include "gpu_error_check.cuh" 4 | 5 | template 6 | Partitioner::Partitioner() 7 | { 8 | reset(); 9 | } 10 | 11 | template 12 | void Partitioner::partition(Subgraph &subgraph, uint numActiveNodes) 13 | { 14 | reset(); 15 | 16 | unsigned int from, to; 17 | unsigned int left, right, mid; 18 | unsigned int partitionSize; 19 | unsigned int numNodesInPartition; 20 | unsigned int numPartitionedEdges; 21 | bool foundTo; 22 | unsigned int accurCount; 23 | 24 | 25 | from = 0; 26 | 
to = numActiveNodes; // last in pointers 27 | numPartitionedEdges = 0; 28 | 29 | do 30 | { 31 | left = from; 32 | right = numActiveNodes; 33 | 34 | //cout << "#active nodes: " << numActiveNodes << endl; 35 | //cout << "left: " << left << " right: " << right << endl; 36 | //cout << "pointer to left: " << subgraph.activeNodesPointer[left] << " pointer to right: " << subgraph.activeNodesPointer[right] << endl; 37 | 38 | partitionSize = subgraph.activeNodesPointer[right] - subgraph.activeNodesPointer[left]; 39 | if(partitionSize <= subgraph.max_partition_size) 40 | { 41 | to = right; 42 | } 43 | else 44 | { 45 | foundTo = false; 46 | accurCount = 10; 47 | while(foundTo==false || accurCount>0) 48 | { 49 | mid = (left + right)/2; 50 | partitionSize = subgraph.activeNodesPointer[mid] - subgraph.activeNodesPointer[from]; 51 | if(foundTo == true) 52 | accurCount--; 53 | if(partitionSize <= subgraph.max_partition_size) 54 | { 55 | left = mid; 56 | to = mid; 57 | foundTo = true; 58 | } 59 | else 60 | { 61 | right = mid; 62 | } 63 | } 64 | 65 | 66 | if(to == numActiveNodes) 67 | { 68 | cout << "Error in Partitioning...\n"; 69 | exit(-1); 70 | } 71 | 72 | } 73 | 74 | partitionSize = subgraph.activeNodesPointer[to] - subgraph.activeNodesPointer[from]; 75 | numNodesInPartition = to - from; 76 | 77 | //cout << "from: " << from << " to: " << to << endl; 78 | //cout << "#nodes in P: " << numNodesInPartition << " #edges in P: " << partitionSize << endl; 79 | 80 | fromNode.push_back(from); 81 | fromEdge.push_back(numPartitionedEdges); 82 | partitionNodeSize.push_back(numNodesInPartition); 83 | partitionEdgeSize.push_back(partitionSize); 84 | 85 | from = to; 86 | numPartitionedEdges += partitionSize; 87 | 88 | } while (to != numActiveNodes); 89 | 90 | numPartitions = fromNode.size(); 91 | } 92 | 93 | template 94 | void Partitioner::reset() 95 | { 96 | fromNode.clear(); 97 | fromEdge.clear(); 98 | partitionNodeSize.clear(); 99 | partitionEdgeSize.clear(); 100 | numPartitions = 0; 101 | 
} 102 | 103 | template class Partitioner; 104 | template class Partitioner; 105 | -------------------------------------------------------------------------------- /shared/partitioner.cuh: -------------------------------------------------------------------------------- 1 | #ifndef PARTITIONER_CUH 2 | #define PARTITIONER_CUH 3 | 4 | 5 | #include "globals.hpp" 6 | #include "subgraph.cuh" 7 | 8 | template 9 | class Partitioner 10 | { 11 | private: 12 | 13 | public: 14 | uint numPartitions; 15 | vector fromNode; 16 | vector fromEdge; 17 | vector partitionNodeSize; 18 | vector partitionEdgeSize; 19 | Partitioner(); 20 | void partition(Subgraph &subgraph, uint numActiveNodes); 21 | void reset(); 22 | }; 23 | 24 | #endif // PARTITIONER_CUH 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /shared/subgraph.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "subgraph.cuh" 3 | #include "gpu_error_check.cuh" 4 | #include "graph.cuh" 5 | #include 6 | 7 | 8 | template 9 | Subgraph::Subgraph(uint num_nodes, uint num_edges) 10 | { 11 | cudaProfilerStart(); 12 | cudaError_t error; 13 | cudaDeviceProp dev; 14 | int deviceID; 15 | cudaGetDevice(&deviceID); 16 | error = cudaGetDeviceProperties(&dev, deviceID); 17 | if(error != cudaSuccess) 18 | { 19 | printf("Error: %s\n", cudaGetErrorString(error)); 20 | exit(-1); 21 | } 22 | cudaProfilerStop(); 23 | 24 | max_partition_size = 0.9 * (dev.totalGlobalMem - 8*4*num_nodes) / sizeof(E); 25 | //max_partition_size = 1000000000; 26 | 27 | if(max_partition_size > DIST_INFINITY) 28 | max_partition_size = DIST_INFINITY; 29 | 30 | //cout << "Max Partition Size: " << max_partition_size << endl; 31 | 32 | this->num_nodes = num_nodes; 33 | this->num_edges = num_edges; 34 | 35 | gpuErrorcheck(cudaMallocHost(&activeNodes, num_nodes * sizeof(uint))); 36 | gpuErrorcheck(cudaMallocHost(&activeNodesPointer, (num_nodes+1) * sizeof(uint))); 37 | 
gpuErrorcheck(cudaMallocHost(&activeEdgeList, num_edges * sizeof(E))); 38 | 39 | gpuErrorcheck(cudaMalloc(&d_activeNodes, num_nodes * sizeof(unsigned int))); 40 | gpuErrorcheck(cudaMalloc(&d_activeNodesPointer, (num_nodes+1) * sizeof(unsigned int))); 41 | gpuErrorcheck(cudaMalloc(&d_activeEdgeList, (max_partition_size) * sizeof(E))); 42 | } 43 | 44 | template class Subgraph; 45 | template class Subgraph; 46 | 47 | // For initialization with one active node 48 | //unsigned int numActiveNodes = 1; 49 | //subgraph.activeNodes[0] = SOURCE_NODE; 50 | //for(unsigned int i=graph.nodePointer[SOURCE_NODE], j=0; i 9 | class Subgraph 10 | { 11 | private: 12 | 13 | public: 14 | uint num_nodes; 15 | uint num_edges; 16 | uint numActiveNodes; 17 | 18 | uint *activeNodes; 19 | uint *activeNodesPointer; 20 | E *activeEdgeList; 21 | 22 | uint *d_activeNodes; 23 | uint *d_activeNodesPointer; 24 | E *d_activeEdgeList; 25 | 26 | ull max_partition_size; 27 | 28 | Subgraph(uint num_nodes, uint num_edges); 29 | }; 30 | 31 | #endif // SUBGRAPH_HPP 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /shared/subgraph_generator.cu: -------------------------------------------------------------------------------- 1 | #include "subgraph_generator.cuh" 2 | #include "graph.cuh" 3 | #include "subgraph.cuh" 4 | #include "gpu_error_check.cuh" 5 | 6 | const unsigned int NUM_THREADS = 64; 7 | 8 | const unsigned int THRESHOLD_THREAD = 50000; 9 | 10 | __global__ void prePrefix(unsigned int *activeNodesLabeling, unsigned int *activeNodesDegree, 11 | unsigned int *outDegree, bool *label1, bool *label2, unsigned int numNodes) 12 | { 13 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 14 | if(id < numNodes){ 15 | activeNodesLabeling[id] = label1[id] || label2[id]; // label1 is always zero in sync 16 | //activeNodesLabeling[id] = label[id]; 17 | //activeNodesLabeling[id] = 1; 18 | activeNodesDegree[id] = 0; 19 | if(activeNodesLabeling[id] == 1) 20 | 
activeNodesDegree[id] = outDegree[id]; 21 | } 22 | } 23 | 24 | __global__ void prePrefix(unsigned int *activeNodesLabeling, unsigned int *activeNodesDegree, 25 | unsigned int *outDegree, float *delta, unsigned int numNodes, float acc) 26 | { 27 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 28 | if(id < numNodes){ 29 | if(delta[id] > acc) 30 | { 31 | activeNodesLabeling[id] = 1; 32 | } 33 | else 34 | { 35 | activeNodesLabeling[id] = 0; 36 | } 37 | activeNodesDegree[id] = 0; 38 | if(activeNodesLabeling[id] == 1) 39 | activeNodesDegree[id] = outDegree[id]; 40 | } 41 | } 42 | 43 | __global__ void makeQueue(unsigned int *activeNodes, unsigned int *activeNodesLabeling, 44 | unsigned int *prefixLabeling, unsigned int numNodes) 45 | { 46 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 47 | if(id < numNodes && activeNodesLabeling[id] == 1){ 48 | activeNodes[prefixLabeling[id]] = id; 49 | } 50 | } 51 | 52 | __global__ void makeActiveNodesPointer(unsigned int *activeNodesPointer, unsigned int *activeNodesLabeling, 53 | unsigned int *prefixLabeling, unsigned int *prefixSumDegrees, 54 | unsigned int numNodes) 55 | { 56 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 57 | if(id < numNodes && activeNodesLabeling[id] == 1){ 58 | activeNodesPointer[prefixLabeling[id]] = prefixSumDegrees[id]; 59 | } 60 | } 61 | 62 | // pthread 63 | template 64 | void dynamic(unsigned int tId, 65 | unsigned int numThreads, 66 | unsigned int numActiveNodes, 67 | unsigned int *activeNodes, 68 | unsigned int *outDegree, 69 | unsigned int *activeNodesPointer, 70 | unsigned int *nodePointer, 71 | E *activeEdgeList, 72 | E *edgeList) 73 | { 74 | 75 | unsigned int chunkSize = ceil(numActiveNodes / numThreads); 76 | unsigned int left, right; 77 | left = tId * chunkSize; 78 | right = min(left+chunkSize, numActiveNodes); 79 | 80 | unsigned int thisNode; 81 | unsigned int thisDegree; 82 | unsigned int fromHere; 83 | unsigned int fromThere; 84 | 85 | for(unsigned int i=left; i 
// Allocate the host/device working buffers used to build a subgraph of
// active vertices. Host buffers are pinned (cudaMallocHost) so the repeated
// host<->device copies in generate() can use fast DMA transfers.
// NOTE(review): no matching destructor is visible in this chunk — if none
// exists elsewhere, these allocations leak; confirm against the full file.
template <class E>
SubgraphGenerator<E>::SubgraphGenerator(Graph<E> &graph)
{
	// Per-vertex 0/1 activity flag, active degree, and their prefix sums.
	gpuErrorcheck(cudaMallocHost(&activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	// +1 slot for the end-of-list sentinel appended in generate().
	gpuErrorcheck(cudaMallocHost(&prefixSumDegrees, (graph.num_nodes+1) * sizeof(unsigned int)));

	gpuErrorcheck(cudaMalloc(&d_activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixSumDegrees , (graph.num_nodes+1) * sizeof(unsigned int)));
}

// Same allocations for the PageRank graph type (GraphPR); the buffers and
// sizes are identical, only the graph wrapper differs.
template <class E>
SubgraphGenerator<E>::SubgraphGenerator(GraphPR<E> &graph)
{
	gpuErrorcheck(cudaMallocHost(&activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&prefixSumDegrees, (graph.num_nodes+1) * sizeof(unsigned int)));

	gpuErrorcheck(cudaMalloc(&d_activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixSumDegrees , (graph.num_nodes+1) * sizeof(unsigned int)));
}
	                          d_activeNodesDegree, graph.d_outDegree, graph.d_label1, graph.d_label2, graph.num_nodes);

	// Wrap the raw device arrays so thrust::reduce/exclusive_scan can run on them.
	thrust::device_ptr<unsigned int> ptr_labeling(d_activeNodesLabeling);
	thrust::device_ptr<unsigned int> ptr_labeling_prefixsum(d_prefixLabeling);

	// Frontier size = sum of the 0/1 activity labels.
	subgraph.numActiveNodes = thrust::reduce(ptr_labeling, ptr_labeling + graph.num_nodes);
	//cout << "Number of Active Nodes = " << subgraph.numActiveNodes << endl;

	// Exclusive scan of the labels gives each active vertex its slot in the queue.
	thrust::exclusive_scan(ptr_labeling, ptr_labeling + graph.num_nodes, ptr_labeling_prefixsum);

	makeQueue<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodes, d_activeNodesLabeling, d_prefixLabeling, graph.num_nodes);

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodes, subgraph.d_activeNodes, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	thrust::device_ptr<unsigned int> ptr_degrees(d_activeNodesDegree);
	thrust::device_ptr<unsigned int> ptr_degrees_prefixsum(d_prefixSumDegrees);

	// Exclusive scan of active degrees -> per-vertex offsets into the active edge list.
	thrust::exclusive_scan(ptr_degrees, ptr_degrees + graph.num_nodes, ptr_degrees_prefixsum);

	makeActiveNodesPointer<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodesPointer, d_activeNodesLabeling, d_prefixLabeling, d_prefixSumDegrees, graph.num_nodes);
	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	// Total subgraph edges = offset of the last active vertex plus its
	// out-degree; guard against an empty frontier.
	unsigned int numActiveEdges = 0;
	if(subgraph.numActiveNodes>0)
		numActiveEdges = subgraph.activeNodesPointer[subgraph.numActiveNodes-1] + graph.outDegree[subgraph.activeNodes[subgraph.numActiveNodes-1]];

	// Append the edge total as a sentinel so pointer[i+1]-pointer[i] is valid
	// for the last vertex too, then refresh the host copy including it.
	unsigned int last = numActiveEdges;
	gpuErrorcheck(cudaMemcpy(subgraph.d_activeNodesPointer+subgraph.numActiveNodes, &last, sizeof(unsigned int), cudaMemcpyHostToDevice));

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, (subgraph.numActiveNodes+1)*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	//finishDynG = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dyng = finishDynG-startDynG;
	//std::time_t finish_time_dyng = std::chrono::system_clock::to_time_t(finishDynG);
	//std::cout << "Dynamic GPU Time = " << elapsed_seconds_dyng.count() << std::endl;

	//std::chrono::time_point<std::chrono::system_clock> startDynC, finishDynC;
	//startDynC = std::chrono::system_clock::now();

	unsigned int numThreads = NUM_THREADS;

	// For small frontiers the thread-spawn overhead dominates: copy serially.
	if(subgraph.numActiveNodes < THRESHOLD_THREAD)
		numThreads = 1;

	thread runThreads[numThreads];

	// Each thread copies the adjacency lists of one contiguous chunk of active
	// vertices into subgraph.activeEdgeList (see dynamic<E> above).
	for(unsigned int t=0; t<numThreads; t++)
	{
		runThreads[t] = thread(dynamic<E>,
								t,
								numThreads,
								subgraph.numActiveNodes,
								subgraph.activeNodes,
								graph.outDegree,
								subgraph.activeNodesPointer,
								graph.nodePointer,
								subgraph.activeEdgeList,
								graph.edgeList);

	}

	for(unsigned int t=0; t<numThreads; t++)
		runThreads[t].join();

	//finishDynC = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dync = finishDynC-startDynC;
	//std::time_t finish_time_dync = std::chrono::system_clock::to_time_t(finishDynC);
	//std::cout << "Dynamic CPU Time = " << elapsed_seconds_dync.count() << std::endl;

}


// PageRank variant: a vertex is active while its delta exceeds the accuracy
// threshold `acc` (see the float overload of prePrefix).
template <class E>
void SubgraphGenerator<E>::generate(GraphPR<E> &graph, Subgraph<E> &subgraph, float acc)
{
	//std::chrono::time_point<std::chrono::system_clock> startDynG, finishDynG;
	//startDynG = std::chrono::system_clock::now();

	prePrefix<<<graph.num_nodes/512 + 1, 512>>>(d_activeNodesLabeling, d_activeNodesDegree, graph.d_outDegree, graph.d_delta, graph.num_nodes, acc);

	thrust::device_ptr<unsigned int> ptr_labeling(d_activeNodesLabeling);
	thrust::device_ptr<unsigned int> ptr_labeling_prefixsum(d_prefixLabeling);

	// Frontier size = sum of the 0/1 activity labels.
	subgraph.numActiveNodes = thrust::reduce(ptr_labeling, ptr_labeling + graph.num_nodes);
	//cout << "Number of Active Nodes = " << subgraph.numActiveNodes << endl;

	thrust::exclusive_scan(ptr_labeling, ptr_labeling + graph.num_nodes, ptr_labeling_prefixsum);

	makeQueue<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodes, d_activeNodesLabeling, d_prefixLabeling, graph.num_nodes);

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodes, subgraph.d_activeNodes, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	thrust::device_ptr<unsigned int> ptr_degrees(d_activeNodesDegree);
	thrust::device_ptr<unsigned int> ptr_degrees_prefixsum(d_prefixSumDegrees);

	// Exclusive scan of active degrees -> per-vertex offsets into the active edge list.
	thrust::exclusive_scan(ptr_degrees, ptr_degrees + graph.num_nodes, ptr_degrees_prefixsum);

	makeActiveNodesPointer<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodesPointer, d_activeNodesLabeling, d_prefixLabeling, d_prefixSumDegrees, graph.num_nodes);
	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	// Total subgraph edges = last active vertex's offset + its out-degree
	// (guarded against an empty frontier).
	unsigned int numActiveEdges = 0;
	if(subgraph.numActiveNodes>0)
		numActiveEdges = subgraph.activeNodesPointer[subgraph.numActiveNodes-1] + graph.outDegree[subgraph.activeNodes[subgraph.numActiveNodes-1]];

	// Sentinel entry so pointer[i+1]-pointer[i] is valid for the last vertex.
	unsigned int last = numActiveEdges;
	gpuErrorcheck(cudaMemcpy(subgraph.d_activeNodesPointer+subgraph.numActiveNodes, &last, sizeof(unsigned int), cudaMemcpyHostToDevice));

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, (subgraph.numActiveNodes+1)*sizeof(unsigned int), cudaMemcpyDeviceToHost));


	//finishDynG = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dyng = finishDynG-startDynG;
	//std::time_t finish_time_dyng = std::chrono::system_clock::to_time_t(finishDynG);
	//std::cout << "Dynamic GPU Time = " << elapsed_seconds_dyng.count() << std::endl;

	//std::chrono::time_point<std::chrono::system_clock> startDynC, finishDynC;
	//startDynC = std::chrono::system_clock::now();

	unsigned int numThreads = NUM_THREADS;

	// Serial fallback for small frontiers (thread-spawn cost dominates).
	if(subgraph.numActiveNodes < THRESHOLD_THREAD)
		numThreads = 1;

	thread runThreads[numThreads];

	// Fan out the edge-list compaction across CPU threads (see dynamic<E>).
	for(unsigned int t=0; t<numThreads; t++)
	{
		runThreads[t] = thread(dynamic<E>,
								t,
								numThreads,
								subgraph.numActiveNodes,
								subgraph.activeNodes,
								graph.outDegree,
								subgraph.activeNodesPointer,
								graph.nodePointer,
								subgraph.activeEdgeList,
								graph.edgeList);

	}

	for(unsigned int t=0; t<numThreads; t++)
		runThreads[t].join();

	//finishDynC = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dync = finishDynC-startDynC;
	//std::time_t finish_time_dync = std::chrono::system_clock::to_time_t(finishDynC);
	//std::cout << "Dynamic CPU Time = " << elapsed_seconds_dync.count() << std::endl;

}

// Explicit instantiations for the two edge payload types used by the apps
// (unweighted traversal vs. weighted SSSP/SSWP edges).
template class SubgraphGenerator<OutEdge>;
template class SubgraphGenerator<OutEdgeWeighted>;

#ifndef SUBGRAPH_GENERATOR_HPP
#define SUBGRAPH_GENERATOR_HPP


#include "globals.hpp"
#include "graph.cuh"
#include "subgraph.cuh"
#include <cuda_runtime.h>
#include <thrust/device_ptr.h>
#include <thrust/scan.h>

// Builds, on every super-iteration, the compacted subgraph of currently
// active vertices: a dense queue of vertex ids, per-vertex offsets into a
// compacted edge list, and the compacted edge list itself (filled on the CPU).
// Host buffers are pinned; matching d_* pointers live on the device.
template <class E>
class SubgraphGenerator
{
private:

public:
	unsigned int *activeNodesLabeling;   // 0/1 per-vertex activity flag (host)
	unsigned int *activeNodesDegree;     // out-degree of active vertices, else 0 (host)
	unsigned int *prefixLabeling;        // exclusive scan of labels -> queue slots (host)
	unsigned int *prefixSumDegrees;      // exclusive scan of degrees -> edge offsets (host)
	unsigned int *d_activeNodesLabeling; // device mirrors of the above
	unsigned int *d_activeNodesDegree;
	unsigned int *d_prefixLabeling;
	unsigned int *d_prefixSumDegrees;
	SubgraphGenerator(Graph<E> &graph);
	SubgraphGenerator(GraphPR<E> &graph);
	void generate(Graph<E> &graph, Subgraph<E> &subgraph);
	// PR variant: activity decided by delta > acc instead of the label arrays.
	void generate(GraphPR<E> &graph, Subgraph<E> &subgraph, float acc);
};

#endif	//	SUBGRAPH_GENERATOR_HPP
cout << i << ":" << results[i]; 12 | } 13 | cout << "]\n"; 14 | } 15 | 16 | void utilities::PrintResults(float *results, uint n) 17 | { 18 | cout << "Results of first "<< n << " nodes:\n["; 19 | for(int i=0; i0) 22 | cout << " "; 23 | cout << i << ":" << results[i]; 24 | } 25 | cout << "]\n"; 26 | } 27 | 28 | void utilities::PrintResults(double *results, uint n) 29 | { 30 | cout << "Results of first "<< n << " nodes:\n["; 31 | for(int i=0; i0) 34 | cout << " "; 35 | cout << i << ":" << results[i]; 36 | } 37 | cout << "]\n"; 38 | } 39 | 40 | void utilities::SaveResults(string filepath, uint *results, uint n) 41 | { 42 | cout << "Saving the results into the following file:\n"; 43 | cout << ">> " << filepath << endl; 44 | ofstream outfile; 45 | outfile.open(filepath); 46 | for(int i=0; i 5 | Test::Test() 6 | { 7 | this->a = 1; 8 | this->b = 1; 9 | } 10 | 11 | template 12 | int Test::sum(int a, int b) 13 | { 14 | return a + b; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /shared/test.cuh: -------------------------------------------------------------------------------- 1 | #ifndef TEST_HPP 2 | #define TEST_HPP 3 | 4 | template 5 | class Test 6 | { 7 | private: 8 | 9 | public: 10 | int a; 11 | int b; 12 | Test(); 13 | int sum(int a, int b); 14 | }; 15 | 16 | #endif // TEST_HPP 17 | -------------------------------------------------------------------------------- /shared/timer.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "timer.hpp" 3 | 4 | 5 | void Timer::Start() 6 | { 7 | //A = chrono::system_clock::now(); 8 | gettimeofday( &StartingTime, NULL ); 9 | } 10 | 11 | 12 | float Timer::Finish() 13 | { 14 | //B = std::chrono::system_clock::now(); 15 | //chrono::duration elapsed_seconds = B - A; 16 | //time_t finish_time = std::chrono::system_clock::to_time_t(B); 17 | //cout << "title" << elapsed_seconds.count()*1000; 18 | timeval PausingTime, ElapsedTime; 19 | 
	gettimeofday( &PausingTime, NULL );
	// Elapsed = now - Start() timestamp.
	timersub(&PausingTime, &StartingTime, &ElapsedTime);
	// Convert seconds + microseconds to milliseconds.
	float d = ElapsedTime.tv_sec*1000.0+ElapsedTime.tv_usec/1000.0;
	return d;
}

#ifndef TIMER_HPP
#define TIMER_HPP


#include "globals.hpp"
#include <sys/time.h>
#include <time.h>


// Simple wall-clock stopwatch over gettimeofday().
// Start() records a timestamp; Finish() returns the elapsed time in
// milliseconds since the last Start(). Not reentrant across threads.
class Timer
{
private:
	//chrono::time_point<chrono::system_clock> A, B;
	timeval StartingTime;	// timestamp captured by Start()
public:
	void Start();
	float Finish();	// elapsed milliseconds since Start()
};

#endif	//	TIMER_HPP
#include "../shared/globals.hpp"
#include "../shared/timer.hpp"
#include "../shared/argument_parsing.cuh"
#include "../shared/graph.cuh"
#include "../shared/subgraph.cuh"
#include "../shared/partitioner.cuh"
#include "../shared/subgraph_generator.cuh"
#include "../shared/gpu_error_check.cuh"
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"


// Asynchronous BFS driver: per GPU partition, iterate the bfs_async kernel
// to convergence before moving on, regenerating the active subgraph after
// each super-iteration.
int main(int argc, char** argv)
{
	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	Graph<OutEdge> graph(arguments.input, false);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. BFS init: every level = DIST_INFINITY, labels cleared, then the
	// source gets level 0 and is the only initially-active vertex; the arrays
	// are copied to the device. Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = DIST_INFINITY;
		graph.label1[i] = false;
		graph.label2[i] = false;
	}
	graph.value[arguments.sourceNode] = 0;
	graph.label1[arguments.sourceNode] = true;

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);

	// NOTE(review): reconstructed label reset (truncated in dump).
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.label1[i] = true;
		graph.label2[i] = false;
	}
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Partitioner<OutEdge> partitioner;

	timer.Start();

	unsigned int gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one "super iteration" per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		// Split the active subgraph into partitions that fit in GPU memory.
		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			// Presumably merges label1|label2 for this partition's nodes —
			// see gpu_kernels.cu (not visible in this chunk).
			mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: relax this partition until no label changes.
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				// Ping-pong label arrays between odd/even inner iterations.
				bfs_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						d_finished,
						(itr%2==1) ? graph.d_label1 : graph.d_label2,
						(itr%2==1) ? graph.d_label2 : graph.d_label1);

				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
	// Synchronous BFS driver: one bfs_kernel sweep per partition per global
	// iteration (no inner convergence loop), regenerating the subgraph after
	// each super-iteration. (Includes/main header are on the preceding chunk.)

	Graph<OutEdge> graph(arguments.input, false);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. Levels = DIST_INFINITY, labels cleared, source set to 0 and
	// activated, arrays copied to the device. Verify against the repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = DIST_INFINITY;
		graph.label1[i] = false;
		graph.label2[i] = false;
	}
	graph.value[arguments.sourceNode] = 0;
	graph.label1[arguments.sourceNode] = true;

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);


	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint itr = 0;

	// One global iteration = one pass over all partitions of the frontier.
	while (subgraph.numActiveNodes>0)
	{
		itr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			// Presumably advances/clears the two label arrays for this
			// partition — see gpu_kernels.cu (not visible in this chunk).
			moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			bfs_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
					partitioner.fromNode[i],
					partitioner.fromEdge[i],
					subgraph.d_activeNodes,
					subgraph.d_activeNodesPointer,
					subgraph.d_activeEdgeList,
					graph.d_outDegree,
					graph.d_value,
					//d_finished,
					graph.d_label1,
					graph.d_label2);

			cudaDeviceSynchronize();
			gpuErrorcheck( cudaPeekAtLastError() );
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	cout << "Number of iterations = " << itr << endl;

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
#include "../shared/timer.hpp"
#include "../shared/argument_parsing.cuh"
#include "../shared/graph.cuh"
#include "../shared/subgraph.cuh"
#include "../shared/partitioner.cuh"
#include "../shared/subgraph_generator.cuh"
#include "../shared/gpu_error_check.cuh"
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"


// Asynchronous connected-components driver: label propagation (cc_async)
// iterated to convergence per partition; every vertex starts active with
// its own id as component label.
int main(int argc, char** argv)
{
	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	Graph<OutEdge> graph(arguments.input, false);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. CC init: value[i] = i (own component id), all vertices active;
	// arrays copied to the device. Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = i;
		graph.label1[i] = true;
		graph.label2[i] = false;
	}

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);


	Partitioner<OutEdge> partitioner;

	timer.Start();

	unsigned int gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one super iteration per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			// Presumably merges label1|label2 for this partition's nodes —
			// see gpu_kernels.cu (not visible in this chunk).
			mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: propagate labels within this partition until
			// nothing changes (d_finished stays true).
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				// Ping-pong label arrays between odd/even inner iterations.
				cc_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						d_finished,
						(itr%2==1) ? graph.d_label1 : graph.d_label2,
						(itr%2==1) ? graph.d_label2 : graph.d_label1);

				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
	// Synchronous connected-components driver body (the includes, graph read
	// and value[i]=i initialization are on the preceding chunk): one
	// cc_kernel sweep per partition per global iteration.

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);


	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint itr = 0;

	// One global iteration = one pass over all partitions of the frontier.
	while (subgraph.numActiveNodes>0)
	{
		itr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			// Presumably advances/clears the two label arrays for this
			// partition — see gpu_kernels.cu (not visible in this chunk).
			moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			cc_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
					partitioner.fromNode[i],
					partitioner.fromEdge[i],
					subgraph.d_activeNodes,
					subgraph.d_activeNodesPointer,
					subgraph.d_activeEdgeList,
					graph.d_outDegree,
					graph.d_value,
					//d_finished,
					graph.d_label1,
					graph.d_label2);

			cudaDeviceSynchronize();
			gpuErrorcheck( cudaPeekAtLastError() );
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	cout << "Number of iterations = " << itr << endl;

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
#include "../shared/gpu_error_check.cuh"
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"
#include "../shared/test.cuh"
#include "../shared/test.cu"


// Asynchronous PageRank (delta-based) driver: a vertex stays active while
// its delta exceeds the accuracy threshold `acc`; pr_async is iterated to
// convergence per partition.
int main(int argc, char** argv)
{

	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	GraphPR<OutEdge> graph(arguments.input, true);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	//for(unsigned int i=0; i<100; i++)
	//	cout << graph.edgeList[i].end << " " << graph.edgeList[i].w8;

	float initPR = 0.15;	// damping-style base rank assigned to every vertex
	float acc = 0.01;	// delta threshold below which a vertex goes inactive

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. PR init: value[i] = initPR, delta[i] = initPR; value and delta
	// copied to the device. Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = initPR;
		graph.delta[i] = initPR;
	}

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_delta, graph.delta, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph, acc);

	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one super iteration per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			//mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: push deltas within this partition until all
			// residual deltas drop below acc (d_finished stays true).
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				pr_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						graph.d_delta,
						d_finished,
						acc);


				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated deltas.
		subgen.generate(graph, subgraph, acc);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(float), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));


	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
	// Synchronous PageRank driver body (the includes, graph read and timer
	// setup are on the preceding chunk): one pr_kernel sweep per partition
	// per global iteration; activity decided by delta > acc.

	float initPR = 0.15;	// base rank assigned to every vertex
	float acc = 0.01;	// delta threshold below which a vertex goes inactive

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. value[i] = initPR, delta[i] = initPR, copied to the device.
	// Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = initPR;
		graph.delta[i] = initPR;
	}

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_delta, graph.delta, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph, acc);

	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint gItr = 0;


	// One global iteration = one pass over all partitions of the frontier.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			pr_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
					partitioner.fromNode[i],
					partitioner.fromEdge[i],
					subgraph.d_activeNodes,
					subgraph.d_activeNodesPointer,
					subgraph.d_activeEdgeList,
					graph.d_outDegree,
					graph.d_value,
					graph.d_delta,
					acc);


			cudaDeviceSynchronize();
			gpuErrorcheck( cudaPeekAtLastError() );

		}

		// Rebuild the active subgraph from the updated deltas.
		subgen.generate(graph, subgraph, acc);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	cout << "Number of iterations = " << gItr << endl;

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(float), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));


	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"
#include "../shared/test.cuh"
#include "../shared/test.cu"


// Asynchronous SSSP driver over weighted edges (OutEdgeWeighted): sssp_async
// relaxes distances within each partition until convergence; the active
// subgraph is rebuilt after every super-iteration.
int main(int argc, char** argv)
{
	/*
	Test<int> test;
	cout << test.sum(20, 30) << endl;
	*/

	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	Graph<OutEdgeWeighted> graph(arguments.input, true);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	//for(unsigned int i=0; i<100; i++)
	//	cout << graph.edgeList[i].end << " " << graph.edgeList[i].w8;

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. SSSP init: distances = DIST_INFINITY, labels cleared, source
	// distance 0 and active; arrays copied to the device. Verify against the
	// upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = DIST_INFINITY;
		graph.label1[i] = false;
		graph.label2[i] = false;
	}
	graph.value[arguments.sourceNode] = 0;
	graph.label1[arguments.sourceNode] = true;

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdgeWeighted> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdgeWeighted> subgen(graph);

	subgen.generate(graph, subgraph);

	// NOTE(review): reconstructed label reset (truncated in dump).
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.label1[i] = true;
		graph.label2[i] = false;
	}
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Partitioner<OutEdgeWeighted> partitioner;

	timer.Start();

	uint gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one super iteration per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted weighted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdgeWeighted), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			// Presumably merges label1|label2 for this partition's nodes —
			// see gpu_kernels.cu (not visible in this chunk).
			mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: relax distances within this partition until
			// no vertex improves (d_finished stays true).
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				// Ping-pong label arrays between odd/even inner iterations.
				sssp_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						d_finished,
						(itr%2==1) ? graph.d_label1 : graph.d_label2,
						(itr%2==1) ? graph.d_label2 : graph.d_label1);

				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	//for(int i=0; i<20; i++)
	//	cout << graph.value[i] << endl;

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
(s).\n"; 27 | 28 | for(unsigned int i=0; i subgraph(graph.num_nodes, graph.num_edges); 45 | 46 | SubgraphGenerator subgen(graph); 47 | 48 | subgen.generate(graph, subgraph); 49 | 50 | 51 | Partitioner partitioner; 52 | 53 | timer.Start(); 54 | 55 | uint itr = 0; 56 | 57 | while (subgraph.numActiveNodes>0) 58 | { 59 | itr++; 60 | 61 | partitioner.partition(subgraph, subgraph.numActiveNodes); 62 | // a super iteration 63 | for(int i=0; i>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 70 | 71 | sssp_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i], 72 | partitioner.fromNode[i], 73 | partitioner.fromEdge[i], 74 | subgraph.d_activeNodes, 75 | subgraph.d_activeNodesPointer, 76 | subgraph.d_activeEdgeList, 77 | graph.d_outDegree, 78 | graph.d_value, 79 | //d_finished, 80 | graph.d_label1, 81 | graph.d_label2); 82 | 83 | cudaDeviceSynchronize(); 84 | gpuErrorcheck( cudaPeekAtLastError() ); 85 | } 86 | 87 | subgen.generate(graph, subgraph); 88 | 89 | } 90 | 91 | float runtime = timer.Finish(); 92 | cout << "Processing finished in " << runtime << " (ms).\n"; 93 | 94 | cout << "Number of iterations = " << itr << endl; 95 | 96 | gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost)); 97 | 98 | utilities::PrintResults(graph.value, min(30, graph.num_nodes)); 99 | 100 | if(arguments.hasOutput) 101 | utilities::SaveResults(arguments.output, graph.value, graph.num_nodes); 102 | } 103 | 104 | -------------------------------------------------------------------------------- /subway/sswp-async.cu: -------------------------------------------------------------------------------- 1 | #include "../shared/globals.hpp" 2 | #include "../shared/timer.hpp" 3 | #include "../shared/argument_parsing.cuh" 4 | #include "../shared/graph.cuh" 5 | #include "../shared/subgraph.cuh" 6 | #include "../shared/partitioner.cuh" 7 | #include 
"../shared/subgraph_generator.cuh" 8 | #include "../shared/gpu_error_check.cuh" 9 | #include "../shared/gpu_kernels.cuh" 10 | #include "../shared/subway_utilities.hpp" 11 | 12 | 13 | int main(int argc, char** argv) 14 | { 15 | cudaFree(0); 16 | 17 | ArgumentParser arguments(argc, argv, true, false); 18 | 19 | Timer timer; 20 | timer.Start(); 21 | 22 | Graph graph(arguments.input, true); 23 | graph.ReadGraph(); 24 | 25 | float readtime = timer.Finish(); 26 | cout << "Graph Reading finished in " << readtime/1000 << " (s).\n"; 27 | 28 | //for(unsigned int i=0; i<100; i++) 29 | // cout << graph.edgeList[i].end << " " << graph.edgeList[i].w8; 30 | 31 | for(unsigned int i=0; i subgraph(graph.num_nodes, graph.num_edges); 47 | 48 | SubgraphGenerator subgen(graph); 49 | 50 | subgen.generate(graph, subgraph); 51 | 52 | for(unsigned int i=0; i partitioner; 61 | 62 | timer.Start(); 63 | 64 | uint gItr = 0; 65 | 66 | bool finished; 67 | bool *d_finished; 68 | gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool))); 69 | 70 | while (subgraph.numActiveNodes>0) 71 | { 72 | gItr++; 73 | 74 | partitioner.partition(subgraph, subgraph.numActiveNodes); 75 | // a super iteration 76 | for(int i=0; i>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 83 | mixLabels<<>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 84 | 85 | uint itr = 0; 86 | do 87 | { 88 | cout << "\t\tIteration " << ++itr << endl; 89 | finished = true; 90 | gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice)); 91 | 92 | sswp_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i], 93 | partitioner.fromNode[i], 94 | partitioner.fromEdge[i], 95 | subgraph.d_activeNodes, 96 | subgraph.d_activeNodesPointer, 97 | subgraph.d_activeEdgeList, 98 | graph.d_outDegree, 99 | graph.d_value, 100 | d_finished, 101 | (itr%2==1) ? 
graph.d_label1 : graph.d_label2, 102 | (itr%2==1) ? graph.d_label2 : graph.d_label1); 103 | 104 | cudaDeviceSynchronize(); 105 | gpuErrorcheck( cudaPeekAtLastError() ); 106 | 107 | gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost)); 108 | }while(!(finished)); 109 | 110 | cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl; 111 | } 112 | 113 | subgen.generate(graph, subgraph); 114 | 115 | } 116 | 117 | float runtime = timer.Finish(); 118 | cout << "Processing finished in " << runtime/1000 << " (s).\n"; 119 | 120 | gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost)); 121 | 122 | utilities::PrintResults(graph.value, min(30, graph.num_nodes)); 123 | 124 | if(arguments.hasOutput) 125 | utilities::SaveResults(arguments.output, graph.value, graph.num_nodes); 126 | } 127 | 128 | -------------------------------------------------------------------------------- /subway/sswp-sync.cu: -------------------------------------------------------------------------------- 1 | #include "../shared/globals.hpp" 2 | #include "../shared/timer.hpp" 3 | #include "../shared/argument_parsing.cuh" 4 | #include "../shared/graph.cuh" 5 | #include "../shared/subgraph.cuh" 6 | #include "../shared/partitioner.cuh" 7 | #include "../shared/subgraph_generator.cuh" 8 | #include "../shared/gpu_error_check.cuh" 9 | #include "../shared/gpu_kernels.cuh" 10 | #include "../shared/subway_utilities.hpp" 11 | 12 | 13 | int main(int argc, char** argv) 14 | { 15 | cudaFree(0); 16 | 17 | ArgumentParser arguments(argc, argv, true, false); 18 | 19 | Timer timer; 20 | timer.Start(); 21 | 22 | Graph graph(arguments.input, true); 23 | graph.ReadGraph(); 24 | 25 | float readtime = timer.Finish(); 26 | cout << "Graph Reading finished in " << readtime/1000 << " (s).\n"; 27 | 28 | for(unsigned int i=0; i subgraph(graph.num_nodes, graph.num_edges); 45 | 
46 | SubgraphGenerator subgen(graph); 47 | 48 | subgen.generate(graph, subgraph); 49 | 50 | 51 | Partitioner partitioner; 52 | 53 | timer.Start(); 54 | 55 | uint itr = 0; 56 | 57 | while (subgraph.numActiveNodes>0) 58 | { 59 | itr++; 60 | 61 | partitioner.partition(subgraph, subgraph.numActiveNodes); 62 | // a super iteration 63 | for(int i=0; i>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 70 | 71 | sswp_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i], 72 | partitioner.fromNode[i], 73 | partitioner.fromEdge[i], 74 | subgraph.d_activeNodes, 75 | subgraph.d_activeNodesPointer, 76 | subgraph.d_activeEdgeList, 77 | graph.d_outDegree, 78 | graph.d_value, 79 | //d_finished, 80 | graph.d_label1, 81 | graph.d_label2); 82 | 83 | cudaDeviceSynchronize(); 84 | gpuErrorcheck( cudaPeekAtLastError() ); 85 | } 86 | 87 | subgen.generate(graph, subgraph); 88 | 89 | } 90 | 91 | float runtime = timer.Finish(); 92 | cout << "Processing finished in " << runtime/1000 << " (s).\n"; 93 | 94 | cout << "Number of iterations = " << itr << endl; 95 | 96 | gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost)); 97 | 98 | utilities::PrintResults(graph.value, min(30, graph.num_nodes)); 99 | 100 | if(arguments.hasOutput) 101 | utilities::SaveResults(arguments.output, graph.value, graph.num_nodes); 102 | } 103 | 104 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC=g++ 3 | NC=nvcc 4 | CFLAGS=-std=c++11 -O3 5 | NFLAGS=-arch=sm_60 6 | 7 | SHARED=../shared 8 | 9 | all: converter 10 | 11 | converter: converter.cpp 12 | $(CC) converter.cpp -o converter $(CFLAGS) 13 | 14 | clean: 15 | rm -f converter 16 | -------------------------------------------------------------------------------- 
/tools/converter.cpp: -------------------------------------------------------------------------------- 1 | #include "../shared/globals.hpp" 2 | 3 | 4 | bool IsWeightedFormat(string format) 5 | { 6 | if((format == "bwcsr") || 7 | (format == "wcsr") || 8 | (format == "wel")) 9 | return true; 10 | return false; 11 | } 12 | 13 | string GetFileExtension(string fileName) 14 | { 15 | if(fileName.find_last_of(".") != string::npos) 16 | return fileName.substr(fileName.find_last_of(".")+1); 17 | return ""; 18 | } 19 | 20 | int main(int argc, char** argv) 21 | { 22 | if(argc!= 2) 23 | { 24 | cout << "\nThere was an error parsing command line arguments\n"; 25 | exit(0); 26 | } 27 | 28 | string input = string(argv[1]); 29 | 30 | if(GetFileExtension(input) == "el") 31 | { 32 | ifstream infile; 33 | infile.open(input); 34 | stringstream ss; 35 | uint max = 0; 36 | string line; 37 | uint edgeCounter = 0; 38 | 39 | vector edges; 40 | Edge newEdge; 41 | while(getline( infile, line )) 42 | { 43 | ss.str(""); 44 | ss.clear(); 45 | ss << line; 46 | 47 | ss >> newEdge.source; 48 | ss >> newEdge.end; 49 | 50 | edges.push_back(newEdge); 51 | edgeCounter++; 52 | 53 | if(max < newEdge.source) 54 | max = newEdge.source; 55 | if(max < newEdge.end) 56 | max = newEdge.end; 57 | } 58 | infile.close(); 59 | 60 | uint num_nodes = max + 1; 61 | uint num_edges = edgeCounter; 62 | uint *nodePointer = new uint[num_nodes+1]; 63 | OutEdge *edgeList = new OutEdge[num_edges]; 64 | uint *degree = new uint[num_nodes]; 65 | for(uint i=0; i edges; 107 | EdgeWeighted newEdge; 108 | while(getline( infile, line )) 109 | { 110 | ss.str(""); 111 | ss.clear(); 112 | ss << line; 113 | 114 | ss >> newEdge.source; 115 | ss >> newEdge.end; 116 | ss >> newEdge.w8; 117 | 118 | edges.push_back(newEdge); 119 | edgeCounter++; 120 | 121 | if(max < newEdge.source) 122 | max = newEdge.source; 123 | if(max < newEdge.end) 124 | max = newEdge.end; 125 | } 126 | infile.close(); 127 | 128 | uint num_nodes = max + 1; 129 | uint 
num_edges = edgeCounter; 130 | uint *nodePointer = new uint[num_nodes+1]; 131 | OutEdgeWeighted *edgeList = new OutEdgeWeighted[num_edges]; 132 | uint *degree = new uint[num_nodes]; 133 | for(uint i=0; i