├── LICENSE.md ├── Makefile ├── README.md ├── shared ├── Makefile ├── argument_parsing.cu ├── argument_parsing.cuh ├── globals.hpp ├── gpu_error_check.cuh ├── gpu_kernels.cu ├── gpu_kernels.cuh ├── graph.cu ├── graph.cuh ├── partitioner.cu ├── partitioner.cuh ├── subgraph.cu ├── subgraph.cuh ├── subgraph_generator.cu ├── subgraph_generator.cuh ├── subway_utilities.cpp ├── subway_utilities.hpp ├── test.cu ├── test.cuh ├── timer.cpp └── timer.hpp ├── subway ├── Makefile ├── bfs-async.cu ├── bfs-sync.cu ├── cc-async.cu ├── cc-sync.cu ├── pr-async.cu ├── pr-sync.cu ├── sssp-async.cu ├── sssp-sync.cu ├── sswp-async.cu └── sswp-sync.cu └── tools ├── Makefile └── converter.cpp /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 AutomataLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | CC=g++ 4 | NC=nvcc 5 | CFLAGS=-std=c++11 -O3 6 | NFLAGS=-arch=sm_60 7 | 8 | SHARED=shared 9 | SUBWAY=subway 10 | TOOLS=tools 11 | 12 | DEP=$(SHARED)/timer.o $(SHARED)/argument_parsing.o $(SHARED)/graph.o $(SHARED)/subgraph.o $(SHARED)/partitioner.o $(SHARED)/subgraph_generator.o $(SHARED)/gpu_kernels.o $(SHARED)/subway_utilities.o $(SHARED)/test.o 13 | 14 | all: make1 make2 make3 bfs-sync cc-sync sssp-sync sswp-sync pr-sync bfs-async cc-async sssp-async sswp-async pr-async 15 | 16 | make1: 17 | make -C $(SHARED) 18 | 19 | make2: 20 | make -C $(SUBWAY) 21 | 22 | make3: 23 | make -C $(TOOLS) 24 | 25 | 26 | bfs-sync: $(SUBWAY)/bfs-sync.o $(DEP) 27 | $(NC) $(SUBWAY)/bfs-sync.o $(DEP) -o bfs-sync $(CFLAGS) $(NFLAGS) 28 | 29 | cc-sync: $(SUBWAY)/cc-sync.o $(DEP) 30 | $(NC) $(SUBWAY)/cc-sync.o $(DEP) -o cc-sync $(CFLAGS) $(NFLAGS) 31 | 32 | sssp-sync: $(SUBWAY)/sssp-sync.o $(DEP) 33 | $(NC) $(SUBWAY)/sssp-sync.o $(DEP) -o sssp-sync $(CFLAGS) $(NFLAGS) 34 | 35 | sswp-sync: $(SUBWAY)/sswp-sync.o $(DEP) 36 | $(NC) $(SUBWAY)/sswp-sync.o $(DEP) -o sswp-sync $(CFLAGS) $(NFLAGS) 37 | 38 | pr-sync: $(SUBWAY)/pr-sync.o $(DEP) 39 | $(NC) $(SUBWAY)/pr-sync.o $(DEP) -o pr-sync $(CFLAGS) $(NFLAGS) 40 | 41 | bfs-async: $(SUBWAY)/bfs-async.o $(DEP) 42 | $(NC) $(SUBWAY)/bfs-async.o $(DEP) -o bfs-async $(CFLAGS) $(NFLAGS) 43 | 44 | cc-async: $(SUBWAY)/cc-async.o $(DEP) 45 | $(NC) $(SUBWAY)/cc-async.o $(DEP) -o cc-async $(CFLAGS) $(NFLAGS) 46 | 47 | sssp-async: $(SUBWAY)/sssp-async.o $(DEP) 48 | $(NC) $(SUBWAY)/sssp-async.o $(DEP) -o sssp-async $(CFLAGS) $(NFLAGS) 49 | 50 | sswp-async: $(SUBWAY)/sswp-async.o $(DEP) 51 | $(NC) $(SUBWAY)/sswp-async.o $(DEP) -o sswp-async $(CFLAGS) $(NFLAGS) 52 | 53 | pr-async: $(SUBWAY)/pr-async.o $(DEP) 54 | $(NC) $(SUBWAY)/pr-async.o $(DEP) -o pr-async $(CFLAGS) 
$(NFLAGS) 55 | 56 | clean: 57 | make -C $(SHARED) clean 58 | make -C $(SUBWAY) clean 59 | make -C $(TOOLS) clean 60 | rm -f bfs-sync cc-sync sssp-sync sswp-sync pr-sync bfs-async cc-async sssp-async sswp-async pr-async 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Subway 2 | Subway is an out-of-GPU-memory graph processing framework. 3 | 4 | Subway provides a highly cost-effective solution to extracting a subgraph that only consists of the edges of active vertices. This allows it to transfer only the active parts of the graph from CPU to GPU, thus dramatically reduces the volume of data transfer. The benefits from the data transfer reduction outweigh the costs of subgraph generation in (almost) all iterations of graph processing, bringing in substantial overall performance improvements. Moreover, it supports asynchronous processing between the loaded subgraph in GPU and the rest of the graph in host memory, which tends to decrease the number of global iterations, thus can further reduce the data transfer. 5 | 6 | #### Compilation 7 | 8 | To compile Subway, just run make in the root directory. The only requrements are g++ and CUDA toolkit. 9 | 10 | #### Input graph formats 11 | 12 | Subway accepts edge-list (.el) and weighted edge-list (.wel) graph formats, as well as the binary serialized pre-built CSR graph representation (.bcsr and .bwcsr). It is highly recommended to convert edge-list format graph files to the binary format (using tools/converter). Reading binary formats is faster and more space efficient. 13 | 14 | Subway is sensitive to graph file extension. A weighted edge-list graph file has to end with .wel. The followings are two graph file examples. 
15 | 16 | Graph.el ("SOURCE DESTINATION" for each edge in each line): 17 | ``` 18 | 0 1 19 | 0 3 20 | 2 3 21 | 1 2 22 | ``` 23 | 24 | Graph.wel ("SOURCE DESTINATION WEIGHT" for each edge in each line): 25 | ``` 26 | 0 1 26 27 | 0 3 33 28 | 2 3 40 29 | 1 2 10 30 | ``` 31 | 32 | To convert these graph files to the binary format, run the following commands in the root folder: 33 | ``` 34 | tools/converter path_to_Graph.el 35 | tools/converter path_to_Graph.wel 36 | ``` 37 | 38 | The first command converts Graph.el to the binary CSR format and generates a binary graph file with .bcsr extension under the same directory as the original file. The second command converts Graph.wel to a weighted binary graph file with .bwcsr extension. 39 | 40 | #### Running applications in Subway 41 | The applications take a graph as input as well as some optional arguments. For example: 42 | 43 | ``` 44 | $ ./sssp-async --input path-to-input-graph 45 | $ ./sssp-async --input path-to-input-graph --source 10 46 | ``` 47 | 48 | For applications that run on weighted graphs, like SSSP, the input must be weighted (.bwcsr or .wel) and for applications that run on unweighted graphs, like BFS, the input must be unweighted (.bcsr or .el). 49 | 50 | #### Publications: 51 | 52 | [EUROSYS'20] Amir Hossein Nodehi Sabet, Zhijia Zhao, and Rajiv Gupta. [Subway: minimizing data transfer during out-of-GPU-memory graph processing](https://dl.acm.org/doi/abs/10.1145/3342195.3387537). In Proceedings of the Fifteenth European Conference on Computer Systems. 53 | 54 | [ASPLOS'18] Amir Hossein Nodehi Sabet, Junqiao Qiu, and Zhijia Zhao. [Tigr: Transforming Irregular Graphs for GPU-Friendly Graph Processing](https://dl.acm.org/doi/10.1145/3173162.3173180). In Proceedings of the Twenty-Third International Conference on Architectural Support for Programming Languages and Operating Systems. 
55 | 56 | 57 | -------------------------------------------------------------------------------- /shared/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | NC=nvcc 3 | CFLAGS=-std=c++11 -O3 4 | NFLAGS=-arch=sm_60 5 | 6 | 7 | all: timer.o argument_parsing.o graph.o subgraph.o partitioner.o subgraph_generator.o gpu_kernels.o subway_utilities.o test.o 8 | 9 | 10 | timer.o: timer.cpp 11 | $(CC) -c timer.cpp -o timer.o $(CFLAGS) 12 | 13 | argument_parsing.o: argument_parsing.cu 14 | $(NC) -c argument_parsing.cu -o argument_parsing.o $(CFLAGS) $(NFLAGS) 15 | 16 | graph.o: graph.cu 17 | $(NC) -c graph.cu -o graph.o $(CFLAGS) $(NFLAGS) 18 | 19 | subgraph.o: subgraph.cu 20 | $(NC) -c subgraph.cu -o subgraph.o $(CFLAGS) $(NFLAGS) 21 | 22 | partitioner.o: partitioner.cu 23 | $(NC) -c partitioner.cu -o partitioner.o $(CFLAGS) $(NFLAGS) 24 | 25 | subgraph_generator.o: subgraph_generator.cu 26 | $(NC) -c subgraph_generator.cu -o subgraph_generator.o $(CFLAGS) $(NFLAGS) 27 | 28 | gpu_kernels.o: gpu_kernels.cu 29 | $(NC) -c gpu_kernels.cu -o gpu_kernels.o $(CFLAGS) $(NFLAGS) 30 | 31 | subway_utilities.o: subway_utilities.cpp 32 | $(CC) -c subway_utilities.cpp -o subway_utilities.o $(CFLAGS) 33 | 34 | test.o: test.cu 35 | $(NC) -c test.cu -o test.o $(CFLAGS) $(NFLAGS) 36 | 37 | clean: 38 | rm *.o 39 | -------------------------------------------------------------------------------- /shared/argument_parsing.cu: -------------------------------------------------------------------------------- 1 | #include "argument_parsing.cuh" 2 | 3 | 4 | 5 | ArgumentParser::ArgumentParser(int argc, char **argv, bool canHaveSource, bool canHaveItrs) 6 | { 7 | this->argc = argc; 8 | this->argv = argv; 9 | this->canHaveSource = canHaveSource; 10 | this->canHaveItrs = canHaveItrs; 11 | 12 | this->sourceNode = 0; 13 | this->deviceID = 0; 14 | this->numberOfItrs = 1; 15 | 16 | hasInput = false; 17 | hasSourceNode = false; 18 | hasOutput = false; 19 | 
hasDeviceID = false; 20 | hasNumberOfItrs = false; 21 | 22 | Parse(); 23 | } 24 | 25 | bool ArgumentParser::Parse() 26 | { 27 | try 28 | { 29 | if(argc == 1) 30 | { 31 | cout << GenerateHelpString(); 32 | exit(0); 33 | } 34 | 35 | if(argc == 2) 36 | if ((strcmp(argv[1], "--help") == 0) || 37 | (strcmp(argv[1], "-help") == 0) || 38 | (strcmp(argv[1], "--h") == 0) || 39 | (strcmp(argv[1], "-h") == 0)) 40 | { 41 | cout << GenerateHelpString(); 42 | exit(0); 43 | } 44 | 45 | if(argc%2 == 0) 46 | { 47 | cout << "\nThere was an error parsing command line arguments\n"; 48 | cout << GenerateHelpString(); 49 | exit(0); 50 | } 51 | 52 | 53 | for(int i=1; i\n"; 81 | cout << GenerateHelpString(); 82 | exit(0); 83 | } 84 | } 85 | 86 | if(hasInput) 87 | return true; 88 | else 89 | { 90 | cout << "\nInput graph file argument is required.\n"; 91 | cout << GenerateHelpString(); 92 | exit(0); 93 | } 94 | } 95 | catch( const std::exception& strException ) { 96 | std::cerr << strException.what() << "\n"; 97 | GenerateHelpString(); 98 | exit(0); 99 | } 100 | catch(...) { 101 | std::cerr << "An exception has occurred.\n"; 102 | GenerateHelpString(); 103 | exit(0); 104 | } 105 | } 106 | 107 | string ArgumentParser::GenerateHelpString(){ 108 | string str = "\nRequired arguments:"; 109 | str += "\n [--input]: Input graph file. E.g., --input FacebookGraph.txt"; 110 | str += "\nOptional arguments"; 111 | if(canHaveSource) 112 | str += "\n [--source]: Begins from the source (Default: 0). E.g., --source 10"; 113 | str += "\n [--output]: Output file for results. E.g., --output results.txt"; 114 | str += "\n [--device]: Select GPU device (default: 0). E.g., --device 1"; 115 | if(canHaveItrs) 116 | str += "\n [--iteration]: Number of iterations (default: 1). 
E.g., --iterations 10"; 117 | str += "\n\n"; 118 | return str; 119 | } 120 | 121 | -------------------------------------------------------------------------------- /shared/argument_parsing.cuh: -------------------------------------------------------------------------------- 1 | #ifndef ARGUMENT_PARSING_HPP 2 | #define ARGUMENT_PARSING_HPP 3 | 4 | #include "globals.hpp" 5 | 6 | 7 | class ArgumentParser 8 | { 9 | private: 10 | 11 | public: 12 | int argc; 13 | char** argv; 14 | 15 | bool canHaveSource; 16 | bool canHaveItrs; 17 | 18 | bool hasInput; 19 | bool hasSourceNode; 20 | bool hasOutput; 21 | bool hasDeviceID; 22 | bool hasNumberOfItrs; 23 | string input; 24 | int sourceNode; 25 | string output; 26 | int deviceID; 27 | int numberOfItrs; 28 | 29 | 30 | ArgumentParser(int argc, char **argv, bool canHaveSource, bool canHaveItrs); 31 | 32 | bool Parse(); 33 | 34 | string GenerateHelpString(); 35 | 36 | }; 37 | 38 | 39 | #endif // ARGUMENT_PARSING_HPP 40 | -------------------------------------------------------------------------------- /shared/globals.hpp: -------------------------------------------------------------------------------- 1 | #ifndef GLOBALS_HPP 2 | #define GLOBALS_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | const unsigned int DIST_INFINITY = std::numeric_limits::max() - 1; 28 | 29 | typedef unsigned int uint; 30 | typedef unsigned long long ull; 31 | 32 | 33 | struct OutEdge{ 34 | uint end; 35 | }; 36 | 37 | struct OutEdgeWeighted{ 38 | uint end; 39 | uint w8; 40 | }; 41 | 42 | struct Edge{ 43 | uint source; 44 | uint end; 45 | }; 46 | 47 | struct EdgeWeighted{ 48 | uint source; 49 | uint end; 50 | uint w8; 51 | }; 52 | 53 | 54 | 55 | 56 | #endif // GLOBALS_HPP 57 | 
-------------------------------------------------------------------------------- /shared/gpu_error_check.cuh: -------------------------------------------------------------------------------- 1 | #ifndef GPU_ERROR_CHECK_CUH 2 | #define GPU_ERROR_CHECK_CUH 3 | 4 | //#include 5 | //#include 6 | //#include 7 | 8 | #define gpuErrorcheck(ans) { gpuAssert((ans), __FILE__, __LINE__); } 9 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) 10 | { 11 | if (code != cudaSuccess) 12 | { 13 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 14 | if (abort) exit(code); 15 | } 16 | } 17 | 18 | #endif // GPU_ERROR_CHECK_CUH 19 | -------------------------------------------------------------------------------- /shared/gpu_kernels.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "gpu_kernels.cuh" 3 | #include "globals.hpp" 4 | #include "gpu_error_check.cuh" 5 | #include "graph.cuh" 6 | #include "subgraph.cuh" 7 | 8 | 9 | __global__ void bfs_kernel(unsigned int numNodes, 10 | unsigned int from, 11 | unsigned int numPartitionedEdges, 12 | unsigned int *activeNodes, 13 | unsigned int *activeNodesPointer, 14 | OutEdge *edgeList, 15 | unsigned int *outDegree, 16 | unsigned int *value, 17 | //bool *finished, 18 | bool *label1, 19 | bool *label2) 20 | { 21 | unsigned int tId = blockDim.x * blockIdx.x + threadIdx.x; 22 | 23 | if(tId < numNodes) 24 | { 25 | unsigned int id = activeNodes[from + tId]; 26 | 27 | if(label1[id] == false) 28 | return; 29 | 30 | label1[id] = false; 31 | 32 | unsigned int sourceWeight = value[id]; 33 | 34 | unsigned int thisFrom = activeNodesPointer[from+tId]-numPartitionedEdges; 35 | unsigned int degree = outDegree[id]; 36 | unsigned int thisTo = thisFrom + degree; 37 | 38 | //printf("******* %i\n", thisFrom); 39 | 40 | unsigned int finalDist; 41 | 42 | for(unsigned int i=thisFrom; i dist[edgeList[i].end]) 198 | { 199 | 
atomicMax(&dist[edgeList[i].end] , finalDist); 200 | 201 | //*finished = false; 202 | 203 | //label1[edgeList[i].end] = true; 204 | 205 | label2[edgeList[i].end] = true; 206 | } 207 | } 208 | } 209 | } 210 | 211 | __global__ void pr_kernel(unsigned int numNodes, 212 | unsigned int from, 213 | unsigned int numPartitionedEdges, 214 | unsigned int *activeNodes, 215 | unsigned int *activeNodesPointer, 216 | OutEdge *edgeList, 217 | unsigned int *outDegree, 218 | float *dist, 219 | float *delta, 220 | //bool *finished, 221 | float acc) 222 | { 223 | unsigned int tId = blockDim.x * blockIdx.x + threadIdx.x; 224 | 225 | if(tId < numNodes) 226 | { 227 | unsigned int id = activeNodes[from + tId]; 228 | unsigned int degree = outDegree[id]; 229 | float thisDelta = delta[id]; 230 | 231 | if(thisDelta > acc) 232 | { 233 | dist[id] += thisDelta; 234 | 235 | if(degree != 0) 236 | { 237 | //*finished = false; 238 | 239 | float sourcePR = ((float) thisDelta / degree) * 0.85; 240 | 241 | unsigned int thisfrom = activeNodesPointer[from+tId]-numPartitionedEdges; 242 | unsigned int thisto = thisfrom + degree; 243 | 244 | for(unsigned int i=thisfrom; i dist[edgeList[i].end]) 394 | { 395 | atomicMax(&dist[edgeList[i].end] , finalDist); 396 | 397 | *finished = false; 398 | 399 | //label1[edgeList[i].end] = true; 400 | 401 | label2[edgeList[i].end] = true; 402 | } 403 | } 404 | } 405 | } 406 | 407 | 408 | __global__ void cc_async(unsigned int numNodes, 409 | unsigned int from, 410 | unsigned int numPartitionedEdges, 411 | unsigned int *activeNodes, 412 | unsigned int *activeNodesPointer, 413 | OutEdge *edgeList, 414 | unsigned int *outDegree, 415 | unsigned int *dist, 416 | bool *finished, 417 | bool *label1, 418 | bool *label2) 419 | { 420 | unsigned int tId = blockDim.x * blockIdx.x + threadIdx.x; 421 | 422 | if(tId < numNodes) 423 | { 424 | unsigned int id = activeNodes[from + tId]; 425 | 426 | if(label1[id] == false) 427 | return; 428 | 429 | label1[id] = false; 430 | 431 | unsigned 
int sourceWeight = dist[id]; 432 | 433 | unsigned int thisFrom = activeNodesPointer[from+tId]-numPartitionedEdges; 434 | unsigned int degree = outDegree[id]; 435 | unsigned int thisTo = thisFrom + degree; 436 | 437 | //printf("******* %i\n", thisFrom); 438 | 439 | //unsigned int finalDist; 440 | 441 | for(unsigned int i=thisFrom; i acc) 480 | { 481 | dist[id] += thisDelta; 482 | 483 | if(degree != 0) 484 | { 485 | *finished = false; 486 | 487 | float sourcePR = ((float) thisDelta / degree) * 0.85; 488 | 489 | unsigned int thisfrom = activeNodesPointer[from+tId]-numPartitionedEdges; 490 | unsigned int thisto = thisfrom + degree; 491 | 492 | for(unsigned int i=thisfrom; i 5 | Graph::Graph(string graphFilePath, bool isWeighted) 6 | { 7 | this->graphFilePath = graphFilePath; 8 | this->isWeighted = isWeighted; 9 | } 10 | 11 | template 12 | string Graph::GetFileExtension(string fileName) 13 | { 14 | if(fileName.find_last_of(".") != string::npos) 15 | return fileName.substr(fileName.find_last_of(".")+1); 16 | return ""; 17 | } 18 | 19 | template <> 20 | void Graph::AssignW8(uint w8, uint index) 21 | { 22 | edgeList[index].w8 = w8; 23 | } 24 | 25 | template <> 26 | void Graph::AssignW8(uint w8, uint index) 27 | { 28 | edgeList[index].end = edgeList[index].end; // do nothing 29 | } 30 | 31 | template 32 | void Graph::ReadGraph() 33 | { 34 | 35 | cout << "Reading the input graph from the following file:\n>> " << graphFilePath << endl; 36 | 37 | this->graphFormat = GetFileExtension(graphFilePath); 38 | 39 | if(graphFormat == "bcsr" || graphFormat == "bwcsr") 40 | { 41 | ifstream infile (graphFilePath, ios::in | ios::binary); 42 | 43 | infile.read ((char*)&num_nodes, sizeof(uint)); 44 | infile.read ((char*)&num_edges, sizeof(uint)); 45 | 46 | nodePointer = new uint[num_nodes+1]; 47 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 48 | 49 | infile.read ((char*)nodePointer, sizeof(uint)*num_nodes); 50 | infile.read ((char*)edgeList, sizeof(E)*num_edges); 51 | 
nodePointer[num_nodes] = num_edges; 52 | } 53 | else if(graphFormat == "el" || graphFormat == "wel") 54 | { 55 | ifstream infile; 56 | infile.open(graphFilePath); 57 | stringstream ss; 58 | uint max = 0; 59 | string line; 60 | uint edgeCounter = 0; 61 | if(isWeighted) 62 | { 63 | vector edges; 64 | EdgeWeighted newEdge; 65 | while(getline( infile, line )) 66 | { 67 | ss.str(""); 68 | ss.clear(); 69 | ss << line; 70 | 71 | ss >> newEdge.source; 72 | ss >> newEdge.end; 73 | ss >> newEdge.w8; 74 | 75 | edges.push_back(newEdge); 76 | edgeCounter++; 77 | 78 | if(max < newEdge.source) 79 | max = newEdge.source; 80 | if(max < newEdge.end) 81 | max = newEdge.end; 82 | } 83 | infile.close(); 84 | num_nodes = max + 1; 85 | num_edges = edgeCounter; 86 | nodePointer = new uint[num_nodes+1]; 87 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 88 | uint *degree = new uint[num_nodes]; 89 | for(uint i=0; i edges; 120 | Edge newEdge; 121 | while(getline( infile, line )) 122 | { 123 | ss.str(""); 124 | ss.clear(); 125 | ss << line; 126 | 127 | ss >> newEdge.source; 128 | ss >> newEdge.end; 129 | 130 | edges.push_back(newEdge); 131 | edgeCounter++; 132 | 133 | if(max < newEdge.source) 134 | max = newEdge.source; 135 | if(max < newEdge.end) 136 | max = newEdge.end; 137 | } 138 | infile.close(); 139 | num_nodes = max + 1; 140 | num_edges = edgeCounter; 141 | nodePointer = new uint[num_nodes+1]; 142 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 143 | uint *degree = new uint[num_nodes]; 144 | for(uint i=0; i 217 | void GraphPR::AssignW8(uint w8, uint index) 218 | { 219 | edgeList[index].w8 = w8; 220 | } 221 | 222 | template <> 223 | void GraphPR::AssignW8(uint w8, uint index) 224 | { 225 | edgeList[index].end = edgeList[index].end; // do nothing 226 | } 227 | 228 | template 229 | void GraphPR::ReadGraph() 230 | { 231 | 232 | cout << "Reading the input graph from the following file:\n>> " << graphFilePath << endl; 233 | 234 | this->graphFormat = 
GetFileExtension(graphFilePath); 235 | 236 | if(graphFormat == "bcsr" || graphFormat == "bwcsr") 237 | { 238 | ifstream infile (graphFilePath, ios::in | ios::binary); 239 | 240 | infile.read ((char*)&num_nodes, sizeof(uint)); 241 | infile.read ((char*)&num_edges, sizeof(uint)); 242 | 243 | nodePointer = new uint[num_nodes+1]; 244 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 245 | 246 | infile.read ((char*)nodePointer, sizeof(uint)*num_nodes); 247 | infile.read ((char*)edgeList, sizeof(E)*num_edges); 248 | nodePointer[num_nodes] = num_edges; 249 | } 250 | else if(graphFormat == "el" || graphFormat == "wel") 251 | { 252 | ifstream infile; 253 | infile.open(graphFilePath); 254 | stringstream ss; 255 | uint max = 0; 256 | string line; 257 | uint edgeCounter = 0; 258 | if(isWeighted) 259 | { 260 | vector edges; 261 | EdgeWeighted newEdge; 262 | while(getline( infile, line )) 263 | { 264 | ss.str(""); 265 | ss.clear(); 266 | ss << line; 267 | 268 | ss >> newEdge.source; 269 | ss >> newEdge.end; 270 | ss >> newEdge.w8; 271 | 272 | edges.push_back(newEdge); 273 | edgeCounter++; 274 | 275 | if(max < newEdge.source) 276 | max = newEdge.source; 277 | if(max < newEdge.end) 278 | max = newEdge.end; 279 | } 280 | infile.close(); 281 | num_nodes = max + 1; 282 | num_edges = edgeCounter; 283 | nodePointer = new uint[num_nodes+1]; 284 | gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 285 | uint *degree = new uint[num_nodes]; 286 | for(uint i=0; i edges; 317 | Edge newEdge; 318 | while(getline( infile, line )) 319 | { 320 | ss.str(""); 321 | ss.clear(); 322 | ss << line; 323 | 324 | ss >> newEdge.source; 325 | ss >> newEdge.end; 326 | 327 | edges.push_back(newEdge); 328 | edgeCounter++; 329 | 330 | if(max < newEdge.source) 331 | max = newEdge.source; 332 | if(max < newEdge.end) 333 | max = newEdge.end; 334 | } 335 | infile.close(); 336 | num_nodes = max + 1; 337 | num_edges = edgeCounter; 338 | nodePointer = new uint[num_nodes+1]; 339 | 
gpuErrorcheck(cudaMallocHost(&edgeList, (num_edges) * sizeof(E))); 340 | uint *degree = new uint[num_nodes]; 341 | for(uint i=0; i 8 | class Graph 9 | { 10 | private: 11 | 12 | public: 13 | string graphFilePath; 14 | bool isWeighted; 15 | bool isLarge; 16 | uint num_nodes; 17 | uint num_edges; 18 | uint *nodePointer; 19 | E *edgeList; 20 | uint *outDegree; 21 | bool *label1; 22 | bool *label2; 23 | uint *value; 24 | uint *d_outDegree; 25 | uint *d_value; 26 | bool *d_label1; 27 | bool *d_label2; 28 | string graphFormat; 29 | Graph(string graphFilePath, bool isWeighted); 30 | string GetFileExtension(string fileName); 31 | void AssignW8(uint w8, uint index); 32 | void ReadGraph(); 33 | }; 34 | 35 | template 36 | class GraphPR 37 | { 38 | private: 39 | 40 | public: 41 | string graphFilePath; 42 | bool isWeighted; 43 | bool isLarge; 44 | uint num_nodes; 45 | uint num_edges; 46 | uint *nodePointer; 47 | E *edgeList; 48 | uint *outDegree; 49 | float *value; 50 | float *delta; 51 | uint *d_outDegree; 52 | float *d_value; 53 | float *d_delta; 54 | string graphFormat; 55 | GraphPR(string graphFilePath, bool isWeighted); 56 | string GetFileExtension(string fileName); 57 | void AssignW8(uint w8, uint index); 58 | void ReadGraph(); 59 | }; 60 | 61 | #endif // GRAPH_CUH 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /shared/partitioner.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "partitioner.cuh" 3 | #include "gpu_error_check.cuh" 4 | 5 | template 6 | Partitioner::Partitioner() 7 | { 8 | reset(); 9 | } 10 | 11 | template 12 | void Partitioner::partition(Subgraph &subgraph, uint numActiveNodes) 13 | { 14 | reset(); 15 | 16 | unsigned int from, to; 17 | unsigned int left, right, mid; 18 | unsigned int partitionSize; 19 | unsigned int numNodesInPartition; 20 | unsigned int numPartitionedEdges; 21 | bool foundTo; 22 | unsigned int accurCount; 23 | 24 | 25 | from = 0; 26 | 
to = numActiveNodes; // last in pointers 27 | numPartitionedEdges = 0; 28 | 29 | do 30 | { 31 | left = from; 32 | right = numActiveNodes; 33 | 34 | //cout << "#active nodes: " << numActiveNodes << endl; 35 | //cout << "left: " << left << " right: " << right << endl; 36 | //cout << "pointer to left: " << subgraph.activeNodesPointer[left] << " pointer to right: " << subgraph.activeNodesPointer[right] << endl; 37 | 38 | partitionSize = subgraph.activeNodesPointer[right] - subgraph.activeNodesPointer[left]; 39 | if(partitionSize <= subgraph.max_partition_size) 40 | { 41 | to = right; 42 | } 43 | else 44 | { 45 | foundTo = false; 46 | accurCount = 10; 47 | while(foundTo==false || accurCount>0) 48 | { 49 | mid = (left + right)/2; 50 | partitionSize = subgraph.activeNodesPointer[mid] - subgraph.activeNodesPointer[from]; 51 | if(foundTo == true) 52 | accurCount--; 53 | if(partitionSize <= subgraph.max_partition_size) 54 | { 55 | left = mid; 56 | to = mid; 57 | foundTo = true; 58 | } 59 | else 60 | { 61 | right = mid; 62 | } 63 | } 64 | 65 | 66 | if(to == numActiveNodes) 67 | { 68 | cout << "Error in Partitioning...\n"; 69 | exit(-1); 70 | } 71 | 72 | } 73 | 74 | partitionSize = subgraph.activeNodesPointer[to] - subgraph.activeNodesPointer[from]; 75 | numNodesInPartition = to - from; 76 | 77 | //cout << "from: " << from << " to: " << to << endl; 78 | //cout << "#nodes in P: " << numNodesInPartition << " #edges in P: " << partitionSize << endl; 79 | 80 | fromNode.push_back(from); 81 | fromEdge.push_back(numPartitionedEdges); 82 | partitionNodeSize.push_back(numNodesInPartition); 83 | partitionEdgeSize.push_back(partitionSize); 84 | 85 | from = to; 86 | numPartitionedEdges += partitionSize; 87 | 88 | } while (to != numActiveNodes); 89 | 90 | numPartitions = fromNode.size(); 91 | } 92 | 93 | template 94 | void Partitioner::reset() 95 | { 96 | fromNode.clear(); 97 | fromEdge.clear(); 98 | partitionNodeSize.clear(); 99 | partitionEdgeSize.clear(); 100 | numPartitions = 0; 101 | 
} 102 | 103 | template class Partitioner; 104 | template class Partitioner; 105 | -------------------------------------------------------------------------------- /shared/partitioner.cuh: -------------------------------------------------------------------------------- 1 | #ifndef PARTITIONER_CUH 2 | #define PARTITIONER_CUH 3 | 4 | 5 | #include "globals.hpp" 6 | #include "subgraph.cuh" 7 | 8 | template 9 | class Partitioner 10 | { 11 | private: 12 | 13 | public: 14 | uint numPartitions; 15 | vector fromNode; 16 | vector fromEdge; 17 | vector partitionNodeSize; 18 | vector partitionEdgeSize; 19 | Partitioner(); 20 | void partition(Subgraph &subgraph, uint numActiveNodes); 21 | void reset(); 22 | }; 23 | 24 | #endif // PARTITIONER_CUH 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /shared/subgraph.cu: -------------------------------------------------------------------------------- 1 | 2 | #include "subgraph.cuh" 3 | #include "gpu_error_check.cuh" 4 | #include "graph.cuh" 5 | #include 6 | 7 | 8 | template 9 | Subgraph::Subgraph(uint num_nodes, uint num_edges) 10 | { 11 | cudaProfilerStart(); 12 | cudaError_t error; 13 | cudaDeviceProp dev; 14 | int deviceID; 15 | cudaGetDevice(&deviceID); 16 | error = cudaGetDeviceProperties(&dev, deviceID); 17 | if(error != cudaSuccess) 18 | { 19 | printf("Error: %s\n", cudaGetErrorString(error)); 20 | exit(-1); 21 | } 22 | cudaProfilerStop(); 23 | 24 | max_partition_size = 0.9 * (dev.totalGlobalMem - 8*4*num_nodes) / sizeof(E); 25 | //max_partition_size = 1000000000; 26 | 27 | if(max_partition_size > DIST_INFINITY) 28 | max_partition_size = DIST_INFINITY; 29 | 30 | //cout << "Max Partition Size: " << max_partition_size << endl; 31 | 32 | this->num_nodes = num_nodes; 33 | this->num_edges = num_edges; 34 | 35 | gpuErrorcheck(cudaMallocHost(&activeNodes, num_nodes * sizeof(uint))); 36 | gpuErrorcheck(cudaMallocHost(&activeNodesPointer, (num_nodes+1) * sizeof(uint))); 37 | 
gpuErrorcheck(cudaMallocHost(&activeEdgeList, num_edges * sizeof(E))); 38 | 39 | gpuErrorcheck(cudaMalloc(&d_activeNodes, num_nodes * sizeof(unsigned int))); 40 | gpuErrorcheck(cudaMalloc(&d_activeNodesPointer, (num_nodes+1) * sizeof(unsigned int))); 41 | gpuErrorcheck(cudaMalloc(&d_activeEdgeList, (max_partition_size) * sizeof(E))); 42 | } 43 | 44 | template class Subgraph; 45 | template class Subgraph; 46 | 47 | // For initialization with one active node 48 | //unsigned int numActiveNodes = 1; 49 | //subgraph.activeNodes[0] = SOURCE_NODE; 50 | //for(unsigned int i=graph.nodePointer[SOURCE_NODE], j=0; i 9 | class Subgraph 10 | { 11 | private: 12 | 13 | public: 14 | uint num_nodes; 15 | uint num_edges; 16 | uint numActiveNodes; 17 | 18 | uint *activeNodes; 19 | uint *activeNodesPointer; 20 | E *activeEdgeList; 21 | 22 | uint *d_activeNodes; 23 | uint *d_activeNodesPointer; 24 | E *d_activeEdgeList; 25 | 26 | ull max_partition_size; 27 | 28 | Subgraph(uint num_nodes, uint num_edges); 29 | }; 30 | 31 | #endif // SUBGRAPH_HPP 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /shared/subgraph_generator.cu: -------------------------------------------------------------------------------- 1 | #include "subgraph_generator.cuh" 2 | #include "graph.cuh" 3 | #include "subgraph.cuh" 4 | #include "gpu_error_check.cuh" 5 | 6 | const unsigned int NUM_THREADS = 64; 7 | 8 | const unsigned int THRESHOLD_THREAD = 50000; 9 | 10 | __global__ void prePrefix(unsigned int *activeNodesLabeling, unsigned int *activeNodesDegree, 11 | unsigned int *outDegree, bool *label1, bool *label2, unsigned int numNodes) 12 | { 13 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 14 | if(id < numNodes){ 15 | activeNodesLabeling[id] = label1[id] || label2[id]; // label1 is always zero in sync 16 | //activeNodesLabeling[id] = label[id]; 17 | //activeNodesLabeling[id] = 1; 18 | activeNodesDegree[id] = 0; 19 | if(activeNodesLabeling[id] == 1) 20 | 
activeNodesDegree[id] = outDegree[id]; 21 | } 22 | } 23 | 24 | __global__ void prePrefix(unsigned int *activeNodesLabeling, unsigned int *activeNodesDegree, 25 | unsigned int *outDegree, float *delta, unsigned int numNodes, float acc) 26 | { 27 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 28 | if(id < numNodes){ 29 | if(delta[id] > acc) 30 | { 31 | activeNodesLabeling[id] = 1; 32 | } 33 | else 34 | { 35 | activeNodesLabeling[id] = 0; 36 | } 37 | activeNodesDegree[id] = 0; 38 | if(activeNodesLabeling[id] == 1) 39 | activeNodesDegree[id] = outDegree[id]; 40 | } 41 | } 42 | 43 | __global__ void makeQueue(unsigned int *activeNodes, unsigned int *activeNodesLabeling, 44 | unsigned int *prefixLabeling, unsigned int numNodes) 45 | { 46 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 47 | if(id < numNodes && activeNodesLabeling[id] == 1){ 48 | activeNodes[prefixLabeling[id]] = id; 49 | } 50 | } 51 | 52 | __global__ void makeActiveNodesPointer(unsigned int *activeNodesPointer, unsigned int *activeNodesLabeling, 53 | unsigned int *prefixLabeling, unsigned int *prefixSumDegrees, 54 | unsigned int numNodes) 55 | { 56 | unsigned int id = blockDim.x * blockIdx.x + threadIdx.x; 57 | if(id < numNodes && activeNodesLabeling[id] == 1){ 58 | activeNodesPointer[prefixLabeling[id]] = prefixSumDegrees[id]; 59 | } 60 | } 61 | 62 | // pthread 63 | template 64 | void dynamic(unsigned int tId, 65 | unsigned int numThreads, 66 | unsigned int numActiveNodes, 67 | unsigned int *activeNodes, 68 | unsigned int *outDegree, 69 | unsigned int *activeNodesPointer, 70 | unsigned int *nodePointer, 71 | E *activeEdgeList, 72 | E *edgeList) 73 | { 74 | 75 | unsigned int chunkSize = ceil(numActiveNodes / numThreads); 76 | unsigned int left, right; 77 | left = tId * chunkSize; 78 | right = min(left+chunkSize, numActiveNodes); 79 | 80 | unsigned int thisNode; 81 | unsigned int thisDegree; 82 | unsigned int fromHere; 83 | unsigned int fromThere; 84 | 85 | for(unsigned int i=left; i 
// Allocate the host/device working buffers used to build a subgraph of
// active vertices. Host buffers are pinned (cudaMallocHost) so the repeated
// host<->device copies in generate() can use fast DMA transfers.
// NOTE(review): no matching destructor is visible in this chunk — if none
// exists elsewhere, these allocations leak; confirm against the full file.
template <class E>
SubgraphGenerator<E>::SubgraphGenerator(Graph<E> &graph)
{
	// Per-vertex 0/1 activity flag, active degree, and their prefix sums.
	gpuErrorcheck(cudaMallocHost(&activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	// +1 slot for the end-of-list sentinel appended in generate().
	gpuErrorcheck(cudaMallocHost(&prefixSumDegrees, (graph.num_nodes+1) * sizeof(unsigned int)));

	gpuErrorcheck(cudaMalloc(&d_activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixSumDegrees , (graph.num_nodes+1) * sizeof(unsigned int)));
}

// Same allocations for the PageRank graph type (GraphPR); the buffers and
// sizes are identical, only the graph wrapper differs.
template <class E>
SubgraphGenerator<E>::SubgraphGenerator(GraphPR<E> &graph)
{
	gpuErrorcheck(cudaMallocHost(&activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMallocHost(&prefixSumDegrees, (graph.num_nodes+1) * sizeof(unsigned int)));

	gpuErrorcheck(cudaMalloc(&d_activeNodesLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_activeNodesDegree, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixLabeling, graph.num_nodes * sizeof(unsigned int)));
	gpuErrorcheck(cudaMalloc(&d_prefixSumDegrees , (graph.num_nodes+1) * sizeof(unsigned int)));
}
	                          d_activeNodesDegree, graph.d_outDegree, graph.d_label1, graph.d_label2, graph.num_nodes);

	// Wrap the raw device arrays so thrust::reduce/exclusive_scan can run on them.
	thrust::device_ptr<unsigned int> ptr_labeling(d_activeNodesLabeling);
	thrust::device_ptr<unsigned int> ptr_labeling_prefixsum(d_prefixLabeling);

	// Frontier size = sum of the 0/1 activity labels.
	subgraph.numActiveNodes = thrust::reduce(ptr_labeling, ptr_labeling + graph.num_nodes);
	//cout << "Number of Active Nodes = " << subgraph.numActiveNodes << endl;

	// Exclusive scan of the labels gives each active vertex its slot in the queue.
	thrust::exclusive_scan(ptr_labeling, ptr_labeling + graph.num_nodes, ptr_labeling_prefixsum);

	makeQueue<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodes, d_activeNodesLabeling, d_prefixLabeling, graph.num_nodes);

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodes, subgraph.d_activeNodes, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	thrust::device_ptr<unsigned int> ptr_degrees(d_activeNodesDegree);
	thrust::device_ptr<unsigned int> ptr_degrees_prefixsum(d_prefixSumDegrees);

	// Exclusive scan of active degrees -> per-vertex offsets into the active edge list.
	thrust::exclusive_scan(ptr_degrees, ptr_degrees + graph.num_nodes, ptr_degrees_prefixsum);

	makeActiveNodesPointer<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodesPointer, d_activeNodesLabeling, d_prefixLabeling, d_prefixSumDegrees, graph.num_nodes);
	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	// Total subgraph edges = offset of the last active vertex plus its
	// out-degree; guard against an empty frontier.
	unsigned int numActiveEdges = 0;
	if(subgraph.numActiveNodes>0)
		numActiveEdges = subgraph.activeNodesPointer[subgraph.numActiveNodes-1] + graph.outDegree[subgraph.activeNodes[subgraph.numActiveNodes-1]];

	// Append the edge total as a sentinel so pointer[i+1]-pointer[i] is valid
	// for the last vertex too, then refresh the host copy including it.
	unsigned int last = numActiveEdges;
	gpuErrorcheck(cudaMemcpy(subgraph.d_activeNodesPointer+subgraph.numActiveNodes, &last, sizeof(unsigned int), cudaMemcpyHostToDevice));

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, (subgraph.numActiveNodes+1)*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	//finishDynG = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dyng = finishDynG-startDynG;
	//std::time_t finish_time_dyng = std::chrono::system_clock::to_time_t(finishDynG);
	//std::cout << "Dynamic GPU Time = " << elapsed_seconds_dyng.count() << std::endl;

	//std::chrono::time_point<std::chrono::system_clock> startDynC, finishDynC;
	//startDynC = std::chrono::system_clock::now();

	unsigned int numThreads = NUM_THREADS;

	// For small frontiers the thread-spawn overhead dominates: copy serially.
	if(subgraph.numActiveNodes < THRESHOLD_THREAD)
		numThreads = 1;

	thread runThreads[numThreads];

	// Each thread copies the adjacency lists of one contiguous chunk of active
	// vertices into subgraph.activeEdgeList (see dynamic<E> above).
	for(unsigned int t=0; t<numThreads; t++)
	{
		runThreads[t] = thread(dynamic<E>,
								t,
								numThreads,
								subgraph.numActiveNodes,
								subgraph.activeNodes,
								graph.outDegree,
								subgraph.activeNodesPointer,
								graph.nodePointer,
								subgraph.activeEdgeList,
								graph.edgeList);

	}

	for(unsigned int t=0; t<numThreads; t++)
		runThreads[t].join();

	//finishDynC = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dync = finishDynC-startDynC;
	//std::time_t finish_time_dync = std::chrono::system_clock::to_time_t(finishDynC);
	//std::cout << "Dynamic CPU Time = " << elapsed_seconds_dync.count() << std::endl;

}


// PageRank variant: a vertex is active while its delta exceeds the accuracy
// threshold `acc` (see the float overload of prePrefix).
template <class E>
void SubgraphGenerator<E>::generate(GraphPR<E> &graph, Subgraph<E> &subgraph, float acc)
{
	//std::chrono::time_point<std::chrono::system_clock> startDynG, finishDynG;
	//startDynG = std::chrono::system_clock::now();

	prePrefix<<<graph.num_nodes/512 + 1, 512>>>(d_activeNodesLabeling, d_activeNodesDegree, graph.d_outDegree, graph.d_delta, graph.num_nodes, acc);

	thrust::device_ptr<unsigned int> ptr_labeling(d_activeNodesLabeling);
	thrust::device_ptr<unsigned int> ptr_labeling_prefixsum(d_prefixLabeling);

	// Frontier size = sum of the 0/1 activity labels.
	subgraph.numActiveNodes = thrust::reduce(ptr_labeling, ptr_labeling + graph.num_nodes);
	//cout << "Number of Active Nodes = " << subgraph.numActiveNodes << endl;

	thrust::exclusive_scan(ptr_labeling, ptr_labeling + graph.num_nodes, ptr_labeling_prefixsum);

	makeQueue<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodes, d_activeNodesLabeling, d_prefixLabeling, graph.num_nodes);

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodes, subgraph.d_activeNodes, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	thrust::device_ptr<unsigned int> ptr_degrees(d_activeNodesDegree);
	thrust::device_ptr<unsigned int> ptr_degrees_prefixsum(d_prefixSumDegrees);

	// Exclusive scan of active degrees -> per-vertex offsets into the active edge list.
	thrust::exclusive_scan(ptr_degrees, ptr_degrees + graph.num_nodes, ptr_degrees_prefixsum);

	makeActiveNodesPointer<<<graph.num_nodes/512 + 1, 512>>>(subgraph.d_activeNodesPointer, d_activeNodesLabeling, d_prefixLabeling, d_prefixSumDegrees, graph.num_nodes);
	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, subgraph.numActiveNodes*sizeof(unsigned int), cudaMemcpyDeviceToHost));

	// Total subgraph edges = last active vertex's offset + its out-degree
	// (guarded against an empty frontier).
	unsigned int numActiveEdges = 0;
	if(subgraph.numActiveNodes>0)
		numActiveEdges = subgraph.activeNodesPointer[subgraph.numActiveNodes-1] + graph.outDegree[subgraph.activeNodes[subgraph.numActiveNodes-1]];

	// Sentinel entry so pointer[i+1]-pointer[i] is valid for the last vertex.
	unsigned int last = numActiveEdges;
	gpuErrorcheck(cudaMemcpy(subgraph.d_activeNodesPointer+subgraph.numActiveNodes, &last, sizeof(unsigned int), cudaMemcpyHostToDevice));

	gpuErrorcheck(cudaMemcpy(subgraph.activeNodesPointer, subgraph.d_activeNodesPointer, (subgraph.numActiveNodes+1)*sizeof(unsigned int), cudaMemcpyDeviceToHost));


	//finishDynG = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dyng = finishDynG-startDynG;
	//std::time_t finish_time_dyng = std::chrono::system_clock::to_time_t(finishDynG);
	//std::cout << "Dynamic GPU Time = " << elapsed_seconds_dyng.count() << std::endl;

	//std::chrono::time_point<std::chrono::system_clock> startDynC, finishDynC;
	//startDynC = std::chrono::system_clock::now();

	unsigned int numThreads = NUM_THREADS;

	// Serial fallback for small frontiers (thread-spawn cost dominates).
	if(subgraph.numActiveNodes < THRESHOLD_THREAD)
		numThreads = 1;

	thread runThreads[numThreads];

	// Fan out the edge-list compaction across CPU threads (see dynamic<E>).
	for(unsigned int t=0; t<numThreads; t++)
	{
		runThreads[t] = thread(dynamic<E>,
								t,
								numThreads,
								subgraph.numActiveNodes,
								subgraph.activeNodes,
								graph.outDegree,
								subgraph.activeNodesPointer,
								graph.nodePointer,
								subgraph.activeEdgeList,
								graph.edgeList);

	}

	for(unsigned int t=0; t<numThreads; t++)
		runThreads[t].join();

	//finishDynC = std::chrono::system_clock::now();
	//std::chrono::duration<double> elapsed_seconds_dync = finishDynC-startDynC;
	//std::time_t finish_time_dync = std::chrono::system_clock::to_time_t(finishDynC);
	//std::cout << "Dynamic CPU Time = " << elapsed_seconds_dync.count() << std::endl;

}

// Explicit instantiations for the two edge payload types used by the apps
// (unweighted traversal vs. weighted SSSP/SSWP edges).
template class SubgraphGenerator<OutEdge>;
template class SubgraphGenerator<OutEdgeWeighted>;

#ifndef SUBGRAPH_GENERATOR_HPP
#define SUBGRAPH_GENERATOR_HPP


#include "globals.hpp"
#include "graph.cuh"
#include "subgraph.cuh"
#include <cuda_runtime.h>
#include <thrust/device_ptr.h>
#include <thrust/scan.h>

// Builds, on every super-iteration, the compacted subgraph of currently
// active vertices: a dense queue of vertex ids, per-vertex offsets into a
// compacted edge list, and the compacted edge list itself (filled on the CPU).
// Host buffers are pinned; matching d_* pointers live on the device.
template <class E>
class SubgraphGenerator
{
private:

public:
	unsigned int *activeNodesLabeling;   // 0/1 per-vertex activity flag (host)
	unsigned int *activeNodesDegree;     // out-degree of active vertices, else 0 (host)
	unsigned int *prefixLabeling;        // exclusive scan of labels -> queue slots (host)
	unsigned int *prefixSumDegrees;      // exclusive scan of degrees -> edge offsets (host)
	unsigned int *d_activeNodesLabeling; // device mirrors of the above
	unsigned int *d_activeNodesDegree;
	unsigned int *d_prefixLabeling;
	unsigned int *d_prefixSumDegrees;
	SubgraphGenerator(Graph<E> &graph);
	SubgraphGenerator(GraphPR<E> &graph);
	void generate(Graph<E> &graph, Subgraph<E> &subgraph);
	// PR variant: activity decided by delta > acc instead of the label arrays.
	void generate(GraphPR<E> &graph, Subgraph<E> &subgraph, float acc);
};

#endif	//	SUBGRAPH_GENERATOR_HPP
cout << i << ":" << results[i]; 12 | } 13 | cout << "]\n"; 14 | } 15 | 16 | void utilities::PrintResults(float *results, uint n) 17 | { 18 | cout << "Results of first "<< n << " nodes:\n["; 19 | for(int i=0; i0) 22 | cout << " "; 23 | cout << i << ":" << results[i]; 24 | } 25 | cout << "]\n"; 26 | } 27 | 28 | void utilities::PrintResults(double *results, uint n) 29 | { 30 | cout << "Results of first "<< n << " nodes:\n["; 31 | for(int i=0; i0) 34 | cout << " "; 35 | cout << i << ":" << results[i]; 36 | } 37 | cout << "]\n"; 38 | } 39 | 40 | void utilities::SaveResults(string filepath, uint *results, uint n) 41 | { 42 | cout << "Saving the results into the following file:\n"; 43 | cout << ">> " << filepath << endl; 44 | ofstream outfile; 45 | outfile.open(filepath); 46 | for(int i=0; i 5 | Test::Test() 6 | { 7 | this->a = 1; 8 | this->b = 1; 9 | } 10 | 11 | template 12 | int Test::sum(int a, int b) 13 | { 14 | return a + b; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /shared/test.cuh: -------------------------------------------------------------------------------- 1 | #ifndef TEST_HPP 2 | #define TEST_HPP 3 | 4 | template 5 | class Test 6 | { 7 | private: 8 | 9 | public: 10 | int a; 11 | int b; 12 | Test(); 13 | int sum(int a, int b); 14 | }; 15 | 16 | #endif // TEST_HPP 17 | -------------------------------------------------------------------------------- /shared/timer.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "timer.hpp" 3 | 4 | 5 | void Timer::Start() 6 | { 7 | //A = chrono::system_clock::now(); 8 | gettimeofday( &StartingTime, NULL ); 9 | } 10 | 11 | 12 | float Timer::Finish() 13 | { 14 | //B = std::chrono::system_clock::now(); 15 | //chrono::duration elapsed_seconds = B - A; 16 | //time_t finish_time = std::chrono::system_clock::to_time_t(B); 17 | //cout << "title" << elapsed_seconds.count()*1000; 18 | timeval PausingTime, ElapsedTime; 19 | 
	gettimeofday( &PausingTime, NULL );
	// Elapsed = now - Start() timestamp.
	timersub(&PausingTime, &StartingTime, &ElapsedTime);
	// Convert seconds + microseconds to milliseconds.
	float d = ElapsedTime.tv_sec*1000.0+ElapsedTime.tv_usec/1000.0;
	return d;
}

#ifndef TIMER_HPP
#define TIMER_HPP


#include "globals.hpp"
#include <sys/time.h>
#include <time.h>


// Simple wall-clock stopwatch over gettimeofday().
// Start() records a timestamp; Finish() returns the elapsed time in
// milliseconds since the last Start(). Not reentrant across threads.
class Timer
{
private:
	//chrono::time_point<chrono::system_clock> A, B;
	timeval StartingTime;	// timestamp captured by Start()
public:
	void Start();
	float Finish();	// elapsed milliseconds since Start()
};

#endif	//	TIMER_HPP
#include "../shared/globals.hpp"
#include "../shared/timer.hpp"
#include "../shared/argument_parsing.cuh"
#include "../shared/graph.cuh"
#include "../shared/subgraph.cuh"
#include "../shared/partitioner.cuh"
#include "../shared/subgraph_generator.cuh"
#include "../shared/gpu_error_check.cuh"
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"


// Asynchronous BFS driver: per GPU partition, iterate the bfs_async kernel
// to convergence before moving on, regenerating the active subgraph after
// each super-iteration.
int main(int argc, char** argv)
{
	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	Graph<OutEdge> graph(arguments.input, false);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. BFS init: every level = DIST_INFINITY, labels cleared, then the
	// source gets level 0 and is the only initially-active vertex; the arrays
	// are copied to the device. Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = DIST_INFINITY;
		graph.label1[i] = false;
		graph.label2[i] = false;
	}
	graph.value[arguments.sourceNode] = 0;
	graph.label1[arguments.sourceNode] = true;

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);

	// NOTE(review): reconstructed label reset (truncated in dump).
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.label1[i] = true;
		graph.label2[i] = false;
	}
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Partitioner<OutEdge> partitioner;

	timer.Start();

	unsigned int gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one "super iteration" per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		// Split the active subgraph into partitions that fit in GPU memory.
		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			// Presumably merges label1|label2 for this partition's nodes —
			// see gpu_kernels.cu (not visible in this chunk).
			mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: relax this partition until no label changes.
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				// Ping-pong label arrays between odd/even inner iterations.
				bfs_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						d_finished,
						(itr%2==1) ? graph.d_label1 : graph.d_label2,
						(itr%2==1) ? graph.d_label2 : graph.d_label1);

				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
	// Synchronous BFS driver: one bfs_kernel sweep per partition per global
	// iteration (no inner convergence loop), regenerating the subgraph after
	// each super-iteration. (Includes/main header are on the preceding chunk.)

	Graph<OutEdge> graph(arguments.input, false);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. Levels = DIST_INFINITY, labels cleared, source set to 0 and
	// activated, arrays copied to the device. Verify against the repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = DIST_INFINITY;
		graph.label1[i] = false;
		graph.label2[i] = false;
	}
	graph.value[arguments.sourceNode] = 0;
	graph.label1[arguments.sourceNode] = true;

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);


	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint itr = 0;

	// One global iteration = one pass over all partitions of the frontier.
	while (subgraph.numActiveNodes>0)
	{
		itr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			// Presumably advances/clears the two label arrays for this
			// partition — see gpu_kernels.cu (not visible in this chunk).
			moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			bfs_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
					partitioner.fromNode[i],
					partitioner.fromEdge[i],
					subgraph.d_activeNodes,
					subgraph.d_activeNodesPointer,
					subgraph.d_activeEdgeList,
					graph.d_outDegree,
					graph.d_value,
					//d_finished,
					graph.d_label1,
					graph.d_label2);

			cudaDeviceSynchronize();
			gpuErrorcheck( cudaPeekAtLastError() );
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	cout << "Number of iterations = " << itr << endl;

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
#include "../shared/timer.hpp"
#include "../shared/argument_parsing.cuh"
#include "../shared/graph.cuh"
#include "../shared/subgraph.cuh"
#include "../shared/partitioner.cuh"
#include "../shared/subgraph_generator.cuh"
#include "../shared/gpu_error_check.cuh"
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"


// Asynchronous connected-components driver: label propagation (cc_async)
// iterated to convergence per partition; every vertex starts active with
// its own id as component label.
int main(int argc, char** argv)
{
	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	Graph<OutEdge> graph(arguments.input, false);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. CC init: value[i] = i (own component id), all vertices active;
	// arrays copied to the device. Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = i;
		graph.label1[i] = true;
		graph.label2[i] = false;
	}

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);


	Partitioner<OutEdge> partitioner;

	timer.Start();

	unsigned int gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one super iteration per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			// Presumably merges label1|label2 for this partition's nodes —
			// see gpu_kernels.cu (not visible in this chunk).
			mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: propagate labels within this partition until
			// nothing changes (d_finished stays true).
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				// Ping-pong label arrays between odd/even inner iterations.
				cc_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						d_finished,
						(itr%2==1) ? graph.d_label1 : graph.d_label2,
						(itr%2==1) ? graph.d_label2 : graph.d_label1);

				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
	// Synchronous connected-components driver body (the includes, graph read
	// and value[i]=i initialization are on the preceding chunk): one
	// cc_kernel sweep per partition per global iteration.

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph);


	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint itr = 0;

	// One global iteration = one pass over all partitions of the frontier.
	while (subgraph.numActiveNodes>0)
	{
		itr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			// Presumably advances/clears the two label arrays for this
			// partition — see gpu_kernels.cu (not visible in this chunk).
			moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			cc_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
					partitioner.fromNode[i],
					partitioner.fromEdge[i],
					subgraph.d_activeNodes,
					subgraph.d_activeNodesPointer,
					subgraph.d_activeEdgeList,
					graph.d_outDegree,
					graph.d_value,
					//d_finished,
					graph.d_label1,
					graph.d_label2);

			cudaDeviceSynchronize();
			gpuErrorcheck( cudaPeekAtLastError() );
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	cout << "Number of iterations = " << itr << endl;

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
#include "../shared/gpu_error_check.cuh"
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"
#include "../shared/test.cuh"
#include "../shared/test.cu"


// Asynchronous PageRank (delta-based) driver: a vertex stays active while
// its delta exceeds the accuracy threshold `acc`; pr_async is iterated to
// convergence per partition.
int main(int argc, char** argv)
{

	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	GraphPR<OutEdge> graph(arguments.input, true);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	//for(unsigned int i=0; i<100; i++)
	//	cout << graph.edgeList[i].end << " " << graph.edgeList[i].w8;

	float initPR = 0.15;	// damping-style base rank assigned to every vertex
	float acc = 0.01;	// delta threshold below which a vertex goes inactive

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. PR init: value[i] = initPR, delta[i] = initPR; value and delta
	// copied to the device. Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = initPR;
		graph.delta[i] = initPR;
	}

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_delta, graph.delta, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph, acc);

	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one super iteration per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			//mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: push deltas within this partition until all
			// residual deltas drop below acc (d_finished stays true).
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				pr_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						graph.d_delta,
						d_finished,
						acc);


				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated deltas.
		subgen.generate(graph, subgraph, acc);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(float), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));


	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
	// Synchronous PageRank driver body (the includes, graph read and timer
	// setup are on the preceding chunk): one pr_kernel sweep per partition
	// per global iteration; activity decided by delta > acc.

	float initPR = 0.15;	// base rank assigned to every vertex
	float acc = 0.01;	// delta threshold below which a vertex goes inactive

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. value[i] = initPR, delta[i] = initPR, copied to the device.
	// Verify against the upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = initPR;
		graph.delta[i] = initPR;
	}

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_delta, graph.delta, graph.num_nodes * sizeof(float), cudaMemcpyHostToDevice));

	Subgraph<OutEdge> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdge> subgen(graph);

	subgen.generate(graph, subgraph, acc);

	Partitioner<OutEdge> partitioner;

	timer.Start();

	uint gItr = 0;


	// One global iteration = one pass over all partitions of the frontier.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdge), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			pr_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
					partitioner.fromNode[i],
					partitioner.fromEdge[i],
					subgraph.d_activeNodes,
					subgraph.d_activeNodesPointer,
					subgraph.d_activeEdgeList,
					graph.d_outDegree,
					graph.d_value,
					graph.d_delta,
					acc);


			cudaDeviceSynchronize();
			gpuErrorcheck( cudaPeekAtLastError() );

		}

		// Rebuild the active subgraph from the updated deltas.
		subgen.generate(graph, subgraph, acc);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	cout << "Number of iterations = " << gItr << endl;

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(float), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));


	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
#include "../shared/gpu_kernels.cuh"
#include "../shared/subway_utilities.hpp"
#include "../shared/test.cuh"
#include "../shared/test.cu"


// Asynchronous SSSP driver over weighted edges (OutEdgeWeighted): sssp_async
// relaxes distances within each partition until convergence; the active
// subgraph is rebuilt after every super-iteration.
int main(int argc, char** argv)
{
	/*
	Test<int> test;
	cout << test.sum(20, 30) << endl;
	*/

	// Force CUDA context creation now so it is not billed to the timers.
	cudaFree(0);

	ArgumentParser arguments(argc, argv, true, false);

	Timer timer;
	timer.Start();

	Graph<OutEdgeWeighted> graph(arguments.input, true);
	graph.ReadGraph();

	float readtime = timer.Finish();
	cout << "Graph Reading finished in " << readtime/1000 << " (s).\n";

	//for(unsigned int i=0; i<100; i++)
	//	cout << graph.edgeList[i].end << " " << graph.edgeList[i].w8;

	// NOTE(review): initialization reconstructed — the dump truncates this
	// region. SSSP init: distances = DIST_INFINITY, labels cleared, source
	// distance 0 and active; arrays copied to the device. Verify against the
	// upstream repository.
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.value[i] = DIST_INFINITY;
		graph.label1[i] = false;
		graph.label2[i] = false;
	}
	graph.value[arguments.sourceNode] = 0;
	graph.label1[arguments.sourceNode] = true;

	gpuErrorcheck(cudaMemcpy(graph.d_outDegree, graph.outDegree, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_value, graph.value, graph.num_nodes * sizeof(unsigned int), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Subgraph<OutEdgeWeighted> subgraph(graph.num_nodes, graph.num_edges);

	SubgraphGenerator<OutEdgeWeighted> subgen(graph);

	subgen.generate(graph, subgraph);

	// NOTE(review): reconstructed label reset (truncated in dump).
	for(unsigned int i=0; i<graph.num_nodes; i++)
	{
		graph.label1[i] = true;
		graph.label2[i] = false;
	}
	gpuErrorcheck(cudaMemcpy(graph.d_label1, graph.label1, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));
	gpuErrorcheck(cudaMemcpy(graph.d_label2, graph.label2, graph.num_nodes * sizeof(bool), cudaMemcpyHostToDevice));

	Partitioner<OutEdgeWeighted> partitioner;

	timer.Start();

	uint gItr = 0;

	bool finished;
	bool *d_finished;
	gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool)));

	// Outer loop: one super iteration per regenerated active subgraph.
	while (subgraph.numActiveNodes>0)
	{
		gItr++;

		partitioner.partition(subgraph, subgraph.numActiveNodes);
		// a super iteration
		for(int i=0; i<partitioner.numPartitions; i++)
		{
			cudaDeviceSynchronize();
			// Stream this partition's compacted weighted edges to the device.
			gpuErrorcheck(cudaMemcpy(subgraph.d_activeEdgeList, subgraph.activeEdgeList + partitioner.fromEdge[i], (partitioner.partitionEdgeSize[i]) * sizeof(OutEdgeWeighted), cudaMemcpyHostToDevice));
			cudaDeviceSynchronize();

			//moveUpLabels<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);
			// Presumably merges label1|label2 for this partition's nodes —
			// see gpu_kernels.cu (not visible in this chunk).
			mixLabels<<<partitioner.partitionNodeSize[i]/512 + 1 , 512>>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]);

			uint itr = 0;
			// Inner async loop: relax distances within this partition until
			// no vertex improves (d_finished stays true).
			do
			{
				itr++;
				finished = true;
				gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice));

				// Ping-pong label arrays between odd/even inner iterations.
				sssp_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i],
						partitioner.fromNode[i],
						partitioner.fromEdge[i],
						subgraph.d_activeNodes,
						subgraph.d_activeNodesPointer,
						subgraph.d_activeEdgeList,
						graph.d_outDegree,
						graph.d_value,
						d_finished,
						(itr%2==1) ? graph.d_label1 : graph.d_label2,
						(itr%2==1) ? graph.d_label2 : graph.d_label1);

				cudaDeviceSynchronize();
				gpuErrorcheck( cudaPeekAtLastError() );

				gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost));
			}while(!(finished));

			cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl;
		}

		// Rebuild the active subgraph from the updated labels.
		subgen.generate(graph, subgraph);

	}

	float runtime = timer.Finish();
	cout << "Processing finished in " << runtime/1000 << " (s).\n";

	gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost));

	utilities::PrintResults(graph.value, min(30, graph.num_nodes));

	//for(int i=0; i<20; i++)
	//	cout << graph.value[i] << endl;

	if(arguments.hasOutput)
		utilities::SaveResults(arguments.output, graph.value, graph.num_nodes);
}
(s).\n"; 27 | 28 | for(unsigned int i=0; i subgraph(graph.num_nodes, graph.num_edges); 45 | 46 | SubgraphGenerator subgen(graph); 47 | 48 | subgen.generate(graph, subgraph); 49 | 50 | 51 | Partitioner partitioner; 52 | 53 | timer.Start(); 54 | 55 | uint itr = 0; 56 | 57 | while (subgraph.numActiveNodes>0) 58 | { 59 | itr++; 60 | 61 | partitioner.partition(subgraph, subgraph.numActiveNodes); 62 | // a super iteration 63 | for(int i=0; i>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 70 | 71 | sssp_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i], 72 | partitioner.fromNode[i], 73 | partitioner.fromEdge[i], 74 | subgraph.d_activeNodes, 75 | subgraph.d_activeNodesPointer, 76 | subgraph.d_activeEdgeList, 77 | graph.d_outDegree, 78 | graph.d_value, 79 | //d_finished, 80 | graph.d_label1, 81 | graph.d_label2); 82 | 83 | cudaDeviceSynchronize(); 84 | gpuErrorcheck( cudaPeekAtLastError() ); 85 | } 86 | 87 | subgen.generate(graph, subgraph); 88 | 89 | } 90 | 91 | float runtime = timer.Finish(); 92 | cout << "Processing finished in " << runtime << " (ms).\n"; 93 | 94 | cout << "Number of iterations = " << itr << endl; 95 | 96 | gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost)); 97 | 98 | utilities::PrintResults(graph.value, min(30, graph.num_nodes)); 99 | 100 | if(arguments.hasOutput) 101 | utilities::SaveResults(arguments.output, graph.value, graph.num_nodes); 102 | } 103 | 104 | -------------------------------------------------------------------------------- /subway/sswp-async.cu: -------------------------------------------------------------------------------- 1 | #include "../shared/globals.hpp" 2 | #include "../shared/timer.hpp" 3 | #include "../shared/argument_parsing.cuh" 4 | #include "../shared/graph.cuh" 5 | #include "../shared/subgraph.cuh" 6 | #include "../shared/partitioner.cuh" 7 | #include 
"../shared/subgraph_generator.cuh" 8 | #include "../shared/gpu_error_check.cuh" 9 | #include "../shared/gpu_kernels.cuh" 10 | #include "../shared/subway_utilities.hpp" 11 | 12 | 13 | int main(int argc, char** argv) 14 | { 15 | cudaFree(0); 16 | 17 | ArgumentParser arguments(argc, argv, true, false); 18 | 19 | Timer timer; 20 | timer.Start(); 21 | 22 | Graph graph(arguments.input, true); 23 | graph.ReadGraph(); 24 | 25 | float readtime = timer.Finish(); 26 | cout << "Graph Reading finished in " << readtime/1000 << " (s).\n"; 27 | 28 | //for(unsigned int i=0; i<100; i++) 29 | // cout << graph.edgeList[i].end << " " << graph.edgeList[i].w8; 30 | 31 | for(unsigned int i=0; i subgraph(graph.num_nodes, graph.num_edges); 47 | 48 | SubgraphGenerator subgen(graph); 49 | 50 | subgen.generate(graph, subgraph); 51 | 52 | for(unsigned int i=0; i partitioner; 61 | 62 | timer.Start(); 63 | 64 | uint gItr = 0; 65 | 66 | bool finished; 67 | bool *d_finished; 68 | gpuErrorcheck(cudaMalloc(&d_finished, sizeof(bool))); 69 | 70 | while (subgraph.numActiveNodes>0) 71 | { 72 | gItr++; 73 | 74 | partitioner.partition(subgraph, subgraph.numActiveNodes); 75 | // a super iteration 76 | for(int i=0; i>>(subgraph.d_activeNodes, graph.d_label, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 83 | mixLabels<<>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 84 | 85 | uint itr = 0; 86 | do 87 | { 88 | cout << "\t\tIteration " << ++itr << endl; 89 | finished = true; 90 | gpuErrorcheck(cudaMemcpy(d_finished, &finished, sizeof(bool), cudaMemcpyHostToDevice)); 91 | 92 | sswp_async<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i], 93 | partitioner.fromNode[i], 94 | partitioner.fromEdge[i], 95 | subgraph.d_activeNodes, 96 | subgraph.d_activeNodesPointer, 97 | subgraph.d_activeEdgeList, 98 | graph.d_outDegree, 99 | graph.d_value, 100 | d_finished, 101 | (itr%2==1) ? 
graph.d_label1 : graph.d_label2, 102 | (itr%2==1) ? graph.d_label2 : graph.d_label1); 103 | 104 | cudaDeviceSynchronize(); 105 | gpuErrorcheck( cudaPeekAtLastError() ); 106 | 107 | gpuErrorcheck(cudaMemcpy(&finished, d_finished, sizeof(bool), cudaMemcpyDeviceToHost)); 108 | }while(!(finished)); 109 | 110 | cout << itr << ((itr>1) ? " Inner Iterations" : " Inner Iteration") << " in Global Iteration " << gItr << ", Partition " << i << endl; 111 | } 112 | 113 | subgen.generate(graph, subgraph); 114 | 115 | } 116 | 117 | float runtime = timer.Finish(); 118 | cout << "Processing finished in " << runtime/1000 << " (s).\n"; 119 | 120 | gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost)); 121 | 122 | utilities::PrintResults(graph.value, min(30, graph.num_nodes)); 123 | 124 | if(arguments.hasOutput) 125 | utilities::SaveResults(arguments.output, graph.value, graph.num_nodes); 126 | } 127 | 128 | -------------------------------------------------------------------------------- /subway/sswp-sync.cu: -------------------------------------------------------------------------------- 1 | #include "../shared/globals.hpp" 2 | #include "../shared/timer.hpp" 3 | #include "../shared/argument_parsing.cuh" 4 | #include "../shared/graph.cuh" 5 | #include "../shared/subgraph.cuh" 6 | #include "../shared/partitioner.cuh" 7 | #include "../shared/subgraph_generator.cuh" 8 | #include "../shared/gpu_error_check.cuh" 9 | #include "../shared/gpu_kernels.cuh" 10 | #include "../shared/subway_utilities.hpp" 11 | 12 | 13 | int main(int argc, char** argv) 14 | { 15 | cudaFree(0); 16 | 17 | ArgumentParser arguments(argc, argv, true, false); 18 | 19 | Timer timer; 20 | timer.Start(); 21 | 22 | Graph graph(arguments.input, true); 23 | graph.ReadGraph(); 24 | 25 | float readtime = timer.Finish(); 26 | cout << "Graph Reading finished in " << readtime/1000 << " (s).\n"; 27 | 28 | for(unsigned int i=0; i subgraph(graph.num_nodes, graph.num_edges); 45 | 
46 | SubgraphGenerator subgen(graph); 47 | 48 | subgen.generate(graph, subgraph); 49 | 50 | 51 | Partitioner partitioner; 52 | 53 | timer.Start(); 54 | 55 | uint itr = 0; 56 | 57 | while (subgraph.numActiveNodes>0) 58 | { 59 | itr++; 60 | 61 | partitioner.partition(subgraph, subgraph.numActiveNodes); 62 | // a super iteration 63 | for(int i=0; i>>(subgraph.d_activeNodes, graph.d_label1, graph.d_label2, partitioner.partitionNodeSize[i], partitioner.fromNode[i]); 70 | 71 | sswp_kernel<<< partitioner.partitionNodeSize[i]/512 + 1 , 512 >>>(partitioner.partitionNodeSize[i], 72 | partitioner.fromNode[i], 73 | partitioner.fromEdge[i], 74 | subgraph.d_activeNodes, 75 | subgraph.d_activeNodesPointer, 76 | subgraph.d_activeEdgeList, 77 | graph.d_outDegree, 78 | graph.d_value, 79 | //d_finished, 80 | graph.d_label1, 81 | graph.d_label2); 82 | 83 | cudaDeviceSynchronize(); 84 | gpuErrorcheck( cudaPeekAtLastError() ); 85 | } 86 | 87 | subgen.generate(graph, subgraph); 88 | 89 | } 90 | 91 | float runtime = timer.Finish(); 92 | cout << "Processing finished in " << runtime/1000 << " (s).\n"; 93 | 94 | cout << "Number of iterations = " << itr << endl; 95 | 96 | gpuErrorcheck(cudaMemcpy(graph.value, graph.d_value, graph.num_nodes*sizeof(uint), cudaMemcpyDeviceToHost)); 97 | 98 | utilities::PrintResults(graph.value, min(30, graph.num_nodes)); 99 | 100 | if(arguments.hasOutput) 101 | utilities::SaveResults(arguments.output, graph.value, graph.num_nodes); 102 | } 103 | 104 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC=g++ 3 | NC=nvcc 4 | CFLAGS=-std=c++11 -O3 5 | NFLAGS=-arch=sm_60 6 | 7 | SHARED=../shared 8 | 9 | all: converter 10 | 11 | converter: converter.cpp 12 | $(CC) converter.cpp -o converter $(CFLAGS) 13 | 14 | clean: 15 | rm -f converter 16 | -------------------------------------------------------------------------------- 
/tools/converter.cpp: -------------------------------------------------------------------------------- 1 | #include "../shared/globals.hpp" 2 | 3 | 4 | bool IsWeightedFormat(string format) 5 | { 6 | if((format == "bwcsr") || 7 | (format == "wcsr") || 8 | (format == "wel")) 9 | return true; 10 | return false; 11 | } 12 | 13 | string GetFileExtension(string fileName) 14 | { 15 | if(fileName.find_last_of(".") != string::npos) 16 | return fileName.substr(fileName.find_last_of(".")+1); 17 | return ""; 18 | } 19 | 20 | int main(int argc, char** argv) 21 | { 22 | if(argc!= 2) 23 | { 24 | cout << "\nThere was an error parsing command line arguments\n"; 25 | exit(0); 26 | } 27 | 28 | string input = string(argv[1]); 29 | 30 | if(GetFileExtension(input) == "el") 31 | { 32 | ifstream infile; 33 | infile.open(input); 34 | stringstream ss; 35 | uint max = 0; 36 | string line; 37 | uint edgeCounter = 0; 38 | 39 | vector edges; 40 | Edge newEdge; 41 | while(getline( infile, line )) 42 | { 43 | ss.str(""); 44 | ss.clear(); 45 | ss << line; 46 | 47 | ss >> newEdge.source; 48 | ss >> newEdge.end; 49 | 50 | edges.push_back(newEdge); 51 | edgeCounter++; 52 | 53 | if(max < newEdge.source) 54 | max = newEdge.source; 55 | if(max < newEdge.end) 56 | max = newEdge.end; 57 | } 58 | infile.close(); 59 | 60 | uint num_nodes = max + 1; 61 | uint num_edges = edgeCounter; 62 | uint *nodePointer = new uint[num_nodes+1]; 63 | OutEdge *edgeList = new OutEdge[num_edges]; 64 | uint *degree = new uint[num_nodes]; 65 | for(uint i=0; i edges; 107 | EdgeWeighted newEdge; 108 | while(getline( infile, line )) 109 | { 110 | ss.str(""); 111 | ss.clear(); 112 | ss << line; 113 | 114 | ss >> newEdge.source; 115 | ss >> newEdge.end; 116 | ss >> newEdge.w8; 117 | 118 | edges.push_back(newEdge); 119 | edgeCounter++; 120 | 121 | if(max < newEdge.source) 122 | max = newEdge.source; 123 | if(max < newEdge.end) 124 | max = newEdge.end; 125 | } 126 | infile.close(); 127 | 128 | uint num_nodes = max + 1; 129 | uint 
num_edges = edgeCounter; 130 | uint *nodePointer = new uint[num_nodes+1]; 131 | OutEdgeWeighted *edgeList = new OutEdgeWeighted[num_edges]; 132 | uint *degree = new uint[num_nodes]; 133 | for(uint i=0; i