├── bfs
    ├── ref_bfs.o
    ├── Makefile
    ├── bfs.h
    ├── bfs.cpp
    ├── grade.cpp
    └── main.cpp
├── handout
    ├── AMI.png
    ├── storage.png
    ├── ip_address.png
    ├── instance_nav.png
    ├── instance_type.png
    └── new_key_pair.png
├── create_submission.sh
├── tools
    ├── Makefile
    ├── plaintext.graph
    └── graphTools.cpp
├── common
    ├── contracts.h
    ├── graph.h
    ├── graph_internal.h
    ├── grade.h
    ├── CycleTimer.h
    └── graph.cpp
├── cloud_readme.md
└── README.md


/bfs/ref_bfs.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/bfs/ref_bfs.o


--------------------------------------------------------------------------------
/handout/AMI.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/AMI.png


--------------------------------------------------------------------------------
/handout/storage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/storage.png


--------------------------------------------------------------------------------
/handout/ip_address.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/ip_address.png


--------------------------------------------------------------------------------
/handout/instance_nav.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/instance_nav.png


--------------------------------------------------------------------------------
/handout/instance_type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/instance_type.png


--------------------------------------------------------------------------------
/handout/new_key_pair.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/new_key_pair.png


--------------------------------------------------------------------------------
/create_submission.sh:
--------------------------------------------------------------------------------
1 | # archive bfs/bfs.cpp and bfs/bfs.h into asst5.tar.gz
2 | 
3 | tar -czvf asst5.tar.gz bfs/bfs.cpp bfs/bfs.h


--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
1 | BINARYNAME=graphTools
2 | 
3 | main:
4 | 	g++ -std=c++11 -g -O3 -o ${BINARYNAME} graphTools.cpp ../common/graph.cpp
5 | clean:
6 | 	rm -rf pr *~ *.*~ ${BINARYNAME}
7 | 


--------------------------------------------------------------------------------
/tools/plaintext.graph:
--------------------------------------------------------------------------------
 1 | AdjacencyGraph
 2 | # num vertices
 3 | 5
 4 | # num edges
 5 | 8
 6 | # edge starts
 7 | 0 4 6 7 8
 8 | # all the outgoing edges (target vertex)
 9 | 1 2 3 4
10 | 2 3
11 | 0
12 | 0
13 | 


--------------------------------------------------------------------------------
/bfs/Makefile:
--------------------------------------------------------------------------------
1 | all: default grade
2 | 
3 | default: main.cpp bfs.cpp
4 | 	g++ -I../ -std=c++11 -fopenmp -O3 -g -o bfs main.cpp bfs.cpp ../common/graph.cpp ref_bfs.o
5 | grade: grade.cpp bfs.cpp
6 | 	g++ -I../ -std=c++11 -fopenmp -O3 -g -o bfs_grader grade.cpp bfs.cpp ../common/graph.cpp ref_bfs.o
7 | clean:
8 | 	rm -rf bfs_grader bfs  *~ *.*~
9 | 


--------------------------------------------------------------------------------
/bfs/bfs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __BFS_H__
 2 | #define __BFS_H__
 3 | 
 4 | //#define DEBUG
 5 | 
 6 | #include "common/graph.h"
 7 | 
 8 | struct solution
 9 | {
10 |   int *distances;  // one entry per vertex; bfs_top_down stores each vertex's distance from the root here
11 | };
12 | 
13 | struct vertex_set {
14 |   // # of vertices currently in the set
15 |   int count;
16 |   // capacity of the vertices buffer, in entries
17 |   int max_vertices;
18 |   // array of vertex ids in set (the first `count` entries are the valid ones)
19 |   int *vertices;
20 | };
21 | 
22 | 
23 | void bfs_top_down(Graph graph, solution* sol);
24 | void bfs_bottom_up(Graph graph, solution* sol);
25 | void bfs_hybrid(Graph graph, solution* sol);
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/common/contracts.h:
--------------------------------------------------------------------------------
 1 | /* Debugging with contracts; simulating cc0 -d
 2 |  * Enable with gcc -DDEBUG ...
 3 |  *
 4 |  * 15-122 Principles of Imperative Computation
 5 |  * Frank Pfenning
 6 |  */
 7 | 
 8 | #include <assert.h>
 9 | 
10 | /* Unlike typical header files, "contracts.h" may be
11 |  * included multiple times, with and without DEBUG defined.
12 |  * For this to succeed we first undefine the macros in
13 |  * question in order to avoid a redefinition warning.
14 |  */
15 | 
16 | #undef ASSERT
17 | #undef REQUIRES
18 | #undef ENSURES
19 | 
20 | #ifdef DEBUG
21 | 
22 | #define ASSERT(COND) assert(COND)
23 | #define REQUIRES(COND) assert(COND)
24 | #define ENSURES(COND) assert(COND)
25 | 
26 | #else
27 | 
28 | #define ASSERT(COND) ((void)0)
29 | #define REQUIRES(COND) ((void)0)
30 | #define ENSURES(COND) ((void)0)
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/common/graph.h:
--------------------------------------------------------------------------------
 1 | #ifndef __GRAPH_H__
 2 | #define __GRAPH_H__
 3 | 
 4 | using Vertex = int;
 5 | 
 6 | struct graph
 7 | {
 8 |     // Number of edges in the graph
 9 |     int num_edges;
10 |     // Number of vertices in the graph
11 |     int num_nodes;
12 | 
13 |     // The node reached by vertex i's first outgoing edge is given by
14 |     // outgoing_edges[outgoing_starts[i]].  To iterate over all
15 |     // outgoing edges, please see the top-down bfs implementation.
16 |     int* outgoing_starts;
17 |     Vertex* outgoing_edges;
18 |     // Same layout for incoming edges: incoming_edges[incoming_starts[i]] is the source of vertex i's first incoming edge.
19 |     int* incoming_starts;
20 |     Vertex* incoming_edges;
21 | };
22 | 
23 | using Graph = graph*;
24 | 
25 | /* Getters */
26 | static inline int num_nodes(const Graph);
27 | static inline int num_edges(const Graph);
28 | 
29 | static inline const Vertex* outgoing_begin(const Graph, Vertex);
30 | static inline const Vertex* outgoing_end(const Graph, Vertex);
31 | static inline int outgoing_size(const Graph, Vertex);
32 | 
33 | static inline const Vertex* incoming_begin(const Graph, Vertex);
34 | static inline const Vertex* incoming_end(const Graph, Vertex);
35 | static inline int incoming_size(const Graph, Vertex);
36 | 
37 | 
38 | /* IO */
39 | Graph load_graph(const char* filename);
40 | Graph load_graph_binary(const char* filename);
41 | void store_graph_binary(const char* filename, Graph);
42 | 
43 | void print_graph(const graph*);
44 | 
45 | 
46 | /* Deallocation */
47 | void free_graph(Graph);
48 | 
49 | 
50 | /* Included here to enable inlining. Don't look. */
51 | #include "graph_internal.h"
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/common/graph_internal.h:
--------------------------------------------------------------------------------
 1 | #ifndef __GRAPH_INTERNAL_H__
 2 | #define __GRAPH_INTERNAL_H__
 3 | 
 4 | #include <stdlib.h>
 5 | #include "contracts.h"
 6 | 
 7 | static inline int num_nodes(const Graph graph)
 8 | {
 9 |   REQUIRES(graph != NULL);  // checked only when DEBUG is defined (see contracts.h)
10 |   return graph->num_nodes;  // vertex count stored in the graph struct
11 | }
12 | 
13 | static inline int num_edges(const Graph graph)
14 | {
15 |   REQUIRES(graph != NULL);  // checked only when DEBUG is defined (see contracts.h)
16 |   return graph->num_edges;  // edge count stored in the graph struct
17 | }
18 | 
19 | static inline const Vertex* outgoing_begin(const Graph g, Vertex v)
20 | {
21 |   REQUIRES(g != NULL);
22 |   REQUIRES(0 <= v && v < num_nodes(g));  // v must be a valid vertex id
23 |   return g->outgoing_edges + g->outgoing_starts[v];  // pointer to the first of v's outgoing edge targets
24 | }
25 | 
26 | static inline const Vertex* outgoing_end(const Graph g, Vertex v)
27 | {
28 |   REQUIRES(g != NULL);
29 |   REQUIRES(0 <= v && v < num_nodes(g));
30 |   const int* starts = g->outgoing_starts;  // the last vertex has no starts[v + 1]; its edges run to num_edges
31 |   return g->outgoing_edges + ((v == g->num_nodes - 1) ? g->num_edges : starts[v + 1]);
32 | }
33 | 
34 | static inline int outgoing_size(const Graph g, Vertex v)
35 | {
36 |   REQUIRES(g != NULL);
37 |   REQUIRES(0 <= v && v < num_nodes(g));
38 |   // Out-degree of v = (offset where v's edges stop) - (offset where they start).
39 |   // The last vertex has no successor entry in outgoing_starts, so its edges
40 |   // run up to num_edges.
41 |   const int first = g->outgoing_starts[v];
42 |   return ((v == g->num_nodes - 1) ? g->num_edges : g->outgoing_starts[v + 1]) - first;
43 | }
44 | 
45 | static inline const Vertex* incoming_begin(const Graph g, Vertex v)
46 | {
47 |   REQUIRES(g != NULL);
48 |   REQUIRES(0 <= v && v < num_nodes(g));  // v must be a valid vertex id
49 |   return g->incoming_edges + g->incoming_starts[v];  // pointer to the first of v's incoming edge sources
50 | }
51 | 
52 | static inline const Vertex* incoming_end(const Graph g, Vertex v)
53 | {
54 |   REQUIRES(g != NULL);
55 |   REQUIRES(0 <= v && v < num_nodes(g));
56 |   const int* starts = g->incoming_starts;  // the last vertex has no starts[v + 1]; its edges run to num_edges
57 |   return g->incoming_edges + ((v == g->num_nodes - 1) ? g->num_edges : starts[v + 1]);
58 | }
59 | 
60 | static inline int incoming_size(const Graph g, Vertex v)
61 | {
62 |   REQUIRES(g != NULL);
63 |   REQUIRES(0 <= v && v < num_nodes(g));
64 |   // In-degree of v = (offset where v's edges stop) - (offset where they start).
65 |   // The last vertex has no successor entry in incoming_starts, so its edges
66 |   // run up to num_edges.
67 |   const int first = g->incoming_starts[v];
68 |   return ((v == g->num_nodes - 1) ? g->num_edges : g->incoming_starts[v + 1]) - first;
69 | }
70 | 
71 | #endif // __GRAPH_INTERNAL_H__
72 | 


--------------------------------------------------------------------------------
/cloud_readme.md:
--------------------------------------------------------------------------------
 1 | # AWS Setup Instructions #
 2 | 
 3 | For performance testing, you will need to run your code on a VM instance on Amazon Web Services (AWS). Here are the steps to get set up for running on AWS. Note that if you are using the shared cluster, you do not need to do any setup.
 4 | 
 5 | NOTE: __Please don't forget to SHUT DOWN your instances when you're done for the day to avoid burning through credits overnight!__
 6 | 
 7 | ### Creating a VM with 32 vCPU ###
 8 |       
 9 | 1. Navigate to your "Instances" page in AWS by selecting the correct link in the left hand menu.
10 | ![Instance Page](handout/instance_nav.png?raw=true)
11 | 
12 | 2. Click on the button that says `Launch Instances`. Choose the `Ubuntu Server 20.04 LTS (HVM), SSD Volume Type` AMI:
13 | ![AMI Selection](handout/AMI.png?raw=true)
14 | 
15 | 3. Choose the `m5.8xlarge` Instance Type
16 | ![instance](handout/instance_type.png?raw=true)
17 | 
18 | 4. Next, under `Key pair (login)`, choose a key pair. You can use the same key pair from assignment 3. Alternatively, you can create a new one. To create a new one, click `Create new key pair` and give it whatever name you'd like. This will download a keyfile to your computer called `<key_name>.pem` which you will use to login to the VM instance you are about to create.
19 | ![Key Pair](handout/new_key_pair.png?raw=true)
20 | 
21 | 5. Scroll down to `Configure storage` and change the size of the `Root volume` to 100 GiB to accommodate the packages we will need to install to make the instance functional for the assignment.
22 | ![Storage](handout/storage.png?raw=true)
23 | 
24 | 6. Once you've done all previous steps, scroll to the bottom and click `Launch instance`
25 | 
26 | __Note: `m5.8xlarge` instances cost $1.792 / hour, so leaving one running for a whole day will consume $43 worth of your AWS coupon.__
27 | 
28 | 7. Now that you've created your VM, you should be able to __SSH__ into it. You need the public IP address to SSH into it, which you can find on the instance page by clicking the `View All Instances` button on the current page and then the instance ID for your created instance (note, it may take a moment for the instance to startup and be assigned an IP address):
29 | ![IP Address](handout/ip_address.png?raw=true)
30 | Once you have the IP address, you can login to the instance by running this command:
31 | ~~~~
32 | ssh -i path/to/key_name.pem ubuntu@<public_ip_address>
33 | ~~~~
34 | 
35 | 8. Once you SSH into your VM instance, you'll want to install whatever software you need to make the machine a useful development environment for you.  For example we recommend:
36 | ~~~~
37 | sudo apt update
38 | sudo apt install emacs25
39 | sudo apt install make
40 | sudo apt install g++
41 | ~~~~
42 | 
43 | If you're confused about any of the steps, having problems with setting up your account or have any additional questions, reach out to us on Ed!
44 |   
45 | __Again, please don't forget to SHUT DOWN your instances when you're done with your work for the day!__
46 | 


--------------------------------------------------------------------------------
/bfs/bfs.cpp:
--------------------------------------------------------------------------------
  1 | #include "bfs.h"
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | #include <cstddef>
  7 | #include <omp.h>
  8 | 
  9 | #include "../common/CycleTimer.h"
 10 | #include "../common/graph.h"
 11 | 
 12 | #define ROOT_NODE_ID 0
 13 | #define NOT_VISITED_MARKER -1
 14 | 
 15 | void vertex_set_clear(vertex_set* list) {
 16 |     list->count = 0;  // empties the set; the vertices buffer and max_vertices are left untouched
 17 | }
 18 | 
 19 | void vertex_set_init(vertex_set* list, int count) {
 20 |     list->vertices = (int*)malloc(sizeof(int) * count);  // room for `count` vertex ids
 21 |     list->max_vertices = count;
 22 |     list->count = 0;  // a freshly initialized set is empty
 23 | }
 24 | 
 25 | // Advance the BFS by one level in the "top-down" direction: every vertex
 26 | // currently on the frontier scans its outgoing edges, and each neighbor
 27 | // that is still unvisited gets a distance and joins new_frontier.
 28 | void top_down_step(
 29 |     Graph g,
 30 |     vertex_set* frontier,
 31 |     vertex_set* new_frontier,
 32 |     int* distances)
 33 | {
 34 | 
 35 |     for (int f = 0; f < frontier->count; f++) {
 36 | 
 37 |         const int src = frontier->vertices[f];
 38 | 
 39 |         // [first, stop) is src's slice of outgoing_edges; the last vertex
 40 |         // has no successor entry in outgoing_starts, so it stops at num_edges
 41 |         const int first = g->outgoing_starts[src];
 42 |         const int stop = (src == g->num_nodes - 1) ? g->num_edges : g->outgoing_starts[src + 1];
 43 | 
 44 |         // give every still-unvisited neighbor a distance and enqueue it
 45 |         for (int e = first; e < stop; e++) {
 46 |             const int dst = g->outgoing_edges[e];
 47 |             if (distances[dst] != NOT_VISITED_MARKER)
 48 |                 continue;
 49 | 
 50 |             distances[dst] = distances[src] + 1;
 51 |             new_frontier->vertices[new_frontier->count] = dst;
 52 |             new_frontier->count++;
 53 |         }
 54 |     }
 55 | }
 56 | 
 57 | // Implements top-down BFS from ROOT_NODE_ID.
 58 | //
 59 | // On return, sol->distances holds each node's distance to the root;
 60 | // nodes that are never reached keep NOT_VISITED_MARKER.
 61 | void bfs_top_down(Graph graph, solution* sol) {
 62 |     vertex_set list1;
 63 |     vertex_set list2;
 64 |     vertex_set_init(&list1, graph->num_nodes);
 65 |     vertex_set_init(&list2, graph->num_nodes);
 66 | 
 67 |     vertex_set* frontier = &list1;
 68 |     vertex_set* new_frontier = &list2;
 69 | 
 70 |     // initialize all nodes to NOT_VISITED
 71 |     for (int i=0; i<graph->num_nodes; i++)
 72 |         sol->distances[i] = NOT_VISITED_MARKER;
 73 | 
 74 |     // setup frontier with the root node
 75 |     frontier->vertices[frontier->count++] = ROOT_NODE_ID;
 76 |     sol->distances[ROOT_NODE_ID] = 0;
 77 | 
 78 |     while (frontier->count != 0) {
 79 | #ifdef VERBOSE
 80 |         double start_time = CycleTimer::currentSeconds();
 81 | #endif
 82 |         vertex_set_clear(new_frontier);
 83 |         top_down_step(graph, frontier, new_frontier, sol->distances);
 84 | 
 85 | #ifdef VERBOSE
 86 |         double end_time = CycleTimer::currentSeconds();
 87 |         printf("frontier=%-10d %.4f sec\n", frontier->count, end_time - start_time);
 88 | #endif
 89 | 
 90 |         // swap pointers
 91 |         vertex_set* tmp = frontier;
 92 |         frontier = new_frontier;
 93 |         new_frontier = tmp;
 94 |     }
 95 | 
 96 |     // both frontier buffers were malloc'd by vertex_set_init; release them so repeated calls do not leak
 97 |     free(list1.vertices);
 98 |     free(list2.vertices);
 99 | }
100 | 
101 | void bfs_bottom_up(Graph graph, solution* sol)
102 | {
103 |     // CS149 students:
104 |     //
105 |     // You will need to implement the "bottom up" BFS here as
106 |     // described in the handout.
107 |     //
108 |     // As a result of your code's execution, sol->distances should be
109 |     // correctly populated for all nodes in the graph.
110 |     //
111 |     // As was done in the top-down case, you may wish to organize your
112 |     // code by creating subroutine bottom_up_step() that is called in
113 |     // each step of the BFS process.
114 | }
115 | 
116 | void bfs_hybrid(Graph graph, solution* sol)
117 | {
118 |     // CS149 students:
119 |     //
120 |     // You will need to implement the "hybrid" BFS here as
121 |     // described in the handout.  Until then this is a no-op stub.
122 | }
123 | 


--------------------------------------------------------------------------------
/common/grade.h:
--------------------------------------------------------------------------------
  1 | #ifndef __GRADE_H__
  2 | #define __GRADE_H__
  3 | 
  4 | #include <stdio.h>
  5 | #include <sstream>
  6 | #include <iomanip>
  7 | #include <chrono>
  8 | 
  9 | #include <type_traits>
 10 | #include <utility>
 11 | 
 12 | #include <float.h>
 13 | #include <cmath>
 14 | 
 15 | #include <omp.h>
 16 | 
 17 | #include "graph.h"
 18 | #include "graph_internal.h"
 19 | #include "contracts.h"
 20 | 
 21 | // Epsilon for approximate float comparisons
 22 | #define EPSILON 0.00000000001
 23 | 
 24 | // Output column size
 25 | #define COL_SIZE 15
 26 | 
 27 | // Point value for apps that are not run.
 28 | #define POINTS_NA -1
 29 | 
 30 | // Point value for apps that yielded incorrect results.
 31 | #define POINTS_INCORRECT -2
 32 | 
 33 | /*
 34 |  * Printing functions
 35 |  */
 36 | 
 37 | static void sep(std::ostream& out, char separator = '-', int length = 78)
 38 | {
 39 |     // write `length` copies of the separator, then end the line
 40 |     for (int left = length; left > 0; --left) out << separator;
 41 |     out << std::endl;
 42 | }
 43 | 
 44 | static void printTimingApp(std::ostream& timing, const char* appName)
 45 | {
 46 |   // Emit the same banner twice: first on stdout, then on the caller's
 47 |   // timing stream.
 48 |   std::ostream* sinks[2] = { &std::cout, &timing };
 49 |   for (std::ostream* sink : sinks) {
 50 |     *sink << std::endl << "Timing results for " << appName << ":" << std::endl;
 51 |     sep(*sink, '=', 75);
 52 |   }
 53 | }
 54 | 
 55 | /*
 56 |  * Correctness checkers
 57 |  */
 58 | 
 59 | template <class T>
 60 | bool compareArrays(Graph graph, T* ref, T* stu)
 61 | {
 62 |   // exact per-node comparison; reports and stops at the first mismatch
 63 |   for (int idx = 0; idx < graph->num_nodes; ++idx) {
 64 |     if (!(ref[idx] != stu[idx])) continue;
 65 |     std::cerr << "*** Results disagree at " << idx << " expected " 
 66 |       << ref[idx] << " found " << stu[idx] << std::endl;
 67 |     return false;
 68 |   }
 69 |   return true;
 70 | }
 71 | 
 72 | template <class T>
 73 | bool compareApprox(Graph graph, T* ref, T* stu)
 74 | {
 75 |   // per-node comparison with an absolute tolerance of EPSILON; reports and stops at the first miss
 76 |   for (int idx = 0; idx < graph->num_nodes; ++idx) {
 77 |     if (!(fabs(ref[idx] - stu[idx]) > EPSILON)) continue;
 78 |     std::cerr << "*** Results disagree at " << idx << " expected " 
 79 |       << ref[idx] << " found " << stu[idx] << std::endl;
 80 |     return false;
 81 |   }
 82 |   return true;
 83 | }
 84 | 
 85 | template <class T>
 86 | bool compareArraysAndDisplay(Graph graph, T* ref, T*stu) 
 87 | {
 88 |   // Print both result arrays as square grids, student results first and
 89 |   // reference results second, then let compareArrays decide the verdict.
 90 |   // The grid side is (int)sqrt(num_nodes), so entries beyond
 91 |   // grid_dim * grid_dim are not displayed.
 92 |   T* const shown[2] = { stu, ref };
 93 |   const char* const titles[2] = {
 94 |     "Visualization of student results",
 95 |     "Visualization of reference results"
 96 |   };
 97 | 
 98 |   for (int which = 0; which < 2; which++) {
 99 |     printf("\n----------------------------------\n");
100 |     printf("%s", titles[which]);
101 |     printf("\n----------------------------------\n\n");
102 | 
103 |     const int grid_dim = (int)sqrt(graph->num_nodes);
104 |     for (int row = 0; row < grid_dim; row++) {
105 |       for (int col = 0; col < grid_dim; col++)
106 |         printf("%02d ", shown[which][row*grid_dim + col]);
107 |       printf("\n");
108 |     }
109 |   }
110 | 
111 |   return compareArrays<T>(graph, ref, stu);
112 | }
113 | 
114 | template <class T>
115 | bool compareArraysAndRadiiEst(Graph graph, T* ref, T* stu) 
116 | {
117 |   // Pass 1: report every per-node mismatch (no early exit).
118 |   bool allMatch = true;
119 |   for (int node = 0; node < graph->num_nodes; node++) {
120 |     if (!(ref[node] != stu[node])) continue;
121 |     std::cerr << "*** Results disagree at " << node << " expected "
122 |       << ref[node] << " found " << stu[node] << std::endl;
123 |     allMatch = false;
124 |   }
125 | 
126 |   // Pass 2: the largest value in each array is compared as the "radius estimate".
127 |   int stuMaxVal = -1;
128 |   int refMaxVal = -1;
129 |   #pragma omp parallel for schedule(dynamic, 512) reduction(max: stuMaxVal)
130 |   for (int i = 0; i < graph->num_nodes; i++)
131 |     if (stu[i] > stuMaxVal) stuMaxVal = stu[i];
132 | 
133 |   #pragma omp parallel for schedule(dynamic, 512) reduction(max: refMaxVal)
134 |   for (int i = 0; i < graph->num_nodes; i++)
135 |     if (ref[i] > refMaxVal) refMaxVal = ref[i];
136 | 
137 |   if (refMaxVal != stuMaxVal) {
138 |     std::cerr << "*** Radius estimates differ. Expected: " << refMaxVal << " Got: " << stuMaxVal << std::endl;
139 |     allMatch = false;
140 |   }
141 | 
142 |   return allMatch;
143 | }
144 | 
145 | #endif /* __GRADE_H__ */
146 | 


--------------------------------------------------------------------------------
/common/CycleTimer.h:
--------------------------------------------------------------------------------
  1 | #ifndef _SYRAH_CYCLE_TIMER_H_
  2 | #define _SYRAH_CYCLE_TIMER_H_
  3 | 
  4 | #if defined(__APPLE__)
  5 |   #if defined(__x86_64__)
  6 |     #include <sys/sysctl.h>
  7 |   #else
  8 |     #include <mach/mach.h>
  9 |     #include <mach/mach_time.h>
 10 |   #endif // __x86_64__ or not
 11 | 
 12 |   #include <stdio.h>  // fprintf
 13 |   #include <stdlib.h> // exit
 14 | 
 15 | #elif _WIN32
 16 | #  include <windows.h>
 17 | #  include <time.h>
 18 | #else
 19 | #  include <stdio.h>
 20 | #  include <stdlib.h>
 21 | #  include <string.h>
 22 | #  include <sys/time.h>
 23 | #endif
 24 | 
 25 | 
 26 |   // This uses the cycle counter of the processor.  Different
 27 |   // processors in the system will have different values for this.  If
 28 |   // your process moves across processors, then the delta time you
 29 |   // measure will likely be incorrect.  This is mostly for fine
 30 |   // grained measurements where the process is likely to be on the
 31 |   // same processor.  For more global things you should use the
 32 |   // Time interface.
 33 | 
 34 |   // Also note that if your processors' speeds change (i.e. processor
 35 |   // scaling) or if you are in a heterogeneous environment, you will
 36 |   // likely get spurious results.
 37 |   class CycleTimer {
 38 |   public:
 39 |     typedef unsigned long long SysClock;
 40 | 
 41 |     //////////
 42 |     // Return the current CPU time, in terms of clock ticks.
 43 |     // Time zero is at some arbitrary point in the past.
 44 |     static SysClock currentTicks() {
 45 | #if defined(__APPLE__) && !defined(__x86_64__)
 46 |       return mach_absolute_time();
 47 | #elif defined(_WIN32)
 48 |       LARGE_INTEGER qwTime;
 49 |       QueryPerformanceCounter(&qwTime);
 50 |       return qwTime.QuadPart;
 51 | #elif defined(__x86_64__)
 52 |       unsigned int a, d;
 53 |       asm volatile("rdtsc" : "=a" (a), "=d" (d));
 54 |       return static_cast<unsigned long long>(a) |
 55 |         (static_cast<unsigned long long>(d) << 32);
 56 | #elif defined(__ARM_NEON__) && 0 // mrc requires superuser.
 57 |       unsigned int val;
 58 |       asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val));
 59 |       return val;
 60 | #else
 61 |       timespec spec;
 62 |       clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec);
 63 |       return CycleTimer::SysClock(static_cast<double>(spec.tv_sec) * 1e9 + static_cast<double>(spec.tv_nsec)); // double: float cannot hold tv_nsec exactly
 64 | #endif
 65 |     }
 66 | 
 67 |     //////////
 68 |     // Return the current CPU time, in terms of seconds.
 69 |     // This is slower than currentTicks().  Time zero is at
 70 |     // some arbitrary point in the past.
 71 |     static double currentSeconds() {
 72 |       return currentTicks() * secondsPerTick();
 73 |     }
 74 | 
 75 |     //////////
 76 |     // Return the conversion from seconds to ticks.
 77 |     static double ticksPerSecond() {
 78 |       return 1.0/secondsPerTick();
 79 |     }
 80 | 
 81 |     static const char* tickUnits() {
 82 | #if defined(__APPLE__) && !defined(__x86_64__)
 83 |       return "ns";
 84 | #elif defined(__WIN32__) || defined(__x86_64__)
 85 |       return "cycles";
 86 | #else
 87 |       return "ns"; // clock_gettime
 88 | #endif
 89 |     }
 90 | 
 91 |     //////////
 92 |     // Return the conversion from ticks to seconds.  The value is
 93 |     // computed on the first call and cached in a function-local static.
 94 |     static double secondsPerTick() {
 95 |       static bool initialized = false;
 96 |       static double secondsPerTick_val;
 97 |       if (initialized) return secondsPerTick_val;
 98 | #if defined(__APPLE__)
 99 |   #ifdef __x86_64__
100 |       int args[] = {CTL_HW, HW_CPU_FREQ};
101 |       unsigned int Hz;
102 |       size_t len = sizeof(Hz);
103 |       if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) {
104 |          fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
105 |          exit(-1);
106 |       }
107 |       secondsPerTick_val = 1.0 / (double) Hz;
108 |   #else
109 |       mach_timebase_info_data_t time_info;
110 |       mach_timebase_info(&time_info);
111 | 
112 |       // Scales to nanoseconds without 1e-9f
113 |       secondsPerTick_val = (1e-9*static_cast<double>(time_info.numer))/
114 |         static_cast<double>(time_info.denom);
115 |   #endif // x86_64 or not
116 | #elif defined(_WIN32)
117 |       LARGE_INTEGER qwTicksPerSec;
118 |       QueryPerformanceFrequency(&qwTicksPerSec);
119 |       secondsPerTick_val = 1.0/static_cast<double>(qwTicksPerSec.QuadPart);
120 | #else
121 |       FILE *fp = fopen("/proc/cpuinfo","r");
122 |       char input[1024];
123 |       if (!fp) {
124 |          fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo.");
125 |          exit(-1);
126 |       }
127 |       // In case we don't find it, e.g. on the N900
128 |       secondsPerTick_val = 1e-9;
129 |       while (!feof(fp) && fgets(input, 1024, fp)) {
130 |         // NOTE(boulos): Because reading cpuinfo depends on dynamic
131 |         // frequency scaling it's better to read the @ sign first
132 |         float GHz, MHz;
133 |         if (strstr(input, "model name")) {
134 |           char* at_sign = strstr(input, "@");
135 |           if (at_sign) {
136 |             char* after_at = at_sign + 1;
137 |             char* GHz_str = strstr(after_at, "GHz");
138 |             char* MHz_str = strstr(after_at, "MHz");
139 |             if (GHz_str) {
140 |               *GHz_str = '\0';
141 |               if (1 == sscanf(after_at, "%f", &GHz)) {
142 |                 //printf("GHz = %f\n", GHz);
143 |                 secondsPerTick_val = 1e-9f / GHz;
144 |                 break;
145 |               }
146 |             } else if (MHz_str) {
147 |               *MHz_str = '\0';
148 |               if (1 == sscanf(after_at, "%f", &MHz)) {
149 |                 //printf("MHz = %f\n", MHz);
150 |                 secondsPerTick_val = 1e-6f / MHz; // divide by the value just parsed (GHz is unset on this path)
151 |                 break;
152 |               }
153 |             }
154 |           }
155 |         } else if (1 == sscanf(input, "cpu MHz : %f", &MHz)) {
156 |           //printf("MHz = %f\n", MHz);
157 |           secondsPerTick_val = 1e-6f / MHz;
158 |           break;
159 |         }
160 |       }
161 |       fclose(fp);
162 | #endif
163 | 
164 |       initialized = true;
165 |       return secondsPerTick_val;
166 |     }
167 | 
168 |     //////////
169 |     // Return the conversion from ticks to milliseconds.
170 |     static double msPerTick() {
171 |       return secondsPerTick() * 1000.0;
172 |     }
173 | 
174 |   private:
175 |     CycleTimer();
176 | 
177 | #endif // #ifndef _SYRAH_CYCLE_TIMER_H_
178 | 


--------------------------------------------------------------------------------
/common/graph.cpp:
--------------------------------------------------------------------------------
  1 | #include <string>
  2 | #include <iostream>
  3 | #include <fstream>
  4 | #include <sstream>
  5 | #include <cstdlib>
  6 | 
  7 | #include "graph.h"
  8 | #include "graph_internal.h"
  9 | 
 10 | #define GRAPH_HEADER_TOKEN ((int) 0xDEADBEEF)
 11 | 
 12 | 
 13 | void free_graph(Graph graph)
 14 | {
 15 |   free(graph->outgoing_starts);  // the four offset/edge arrays go first...
 16 |   free(graph->outgoing_edges);
 17 | 
 18 |   free(graph->incoming_starts);
 19 |   free(graph->incoming_edges);
 20 |   free(graph);  // ...then the struct that pointed at them; graph itself must not be NULL
 21 | }
 22 | 
 23 | 
 24 | void build_start(graph* graph, int* scratch)
 25 | {
 26 |   // The first num_nodes ints of scratch are the per-vertex edge offsets;
 27 |   // copy them into a freshly malloc'd outgoing_starts array.
 28 |   const int n = graph->num_nodes;
 29 |   int* starts = (int*)malloc(sizeof(int) * n);
 30 |   for (int v = 0; v < n; v++) starts[v] = scratch[v];
 31 |   graph->outgoing_starts = starts;
 32 | }
 33 | 
 34 | void build_edges(graph* graph, int* scratch)
 35 | {
 36 |   // Edge targets sit in scratch right after the num_nodes start offsets;
 37 |   // copy num_edges of them into a freshly malloc'd outgoing_edges array.
 38 |   const int* src = scratch + graph->num_nodes;
 39 |   int* edges = (int*)malloc(sizeof(int) * graph->num_edges);
 40 |   for (int e = 0; e < graph->num_edges; e++) edges[e] = src[e];
 41 |   graph->outgoing_edges = edges;
 42 | }
 43 | 
 44 | // Given an outgoing edge adjacency list representation for a directed
 45 | // graph, build the matching incoming adjacency list representation.
 46 | void build_incoming_edges(graph* graph) {
 47 |     // Allocates graph->incoming_starts and graph->incoming_edges with malloc; free_graph releases them.
 48 |     //printf("Beginning build_incoming... (%d nodes)\n", graph->num_nodes);
 49 | 
 50 |     int num_nodes = graph->num_nodes;
 51 |     int* node_counts = (int*)malloc(sizeof(int) * num_nodes);
 52 |     int* node_scatter = (int*)malloc(sizeof(int) * num_nodes);
 53 | 
 54 |     graph->incoming_starts = (int*)malloc(sizeof(int) * num_nodes);
 55 |     graph->incoming_edges = (int*)malloc(sizeof(int) * graph->num_edges);
 56 | 
 57 |     for (int i=0; i<num_nodes; i++)
 58 |         node_counts[i] = node_scatter[i] = 0;
 59 | 
 60 |     int total_edges = 0;
 61 |     // compute number of incoming edges per node (total_edges is only read by the commented-out printf below)
 62 |     for (int i=0; i<num_nodes; i++) {
 63 |         int start_edge = graph->outgoing_starts[i];
 64 |         int end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1];
 65 |         for (int j=start_edge; j<end_edge; j++) {
 66 |             int target_node = graph->outgoing_edges[j];
 67 |             node_counts[target_node]++;
 68 |             total_edges++;
 69 |         }
 70 |     }
 71 |     //printf("Total edges: %d\n", total_edges);
 72 |     //printf("Computed incoming edge counts.\n");
 73 | 
 74 |     // build the starts array: each entry is the running sum of the counts of all earlier nodes
 75 |     graph->incoming_starts[0] = 0;
 76 |     for (int i=1; i<num_nodes; i++) {
 77 |         graph->incoming_starts[i] = graph->incoming_starts[i-1] + node_counts[i-1];
 78 |         //printf("%d: %d ", i, graph->incoming_starts[i]);
 79 |     }
 80 |     //printf("\n");
 81 |     //printf("Last edge=%d\n", graph->incoming_starts[num_nodes-1] + node_counts[num_nodes-1]);
 82 | 
 83 |     //printf("Computed per-node incoming starts.\n");
 84 | 
 85 |     // now perform the scatter: node_scatter[t] is the next free slot within t's incoming range
 86 |     for (int i=0; i<num_nodes; i++) {
 87 |         int start_edge = graph->outgoing_starts[i];
 88 |         int end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1];
 89 |         for (int j=start_edge; j<end_edge; j++) {
 90 |             int target_node = graph->outgoing_edges[j];
 91 |             graph->incoming_edges[graph->incoming_starts[target_node] + node_scatter[target_node]] = i;
 92 |             node_scatter[target_node]++;
 93 |         }
 94 |     }
 95 | 
 96 |     /*
 97 |     // verify
 98 |     printf("Verifying graph...\n");
 99 | 
100 |     for (int i=0; i<num_nodes; i++) {
101 |         int outgoing_starts = graph->outgoing_starts[i];
102 |         int end_node = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1];
103 |         for (int j=outgoing_starts; j<end_node; j++) {
104 | 
105 |             bool verified = false;
106 | 
107 |             // make sure that i is a neighbor of target_node
108 |             int target_node = graph->outgoing_edges[j];
109 |             int j_start_edge = graph->incoming_starts[target_node];
110 |             int j_end_edge = (target_node == graph->num_nodes-1) ? graph->num_edges : graph->incoming_starts[target_node+1];
111 |             for (int k=j_start_edge; k<j_end_edge; k++) {
112 |                 if (graph->incoming_edges[k] == i) {
113 |                     verified = true;
114 |                     break;
115 |                 }
116 |             }
117 | 
118 |             if (!verified) {
119 |                 fprintf(stderr,"Error: %d,%d did not verify\n", i, target_node);
120 |             }
121 |         }
122 |     }
123 | 
124 |     printf("Done verifying\n");
125 |     */
126 | 
127 |     free(node_counts);
128 |     free(node_scatter);
129 | }
130 | 
131 | void get_meta_data(std::ifstream& file, graph* graph)
132 | {
133 |   // going back to the beginning of the file
134 |   file.clear();
135 |   file.seekg(0, std::ios::beg);
136 |   std::string buffer;
137 |   std::getline(file, buffer);
138 |   if ((buffer.compare(std::string("AdjacencyGraph"))))
139 |   {
140 |     std::cout << "Invalid input file" << buffer << std::endl;
141 |     exit(1);
142 |   }
143 |   buffer.clear();
144 | 
145 |   do {
146 |       std::getline(file, buffer);
147 |   } while (buffer.size() == 0 || buffer[0] == '#');
148 | 
149 |   graph->num_nodes = atoi(buffer.c_str());
150 |   buffer.clear();
151 | 
152 |   do {
153 |       std::getline(file, buffer);
154 |   } while (buffer.size() == 0 || buffer[0] == '#');
155 | 
156 |   graph->num_edges = atoi(buffer.c_str());
157 | 
158 | }
159 | 
// Reads every remaining line of `file`, skipping lines that begin with '#',
// and stores each whitespace-separated integer into consecutive slots of
// `scratch`, starting at scratch[0]. Parsing of a line stops at the first
// token that is not an integer; the rest of that line is ignored.
//
// NOTE: nothing here bounds the number of values written. `scratch` must be
// large enough to hold every integer in the remainder of the file.
void read_graph_file(std::ifstream& file, int* scratch)
{
  std::string buffer;
  int idx = 0;

  // Loop on the result of getline itself rather than on eof(): a stream that
  // fails without reaching end-of-file would otherwise never leave the loop.
  while (std::getline(file, buffer))
  {
    if (buffer.size() > 0 && buffer[0] == '#')
        continue;

    std::stringstream parse(buffer);
    int v;
    while (parse >> v) {
        scratch[idx] = v;
        idx++;
    }
  }
}
185 | 
186 | void print_graph(const graph* graph)
187 | {
188 | 
189 |     printf("Graph pretty print:\n");
190 |     printf("num_nodes=%d\n", graph->num_nodes);
191 |     printf("num_edges=%d\n", graph->num_edges);
192 | 
193 |     for (int i=0; i<graph->num_nodes; i++) {
194 | 
195 |         int start_edge = graph->outgoing_starts[i];
196 |         int end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1];
197 |         printf("node %02d: out=%d: ", i, end_edge - start_edge);
198 |         for (int j=start_edge; j<end_edge; j++) {
199 |             int target = graph->outgoing_edges[j];
200 |             printf("%d ", target);
201 |         }
202 |         printf("\n");
203 | 
204 |         start_edge = graph->incoming_starts[i];
205 |         end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->incoming_starts[i+1];
206 |         printf("         in=%d: ", end_edge - start_edge);
207 |         for (int j=start_edge; j<end_edge; j++) {
208 |             int target = graph->incoming_edges[j];
209 |             printf("%d ", target);
210 |         }
211 |         printf("\n");
212 |     }
213 | }
214 | 
215 | Graph load_graph(const char* filename)
216 | {
217 |   graph* graph = (struct graph*)(malloc(sizeof(struct graph)));
218 | 
219 |   // open the file
220 |   std::ifstream graph_file;
221 |   graph_file.open(filename);
222 |   get_meta_data(graph_file, graph);
223 | 
224 |   int* scratch = (int*) malloc(sizeof(int) * (graph->num_nodes + graph->num_edges));
225 |   read_graph_file(graph_file, scratch);
226 | 
227 |   build_start(graph, scratch);
228 |   build_edges(graph, scratch);
229 |   free(scratch);
230 | 
231 |   build_incoming_edges(graph);
232 | 
233 |   //print_graph(graph);
234 | 
235 |   return graph;
236 | }
237 | 
238 | Graph load_graph_binary(const char* filename)
239 | {
240 |     graph* graph = (struct graph*)(malloc(sizeof(struct graph)));
241 | 
242 |     FILE* input = fopen(filename, "rb");
243 | 
244 |     if (!input) {
245 |         fprintf(stderr, "Could not open: %s\n", filename);
246 |         exit(1);
247 |     }
248 | 
249 |     int header[3];
250 | 
251 |     if (fread(header, sizeof(int), 3, input) != 3) {
252 |         fprintf(stderr, "Error reading header.\n");
253 |         exit(1);
254 |     }
255 | 
256 |     if (header[0] != GRAPH_HEADER_TOKEN) {
257 |         fprintf(stderr, "Invalid graph file header. File may be corrupt.\n");
258 |         exit(1);
259 |     }
260 | 
261 |     graph->num_nodes = header[1];
262 |     graph->num_edges = header[2];
263 | 
264 |     graph->outgoing_starts = (int*)malloc(sizeof(int) * graph->num_nodes);
265 |     graph->outgoing_edges = (int*)malloc(sizeof(int) * graph->num_edges);
266 | 
267 |     if (fread(graph->outgoing_starts, sizeof(int), graph->num_nodes, input) != (size_t) graph->num_nodes) {
268 |         fprintf(stderr, "Error reading nodes.\n");
269 |         exit(1);
270 |     }
271 | 
272 |     if (fread(graph->outgoing_edges, sizeof(int), graph->num_edges, input) != (size_t) graph->num_edges) {
273 |         fprintf(stderr, "Error reading edges.\n");
274 |         exit(1);
275 |     }
276 | 
277 |     fclose(input);
278 | 
279 |     build_incoming_edges(graph);
280 |     //print_graph(graph);
281 |     return graph;
282 | }
283 | 
284 | void store_graph_binary(const char* filename, Graph graph) {
285 | 
286 |     FILE* output = fopen(filename, "wb");
287 | 
288 |     if (!output) {
289 |         fprintf(stderr, "Could not open: %s\n", filename);
290 |         exit(1);
291 |     }
292 | 
293 |     int header[3];
294 |     header[0] = GRAPH_HEADER_TOKEN;
295 |     header[1] = graph->num_nodes;
296 |     header[2] = graph->num_edges;
297 | 
298 |     if (fwrite(header, sizeof(int), 3, output) != 3) {
299 |         fprintf(stderr, "Error writing header.\n");
300 |         exit(1);
301 |     }
302 | 
303 |     if (fwrite(graph->outgoing_starts, sizeof(int), graph->num_nodes, output) != (size_t) graph->num_nodes) {
304 |         fprintf(stderr, "Error writing nodes.\n");
305 |         exit(1);
306 |     }
307 | 
308 |     if (fwrite(graph->outgoing_edges, sizeof(int), graph->num_edges, output) != (size_t) graph->num_edges) {
309 |         fprintf(stderr, "Error writing edges.\n");
310 |         exit(1);
311 |     }
312 | 
313 |     fclose(output);
314 | }
315 | 


--------------------------------------------------------------------------------
/tools/graphTools.cpp:
--------------------------------------------------------------------------------
  1 | 
#include <algorithm>
#include <climits>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>


#include "../common/graph.h"
 11 | 
 12 | #define CMD_TEXT2BIN    "text2bin"
 13 | #define CMD_INFO        "info"
 14 | #define CMD_PRINT       "print"
 15 | #define CMD_NOOUTEDGES  "noout"
 16 | #define CMD_NOINEDGES   "noin"
 17 | #define CMD_EDGESTATS   "edgestats"
 18 | 
 19 | 
 20 | void print_help(const char* binary_name) {
 21 |     std::cerr << "Usage: " << binary_name << " cmd args\n";
 22 |     std::cerr << "Use '" << binary_name << " cmd' to get command-specific help.\n";
 23 |     std::cerr << "\n";
 24 |     std::cerr << "Valid cmds are:\n\n"
 25 |               << CMD_TEXT2BIN << ": text file to binary file conversion\n"
 26 |               << CMD_INFO << ": print graph metadata\n"
 27 |               << CMD_PRINT << ": print graph topology (careful with big graphs)\n"
 28 |               << CMD_NOOUTEDGES << ": detect vertices with no outgoing edges\n"
 29 |               << CMD_NOINEDGES << ": detect vertices with no incoming edges\n"
 30 |               << CMD_EDGESTATS << ": print stats on graph edges: e.g., min/max edges per node, etc.\n";
 31 | }
 32 | 
 33 | int main(int argc, char** argv) {
 34 | 
 35 |     if (argc < 2) {
 36 |         print_help(argv[0]);
 37 |         exit(1);
 38 |     }
 39 | 
 40 |     std::string cmd = std::string(argv[1]);
 41 | 
 42 |     if (!cmd.compare(CMD_TEXT2BIN)) {
 43 | 
 44 |         if (argc < 4) {
 45 |             std::cerr << "Usage: " << argv[0] << " " << cmd << " textfilename binfilename\n";
 46 |             std::cerr << "Converts a graph from text file format to binary file format\n";
 47 |             exit(1);
 48 |         }
 49 | 
 50 |         std::string inputFilename = std::string(argv[2]);
 51 |         std::string outputFilename = std::string(argv[3]);
 52 | 
 53 |         Graph g;
 54 |         std::cout << "Loading graph: " << inputFilename << "\n";
 55 |         g = load_graph(inputFilename.c_str());
 56 |         std::cout << "Done loading.\n";
 57 |         store_graph_binary(outputFilename.c_str(), g);
 58 |         delete g;
 59 | 
 60 |     } else if (!cmd.compare(CMD_INFO)) {
 61 |         if (argc < 3) {
 62 |             std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n";
 63 |             std::cerr << "Pretty-prints graph info (num vertices, num edges)\n";
 64 |             exit(1);
 65 |         }
 66 | 
 67 |         std::string inputFilename = std::string(argv[2]);
 68 | 
 69 |         Graph g;
 70 |         std::cout << "Loading graph: " << inputFilename << "\n";
 71 |         g = load_graph_binary(inputFilename.c_str());
 72 |         std::cout << "Done loading.\n";
 73 | 
 74 |         std::cout << "Num vertices: " << num_nodes(g) << "\n";
 75 |         std::cout << "Num edges:    " << num_edges(g) << "\n";
 76 |         delete g;
 77 | 
 78 |     } else if (!cmd.compare(CMD_PRINT)) {
 79 | 
 80 |         if (argc < 3) {
 81 |             std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n";
 82 |             std::cerr << "Pretty-prints graph, including edge information (be careful with large graphs)\n";
 83 |             exit(1);
 84 |         }
 85 | 
 86 |         std::string inputFilename = std::string(argv[2]);
 87 | 
 88 |         Graph g;
 89 |         std::cout << "Loading graph: " << inputFilename << "\n";
 90 |         g = load_graph_binary(inputFilename.c_str());
 91 |         std::cout << "Done loading.\n";
 92 |         print_graph(g);
 93 |         delete g;
 94 | 
 95 |     } else if (!cmd.compare(CMD_NOOUTEDGES)) {
 96 | 
 97 |         if (argc < 3) {
 98 |             std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n";
 99 |             std::cerr << "Lists all vertices without outgoing edges.\n";
100 |             exit(1);
101 |         }
102 | 
103 |         std::string inputFilename = std::string(argv[2]);
104 | 
105 |         Graph g;
106 |         std::cout << "Loading graph: " << inputFilename << "\n";
107 |         g = load_graph_binary(inputFilename.c_str());
108 |         std::cout << "Done loading.\n";
109 | 
110 |         std::vector<Vertex> zero_outgoing;
111 | 
112 |         for (int i=0; i<num_nodes(g); i++) {
113 |             if (outgoing_size(g, i) == 0) {
114 |                 zero_outgoing.push_back(i);
115 |             }
116 |         }
117 | 
118 |         std::cout << "Nodes with no outgoing edges:\n";
119 |         for (size_t i=0; i<zero_outgoing.size(); i++) {
120 |             std::cout << zero_outgoing[i] << " ";
121 |         }
122 |         std::cout << "\n";
123 |         std::cout << zero_outgoing.size() << " of " << num_nodes(g) << " nodes have zero outgoing edges ("
124 |                   << std::setprecision(2)
125 |                   << 100.0 * static_cast<double>(zero_outgoing.size())/num_nodes(g) << "\%).\n";
126 |         delete g;
127 | 
128 |     } else if (!cmd.compare(CMD_NOINEDGES)) {
129 | 
130 |         if (argc < 3) {
131 |             std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n";
132 |             std::cerr << "Lists all edges without incoming edges.\n";
133 |             exit(1);
134 |         }
135 | 
136 |         std::string inputFilename = std::string(argv[2]);
137 | 
138 |         Graph g;
139 |         std::cout << "Loading graph: " << inputFilename << "\n";
140 |         g = load_graph_binary(inputFilename.c_str());
141 |         std::cout << "Done loading.\n";
142 | 
143 |         std::vector<Vertex> zero_incoming;
144 | 
145 |         for (int i=0; i<num_nodes(g); i++) {
146 |             if (incoming_size(g, i) == 0) {
147 |                 zero_incoming.push_back(i);
148 |             }
149 |         }
150 | 
151 |         std::cout << "Nodes with no incoming edges:\n";
152 |         for (size_t i=0; i<zero_incoming.size(); i++) {
153 |             std::cout << zero_incoming[i] << " ";
154 |         }
155 |         std::cout << "\n";
156 |         std::cout << zero_incoming.size() << " of " << num_nodes(g) << " nodes have zero incoming edges ("
157 |                   << std::setprecision(2)
158 |                   << 100.0 * static_cast<double>(zero_incoming.size())/num_nodes(g) << "\%).\n";
159 |         delete g;
160 | 
161 |     } else if (!cmd.compare(CMD_EDGESTATS)) {
162 | 
163 |         if (argc < 3) {
164 |             std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n";
165 |             std::cerr << "Print basic stats about edges.\n";
166 |             exit(1);
167 |         }
168 | 
169 |         std::string inputFilename = std::string(argv[2]);
170 | 
171 |         Graph g;
172 |         std::cout << "Loading graph: " << inputFilename << "\n";
173 |         g = load_graph_binary(inputFilename.c_str());
174 |         std::cout << "Done loading. Now analyzing graph...\n";
175 | 
176 |         unsigned int total_incoming = 0;
177 |         unsigned int total_outgoing = 0;
178 |         unsigned int min_outgoing = INT_MAX;
179 |         unsigned int max_outgoing = 0;
180 |         unsigned int min_incoming = INT_MAX;
181 |         unsigned int max_incoming = 0;
182 |         bool is_symmetric = true;
183 | 
184 |         for (int i=0; i<num_nodes(g); i++) {
185 | 
186 |             unsigned int num_incoming = incoming_size(g, i);
187 |             unsigned int num_outgoing = outgoing_size(g, i);
188 | 
189 |             min_outgoing = std::min(min_outgoing, num_outgoing);
190 |             max_outgoing = std::max(max_outgoing, num_outgoing);
191 |             total_outgoing += num_outgoing;
192 | 
193 |             min_incoming = std::min(min_incoming, num_incoming);
194 |             max_incoming = std::max(max_incoming, num_incoming);
195 |             total_incoming += num_incoming;
196 | 
197 |             // check graph for sanity, and test for symmetric directed
198 |             // edges
199 |             const Vertex* out_begin = outgoing_begin(g, i);
200 |             const Vertex* out_end = outgoing_end(g, i);
201 |             for (const Vertex* v=out_begin; v!=out_end; v++) {
202 | 
203 |                 Vertex target = *v;
204 | 
205 |                 // sanity check. vertex i has an outoing edge to target
206 |                 // (i->target), therefore target better have an
207 |                 // incoming edge from i.
208 |                 bool found_matching = false;
209 |                 const Vertex* sanity_begin = incoming_begin(g, target);
210 |                 const Vertex* sanity_end = incoming_end(g, target);
211 |                 for (const Vertex* v2=sanity_begin; v2!=sanity_end; v2++) {
212 |                     Vertex i2 = *v2;
213 |                     if (i == i2) {
214 |                         found_matching = true;
215 |                         break;
216 |                     }
217 |                 }
218 |                 if (!found_matching) {
219 |                     std::cerr << "GRAPH DID NOT PASS SANITY CHECK:\n"
220 |                               << "vertex " << i << " has outgoing edge to " << target << ",\n but "
221 |                               << "vertex " << target << " has no incoming edge from " << i << "\n";
222 | 
223 |                     // abort on a failed sanity check
224 |                     exit(1);
225 |                  }
226 | 
227 |                 // symmetry test: vertex i has an outgoing edge to
228 |                 // target (i->target), so check to see if there's an
229 |                 // incoming edge from target as well (target->i).
230 |                 bool found_symmetric = false;
231 |                 const Vertex* in_start = incoming_begin(g, i);
232 |                 const Vertex* in_end =   incoming_end(g, i);
233 | 
234 |                 for (const Vertex* v2=in_start; v2!=in_end; v2++) {
235 | 
236 |                     Vertex target2 = *v2;
237 | 
238 |                     if (target == target2) {
239 |                         found_symmetric = true;
240 |                         break;
241 |                     }
242 |                 }
243 |                 if (!found_symmetric)
244 |                     is_symmetric = false;
245 | 
246 |             }
247 | 
248 |         }
249 | 
250 |         float avg_outgoing = (float)total_outgoing / num_nodes(g);
251 |         float avg_incoming = (float)total_incoming / num_nodes(g);
252 | 
253 |         std::cout << "=========================================================\n";
254 |         std::cout << "Edge statistics for this graph:\n";
255 |         std::cout << "=========================================================\n";
256 |         std::cout << "The graph " << ((is_symmetric) ? "IS " : "IS NOT ") << "symmetric.\n";
257 |         std::cout << "Outgoing edges: total=" << total_outgoing
258 |                   << " avg=" << avg_outgoing
259 |                   << " min=" << min_outgoing
260 |                   << " max=" << max_outgoing << "\n";
261 | 
262 |         std::cout << "Incoming edges: total=" << total_incoming
263 |                   << " avg=" << avg_incoming
264 |                   << " min=" << min_incoming
265 |                   << " max=" << max_incoming << "\n";
266 |     }
267 | 
268 |     else {
269 |         print_help(argv[0]);
270 |     }
271 | 
272 |     return 0;
273 | }
274 | 


--------------------------------------------------------------------------------
/bfs/grade.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <omp.h>
  4 | #include <string>
  5 | #include <unistd.h>
  6 | #include <limits>
  7 | 
  8 | #include <iostream>
  9 | #include <sstream>
 10 | #include <vector>
 11 | 
 12 | #include "../common/CycleTimer.h"
 13 | #include "../common/graph.h"
 14 | #include "../common/grade.h"
 15 | #include "bfs.h"
 16 | 
 17 | #define USE_BINARY_GRAPH 1
 18 | 
 19 | #define top_down 0
 20 | #define bott_up 1
 21 | #define hybrid 2
 22 | 
 23 | void reference_bfs_bottom_up(Graph graph, solution* sol);
 24 | void reference_bfs_top_down(Graph graph, solution* sol);
 25 | void reference_bfs_hybrid(Graph graph, solution* sol);
 26 | 
 27 | void usage(const char* binary_name) {
 28 |     std::cout << "Usage: " << binary_name << " [options] graphdir" << std::endl;
 29 |     std::cout << std::endl;
 30 |     std::cout << "Options:" << std::endl;
 31 |     std::cout << "  -n  INT number of threads" << std::endl;
 32 |     std::cout << "  -r  INT number of runs" << std::endl;
 33 |     std::cout << "  -h      this commandline help message" << std::endl;
 34 | }
 35 | 
 36 | graph* load_graph(std::string graph_filename) {
 37 |     graph* g;
 38 |     if (USE_BINARY_GRAPH) {
 39 |       g = load_graph_binary(graph_filename.c_str());
 40 |     } else {
 41 |         g = load_graph(graph_filename);
 42 |         printf("storing binary form of graph!\n");
 43 |         store_graph_binary(graph_filename.append(".bin").c_str(), g);
 44 |         delete g;
 45 |         exit(1);
 46 |     }
 47 |     return g;
 48 | }
 49 | 
 50 | double compute_score(std::string graph_name, bool correct, double ref_time, double stu_time) {
 51 |     double max_score = 1.0;
 52 |     double max_perf_score = 0.8 * max_score; 
 53 |     double correctness_score = 0.2 * max_score;
 54 |     correctness_score = (correct) ? correctness_score : 0;
 55 | 
 56 |     double ratio = (ref_time/stu_time);
 57 | 
 58 |     double slope = max_perf_score/(0.7 - 0.3);
 59 |     double offset = 0.3 * slope;
 60 | 
 61 |     double perf_score = (correct) ? ratio*slope - offset : 0;
 62 | 
 63 |     if (perf_score < 0) perf_score = 0;
 64 |     if (perf_score > max_perf_score) perf_score = max_perf_score;
 65 | 
 66 |     return (correctness_score + perf_score);
 67 | }
 68 | 
 69 | void run_on_graph(int idx, graph* g, int num_threads, int num_runs, 
 70 |     std::string graph_name, std::vector<std::vector<double>> &scores) {
 71 | 
 72 |     solution ref;
 73 |     ref.distances = new int[g->num_nodes];
 74 |     solution stu;
 75 |     stu.distances = new int[g->num_nodes];
 76 | 
 77 |     double start, time;
 78 | 
 79 |     omp_set_num_threads(num_threads);
 80 | 
 81 |     std::cout << "\nTop down bfs" << std::endl; 
 82 |     double ref_top_down_time = std::numeric_limits<int>::max();
 83 |     for (int r = 0; r < num_runs; r++) {
 84 |         start = CycleTimer::currentSeconds();
 85 |         reference_bfs_top_down(g, &ref);
 86 |         time = CycleTimer::currentSeconds() - start;
 87 |         ref_top_down_time = std::min(ref_top_down_time, time);
 88 |     }
 89 | 
 90 |     double stu_top_down_time = std::numeric_limits<int>::max();
 91 |     for (int r = 0; r < num_runs; r++) {
 92 |         start = CycleTimer::currentSeconds();
 93 |         bfs_top_down(g, &stu);
 94 |         //reference_bfs_top_down(g, &stu);
 95 |         time = CycleTimer::currentSeconds() - start;
 96 |         stu_top_down_time = std::min(stu_top_down_time, time);
 97 |     }
 98 | 
 99 |     bool correct = compareArrays(g, ref.distances, stu.distances);
100 |     
101 |     if (!correct) {
102 |         std::cout << "Top down bfs incorrect" << std::endl; 
103 |         std::cout << "ref_time: " <<  ref_top_down_time << "s" << std::endl;
104 |     } else {
105 |         std::cout << "ref_time: " <<  ref_top_down_time << "s" << std::endl;
106 |         std::cout << "stu_time: " <<  stu_top_down_time << "s" << std::endl;
107 |     }
108 | 
109 |     scores[idx][top_down] = compute_score(graph_name, correct, ref_top_down_time, stu_top_down_time);
110 | 
111 |     for (int i = 0; i < g->num_nodes; i++) {
112 |         ref.distances[i] = -1;
113 |         stu.distances[i] = -1;
114 |     }
115 | 
116 |     double ref_bottom_up_time = std::numeric_limits<int>::max();
117 |     for (int r = 0; r < num_runs; r++) {
118 |         start = CycleTimer::currentSeconds();
119 |         reference_bfs_bottom_up(g, &ref);
120 |         time = CycleTimer::currentSeconds() - start;
121 |         ref_bottom_up_time = std::min(ref_bottom_up_time, time);
122 |     }
123 | 
124 |     std::cout << "\nBottom up bfs" << std::endl; 
125 |     double stu_bottom_up_time = std::numeric_limits<int>::max();
126 |     for (int r = 0; r < num_runs; r++) {
127 |         start = CycleTimer::currentSeconds();
128 |         bfs_bottom_up(g, &stu);
129 |         //reference_bfs_bottom_up(g, &stu);
130 |         time = CycleTimer::currentSeconds() - start;
131 |         stu_bottom_up_time = std::min(stu_bottom_up_time, time);
132 |     }
133 | 
134 |     correct = compareArrays(g, ref.distances, stu.distances);
135 | 
136 |     if (!correct) {
137 |         std::cout << "Bottom up bfs incorrect" << std::endl; 
138 |         std::cout << "ref_time: " <<  ref_bottom_up_time << "s" << std::endl;
139 |     } else {
140 |         std::cout << "ref_time: " <<  ref_bottom_up_time << "s" << std::endl;
141 |         std::cout << "stu_time: " <<  stu_bottom_up_time << "s" << std::endl;
142 |     }
143 | 
144 |     scores[idx][bott_up] = compute_score(graph_name, correct, ref_bottom_up_time, stu_bottom_up_time);
145 | 
146 | 
147 |     for (int i = 0; i < g->num_nodes; i++) {
148 |         ref.distances[i] = -1;
149 |         stu.distances[i] = -1;
150 |     }
151 | 
152 |     std::cout << "\nHybrid bfs" << std::endl; 
153 | 
154 |     double ref_hybrid_time = std::numeric_limits<int>::max();
155 |     for (int r = 0; r < num_runs; r++) {
156 |         start = CycleTimer::currentSeconds();
157 |         reference_bfs_hybrid(g, &ref);
158 |         time = CycleTimer::currentSeconds() - start;
159 |         ref_hybrid_time = std::min(ref_hybrid_time, time);
160 |     }
161 | 
162 |     double stu_hybrid_time = std::numeric_limits<int>::max();
163 |     for (int r = 0; r < num_runs; r++) {
164 |         start = CycleTimer::currentSeconds();
165 |         bfs_hybrid(g, &stu);
166 |         //reference_bfs_hybrid(g, &stu);
167 |         time = CycleTimer::currentSeconds() - start;
168 |         stu_hybrid_time = std::min(stu_hybrid_time, time);
169 |     }
170 |     
171 |     correct = compareArrays(g, ref.distances, stu.distances);
172 |     
173 |     if (!correct) {
174 |         std::cout << "Hybrid bfs incorrect" << std::endl; 
175 |         std::cout << "ref_time: " <<  ref_hybrid_time << "s" << std::endl;
176 |     } else {
177 |         std::cout << "ref_time: " <<  ref_hybrid_time << "s" << std::endl;
178 |         std::cout << "stu_time: " <<  stu_hybrid_time << "s" << std::endl;
179 |     }
180 | 
181 |     scores[idx][hybrid] = compute_score(graph_name, correct, ref_hybrid_time, stu_hybrid_time);
182 | 
183 |     delete(stu.distances);
184 |     delete(ref.distances);
185 | }
186 | 
// Prints a horizontal rule of 74 '-' characters to stdout, followed by a
// newline and a flush.
void print_separator_line() {
    const std::string rule(74, '-');
    std::cout << rule << std::endl;
}
193 | 
194 | void print_scores(std::vector<std::string> grade_graphs, std::vector<std::vector<double>> scores) {
195 |     
196 |     std::cout.precision(2);
197 |     std::cout.setf(std::ios::fixed, std:: ios::floatfield);
198 |     std::cout<<std::endl<<std::endl;
199 | 
200 |     print_separator_line();
201 | 
202 |     std::cout<<"SCORES :"; 
203 |     for (int i = 0; i < (28 - 8); i++) {
204 |         std::cout<<" ";
205 |     }
206 | 
207 |     std::cout<<"|   Top-Down    |   Bott-Up    |    Hybrid    |"<<std::endl;
208 | 
209 |     print_separator_line();
210 | 
211 |     double total_score = 0.0;
212 | 
213 |     int max_scores_small[] = {2,3,3};
214 |     int max_scores_large[] = {7,8,8};
215 |     for (int g = 0; g < grade_graphs.size(); g++) {
216 |         auto& graph_name = grade_graphs[g];
217 | 
218 |         bool small = false;
219 |         if ((graph_name == "grid1000x1000.graph") || (graph_name == "soc-livejournal1_68m.graph") ||
220 |                 (graph_name == "com-orkut_117m.graph")) small = true;
221 | 
222 |         int *max_scores = small ? max_scores_small : max_scores_large;
223 | 
224 |         total_score +=  scores[g][top_down] * max_scores[top_down] 
225 |                       + scores[g][bott_up] * max_scores[bott_up]
226 |                       + scores[g][hybrid] * max_scores[hybrid];
227 | 
228 |         std::cout<<graph_name;
229 |         for (int i = 0; i < (28 - graph_name.length()); i++) {
230 |             std::cout<<" ";
231 |         }
232 | 
233 |         std::cout<<"| ";
234 |         std::cout<<"     "<<scores[g][top_down] * max_scores[top_down]<<" / "<<max_scores[top_down]<<" |"; 
235 |         std::cout<<"     "<<scores[g][bott_up] * max_scores[bott_up]<<" / "<<max_scores[bott_up]<<" |";
236 |         std::cout<<"     "<<scores[g][hybrid] * max_scores[hybrid]<<" / "<<max_scores[hybrid]<<" |"<<std::endl;;
237 | 
238 |         print_separator_line();
239 |     }
240 | 
241 |     std::cout<<"TOTAL";
242 |     for (int i = 0; i < (59 - 5); i++) {
243 |             std::cout<<" ";
244 |     }
245 |     std::cout<<"|  ";
246 |     std::cout<<total_score<<" / "<<"70"<<" |"<<std::endl;
247 | 
248 |     print_separator_line();
249 | 
250 | }
251 | 
252 | int main(int argc, char** argv) {
253 |     int num_threads = omp_get_max_threads();
254 |     int num_runs = 1;
255 |     std::string graph_name, graph_dir;
256 |     bool grade = false;
257 | 
258 |     int opt;
259 |     while ((opt = getopt(argc,argv,"n:r:gh")) != EOF) {
260 |         switch(opt) {
261 |             case 'n':
262 |                 num_threads = atoi(optarg);
263 |                 break;
264 |             case 'r':
265 |                 num_runs = atoi(optarg);
266 |                 break;
267 |             case 'h':
268 |             case '?':
269 |             default:
270 |                 usage(argv[0]);
271 |                 exit(1);
272 |         }
273 |     }
274 | 
275 |     if (argc <= optind) {
276 |         usage(argv[0]);
277 |         exit(1);
278 |     }
279 | 
280 |     graph_dir = argv[optind];
281 |   
282 |     printf("Max system threads = %d\n", omp_get_max_threads());
283 |     printf("Running with %d threads\n", num_threads);
284 | 
285 |     std::vector<std::string> grade_graphs = { "grid1000x1000.graph",
286 |                                               "soc-livejournal1_68m.graph",
287 |                                               "com-orkut_117m.graph",
288 |                                               "random_500m.graph",
289 |                                               "rmat_200m.graph"};                                       
290 | 
291 |     std::vector<std::vector<double>> scores(grade_graphs.size());
292 |     // top_down 0
293 |     // bott_up 1
294 |     // hybrid 2
295 |     for (int i = 0; i < grade_graphs.size(); i++) {
296 |         scores[i] = std::vector<double>(3);
297 |     }
298 | 
299 |     int idx = 0;
300 |     for (auto& graph_name: grade_graphs) {
301 |         graph* g = load_graph(graph_dir + '/' + graph_name);
302 |         std::cout << "\nGraph: " << graph_name << std::endl;
303 |         run_on_graph(idx, g, num_threads, num_runs, graph_name, scores);    
304 |         delete g;
305 |         idx++;
306 |     }
307 | 
308 |     print_scores(grade_graphs, scores);
309 | 
310 |     return 0;
311 | }
312 | 


--------------------------------------------------------------------------------
/bfs/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <omp.h>
  4 | #include <string>
  5 | #include <getopt.h>
  6 | 
  7 | #include <iostream>
  8 | #include <sstream>
  9 | #include <vector>
 10 | 
 11 | #include "common/CycleTimer.h"
 12 | #include "common/graph.h"
 13 | #include "bfs.h"
 14 | 
 15 | #define USE_BINARY_GRAPH 1
 16 | 
 17 | void reference_bfs_bottom_up(Graph graph, solution* sol);
 18 | void reference_bfs_top_down(Graph graph, solution* sol);
 19 | void reference_bfs_hybrid(Graph graph, solution* sol);
 20 | 
 21 | int main(int argc, char** argv) {  // BFS timing + correctness driver: argv[1] = graph file, optional argv[2] = thread count
 22 | 
 23 |     int  num_threads = -1;  // NOTE(review): never read; shadowed by the std::vector<int> num_threads declared in the sweep branch below
 24 |     std::string graph_filename;
 25 | 
 26 |     if (argc < 2)
 27 |     {
 28 |         std::cerr << "Usage: <path/to/graph/file> [num_threads]\n";
 29 |         std::cerr << "  To run results for all thread counts: <path/to/graph/file>\n";
 30 |         std::cerr << "  Run with a certain number of threads (no correctness run): <path/to/graph/file> <num_threads>\n";  // NOTE(review): the single-count branch below does run the correctness comparisons
 31 |         exit(1);
 32 |     }
 33 | 
 34 |     int thread_count = -1;  // -1 selects the sweep over several thread counts
 35 |     if (argc == 3)  // only exactly three arguments set a thread count; with more, argv[2] is ignored
 36 |     {
 37 |         thread_count = atoi(argv[2]);  // NOTE(review): atoi gives 0 for non-numeric text; 0 is neither > 0 nor <= -1, so it reaches omp_set_num_threads(0) in the else branch
 38 |     }
 39 | 
 40 |     graph_filename = argv[1];
 41 | 
 42 |     Graph g;
 43 | 
 44 |     printf("----------------------------------------------------------\n");
 45 |     printf("Max system threads = %d\n", omp_get_max_threads());
 46 |     if (thread_count > 0)
 47 |     {
 48 |         thread_count = std::min(thread_count, omp_get_max_threads());  // clamp the request to the OpenMP maximum; NOTE(review): <algorithm> is not included directly by this file
 49 |         printf("Running with %d threads\n", thread_count);
 50 |     }
 51 |     printf("----------------------------------------------------------\n");
 52 | 
 53 |     printf("Loading graph...\n");
 54 |     if (USE_BINARY_GRAPH) {
 55 |       g = load_graph_binary(graph_filename.c_str());
 56 |     } else {
 57 |         g = load_graph(argv[1]);  // conversion mode: read the graph with load_graph instead
 58 |         printf("storing binary form of graph!\n");
 59 |         store_graph_binary(graph_filename.append(".bin").c_str(), g);  // output path is the input path with ".bin" appended
 60 |         delete g;
 61 |         exit(1);  // conversion mode never runs BFS; note the exit status is 1
 62 |     }
 63 |     printf("\n");
 64 |     printf("Graph stats:\n");
 65 |     printf("  Edges: %d\n", g->num_edges);
 66 |     printf("  Nodes: %d\n", g->num_nodes);
 67 | 
 68 |     //Sweep mode: thread_count is still -1 (or a negative value was passed), so time a list of thread counts
 69 |     if (thread_count <= -1)
 70 |     {
 71 |         //Upper bound for the sweep, as reported by OpenMP
 72 |         int max_threads = omp_get_max_threads();
 73 | 
 74 |         //Thread counts to test
 75 |         std::vector<int> num_threads;
 76 | 
 77 |         //Powers of two below max_threads, followed by max_threads itself
 78 |         for (int i = 1; i < max_threads; i *= 2) {
 79 |           num_threads.push_back(i);
 80 |         }
 81 |         num_threads.push_back(max_threads);
 82 |         int n_usage = num_threads.size();
 83 | 
 84 |         solution sol1;  // receives the bfs_top_down result
 85 |         sol1.distances = (int*)malloc(sizeof(int) * g->num_nodes);  // NOTE(review): the four malloc results are neither checked nor freed in this function
 86 |         solution sol2;  // receives the bfs_bottom_up result
 87 |         sol2.distances = (int*)malloc(sizeof(int) * g->num_nodes);
 88 |         solution sol3;  // receives the bfs_hybrid result
 89 |         sol3.distances = (int*)malloc(sizeof(int) * g->num_nodes);
 90 | 
 91 |         //Reference output: rewritten by each reference_bfs_* call, then compared against sol1/sol2/sol3
 92 |         solution sol4;
 93 |         sol4.distances = (int*)malloc(sizeof(int) * g->num_nodes);
 94 | 
 95 |         double hybrid_base, top_base, bottom_base;  // times from the first sweep entry, used as the speedup baseline
 96 |         double hybrid_time, top_time, bottom_time;
 97 | 
 98 |         double ref_hybrid_base, ref_top_base, ref_bottom_base;
 99 |         double ref_hybrid_time, ref_top_time, ref_bottom_time;
100 | 
101 |         double start;
102 |         std::stringstream timing;
103 |         std::stringstream ref_timing;
104 |         std::stringstream relative_timing;
105 | 
106 |         bool tds_check = true, bus_check = true, hs_check = true;  // cleared on a mismatch and never reset, so a failure at any thread count sticks
107 | 
108 |         timing          << "Threads  Top Down          Bottom Up         Hybrid\n";
109 |         ref_timing      << "Threads  Top Down          Bottom Up         Hybrid\n";
110 |         relative_timing << "Threads       Top Down          Bottom Up             Hybrid\n";
111 | 
112 |         //One iteration per entry of num_threads
113 |         for (int i = 0; i < n_usage; i++)
114 |         {
115 |             printf("----------------------------------------------------------\n");
116 |             std::cout << "Running with " << num_threads[i] << " threads" << std::endl;
117 |             //Set thread count
118 |             omp_set_num_threads(num_threads[i]);
119 | 
120 |             //Time bfs_top_down
121 |             start = CycleTimer::currentSeconds();
122 |             bfs_top_down(g, &sol1);
123 |             top_time = CycleTimer::currentSeconds() - start;
124 | 
125 |             //Time the reference top-down implementation
126 |             start = CycleTimer::currentSeconds();
127 |             reference_bfs_top_down(g, &sol4);
128 |             ref_top_time = CycleTimer::currentSeconds() - start;
129 | 
130 |             std::cout << "Testing Correctness of Top Down\n";
131 |             for (int j=0; j<g->num_nodes; j++) {
132 |                 if (sol1.distances[j] != sol4.distances[j]) {
133 |                     fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol1.distances[j], sol4.distances[j]);
134 |                     tds_check = false;
135 |                     break;  // only the first mismatch is reported
136 |                 }
137 |             }
138 | 
139 |             //Time bfs_bottom_up
140 |             start = CycleTimer::currentSeconds();
141 |             bfs_bottom_up(g, &sol2);
142 |             bottom_time = CycleTimer::currentSeconds() - start;
143 | 
144 |             //Time the reference bottom-up implementation (overwrites sol4)
145 |             start = CycleTimer::currentSeconds();
146 |             reference_bfs_bottom_up(g, &sol4);
147 |             ref_bottom_time = CycleTimer::currentSeconds() - start;
148 | 
149 |             std::cout << "Testing Correctness of Bottom Up\n";
150 |             for (int j=0; j<g->num_nodes; j++) {
151 |                 if (sol2.distances[j] != sol4.distances[j]) {
152 |                     fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol2.distances[j], sol4.distances[j]);
153 |                     bus_check = false;
154 |                     break;
155 |                 }
156 |             }
157 | 
158 |             start = CycleTimer::currentSeconds();
159 |             bfs_hybrid(g, &sol3);  // timed hybrid run
160 |             hybrid_time = CycleTimer::currentSeconds() - start;
161 | 
162 |             //Time the reference hybrid implementation (overwrites sol4)
163 |             start = CycleTimer::currentSeconds();
164 |             reference_bfs_hybrid(g, &sol4);
165 |             ref_hybrid_time = CycleTimer::currentSeconds() - start;
166 | 
167 |             std::cout << "Testing Correctness of Hybrid\n";
168 |             for (int j=0; j<g->num_nodes; j++) {
169 |                 if (sol3.distances[j] != sol4.distances[j]) {
170 |                     fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol3.distances[j], sol4.distances[j]);
171 |                     hs_check = false;
172 |                     break;
173 |                 }
174 |             }
175 | 
176 |             if (i == 0)  // first entry of num_threads (always 1, given how the list is built) is the baseline
177 |             {
178 |                 hybrid_base = hybrid_time;
179 |                 ref_hybrid_base = ref_hybrid_time;
180 |                 top_base = top_time;
181 |                 bottom_base = bottom_time;
182 |                 ref_top_base = ref_top_time;
183 |                 ref_bottom_base = ref_bottom_time;
184 | 
185 |             }
186 | 
187 |             char buf[1024];
188 |             char ref_buf[1024];
189 |             char relative_buf[1024];
190 | 
191 |             sprintf(buf, "%4d:    %.2f (%.2fx)      %.2f (%.2fx)      %.2f (%.2fx)\n",  // each column: elapsed time, then baseline/time in parentheses; NOTE(review): sprintf is unbounded
192 |                     num_threads[i], top_time, top_base/top_time, bottom_time,
193 |                     bottom_base/bottom_time, hybrid_time, hybrid_base/hybrid_time);
194 |             sprintf(ref_buf, "%4d:    %.2f (%.2fx)      %.2f (%.2fx)      %.2f (%.2fx)\n",
195 |                     num_threads[i], ref_top_time, ref_top_base/ref_top_time, ref_bottom_time,
196 |                     ref_bottom_base/ref_bottom_time, ref_hybrid_time, ref_hybrid_base/ref_hybrid_time);
197 |             sprintf(relative_buf, "%4d:   %14.2f     %14.2f     %14.2f\n",  // reference time / tested time: above 1 means the tested code was faster
198 |                     num_threads[i], ref_top_time/top_time, ref_bottom_time/bottom_time, ref_hybrid_time/hybrid_time);
199 | 
200 |             timing << buf;
201 |             ref_timing << ref_buf;
202 |             relative_timing << relative_buf;
203 |         }
204 | 
205 |         printf("----------------------------------------------------------\n");
206 |         std::cout << "Your Code: Timing Summary" << std::endl;
207 |         std::cout << timing.str();
208 |         printf("----------------------------------------------------------\n");
209 |         std::cout << "Reference: Timing Summary" << std::endl;
210 |         std::cout << ref_timing.str();
211 |         printf("----------------------------------------------------------\n");
212 |         std::cout << "Correctness: " << std::endl;  // only failures are listed below; nothing further is printed when all three checks passed
213 |         if (!tds_check)
214 |             std::cout << "Top Down Search is not Correct" << std::endl;
215 |         if (!bus_check)
216 |             std::cout << "Bottom Up Search is not Correct" << std::endl;
217 |         if (!hs_check)
218 |             std::cout << "Hybrid Search is not Correct" << std::endl;
219 |         std::cout << std::endl << "Speedup vs. Reference: " << std::endl <<  relative_timing.str();
220 |     }
221 |     //Single-count mode: run once with thread_count threads and print raw times (no speedup columns)
222 |     else
223 |     {
224 |         bool tds_check = true, bus_check = true, hs_check = true;
225 |         solution sol1;  // bfs_top_down result
226 |         sol1.distances = (int*)malloc(sizeof(int) * g->num_nodes);  // NOTE(review): as in the sweep branch, unchecked and never freed
227 |         solution sol2;  // bfs_bottom_up result
228 |         sol2.distances = (int*)malloc(sizeof(int) * g->num_nodes);
229 |         solution sol3;  // bfs_hybrid result
230 |         sol3.distances = (int*)malloc(sizeof(int) * g->num_nodes);
231 | 
232 |         //Reference output: rewritten by each reference_bfs_* call, then compared against sol1/sol2/sol3
233 |         solution sol4;
234 |         sol4.distances = (int*)malloc(sizeof(int) * g->num_nodes);
235 | 
236 |         double hybrid_time, top_time, bottom_time;
237 |         double ref_hybrid_time, ref_top_time, ref_bottom_time;
238 | 
239 |         double start;
240 |         std::stringstream timing;
241 |         std::stringstream ref_timing;
242 | 
243 | 
244 |         timing << "Threads   Top Down    Bottom Up       Hybrid\n";
245 |         ref_timing << "Threads   Top Down    Bottom Up       Hybrid\n";
246 | 
247 |         //Single pass; there is no loop in this mode
248 |         std::cout << "Running with " << thread_count << " threads" << std::endl;
249 |         //Set thread count
250 |         omp_set_num_threads(thread_count);
251 | 
252 |         //Time bfs_top_down
253 |         start = CycleTimer::currentSeconds();
254 |         bfs_top_down(g, &sol1);
255 |         top_time = CycleTimer::currentSeconds() - start;
256 | 
257 |         //Time the reference top-down implementation
258 |         start = CycleTimer::currentSeconds();
259 |         reference_bfs_top_down(g, &sol4);
260 |         ref_top_time = CycleTimer::currentSeconds() - start;
261 | 
262 |         std::cout << "Testing Correctness of Top Down\n";
263 |         for (int j=0; j<g->num_nodes; j++) {
264 |             if (sol1.distances[j] != sol4.distances[j]) {
265 |                 fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol1.distances[j], sol4.distances[j]);
266 |                 tds_check = false;
267 |                 break;  // only the first mismatch is reported
268 |             }
269 |         }
270 | 
271 | 
272 |         //Time bfs_bottom_up
273 |         start = CycleTimer::currentSeconds();
274 |         bfs_bottom_up(g, &sol2);
275 |         bottom_time = CycleTimer::currentSeconds() - start;
276 | 
277 |         //Time the reference bottom-up implementation (overwrites sol4)
278 |         start = CycleTimer::currentSeconds();
279 |         reference_bfs_bottom_up(g, &sol4);
280 |         ref_bottom_time = CycleTimer::currentSeconds() - start;
281 | 
282 |         std::cout << "Testing Correctness of Bottom Up\n";
283 |         for (int j=0; j<g->num_nodes; j++) {
284 |             if (sol2.distances[j] != sol4.distances[j]) {
285 |                 fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol2.distances[j], sol4.distances[j]);
286 |                 bus_check = false;
287 |                 break;
288 |             }
289 |         }
290 | 
291 | 
292 |         start = CycleTimer::currentSeconds();
293 |         bfs_hybrid(g, &sol3);  // timed hybrid run
294 |         hybrid_time = CycleTimer::currentSeconds() - start;
295 | 
296 |         //Time the reference hybrid implementation (overwrites sol4)
297 |         start = CycleTimer::currentSeconds();
298 |         reference_bfs_hybrid(g, &sol4);
299 |         ref_hybrid_time = CycleTimer::currentSeconds() - start;
300 | 
301 |         std::cout << "Testing Correctness of Hybrid\n";
302 |         for (int j=0; j<g->num_nodes; j++) {
303 |             if (sol3.distances[j] != sol4.distances[j]) {
304 |                 fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol3.distances[j], sol4.distances[j]);
305 |                 hs_check = false;
306 |                 break;
307 |             }
308 |         }
309 | 
310 | 
311 |         char buf[1024];
312 |         char ref_buf[1024];
313 | 
314 |         sprintf(buf, "%4d:     %8.2f     %8.2f     %8.2f\n",  // one row of raw elapsed times; NOTE(review): sprintf is unbounded
315 |                 thread_count, top_time, bottom_time, hybrid_time);
316 |         sprintf(ref_buf, "%4d:     %8.2f     %8.2f     %8.2f\n",
317 |                 thread_count, ref_top_time, ref_bottom_time, ref_hybrid_time);
318 | 
319 |         timing << buf;
320 |         ref_timing << ref_buf;
321 |         if (!tds_check)  // failures are printed before the timing tables in this mode
322 |             std::cout << "Top Down Search is not Correct" << std::endl;
323 |         if (!bus_check)
324 |             std::cout << "Bottom Up Search is not Correct" << std::endl;
325 |         if (!hs_check)
326 |             std::cout << "Hybrid Search is not Correct" << std::endl;
327 |         printf("----------------------------------------------------------\n");
328 |         std::cout << "Your Code: Timing Summary" << std::endl;
329 |         std::cout << timing.str();
330 |         printf("----------------------------------------------------------\n");
331 |         std::cout << "Reference: Timing Summary" << std::endl;
332 |         std::cout << ref_timing.str();
333 |         printf("----------------------------------------------------------\n");
334 |     }
335 | 
336 |     delete g;  // NOTE(review): assumes load_graph_binary allocates the graph with new — confirm in common/graph.cpp
337 | 
338 |     return 0;  // returns 0 even when a correctness check failed
339 | }
340 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Assignment 5: Big Graph Processing in OpenMP #
  2 | 
  3 | **Due: Fri Dec 6th, 11:59PM PT (No late submission allowed)**
  4 | 
  5 | **84 points total** 
  6 | 
  7 | If you complete this assignment, you will receive up to 10 bonus points on one of the regular programming assignments (PA1-PA4). However, this extra credit can only boost an assignment to 100 points.
  8 | 
  9 | ## Overview ##
 10 | 
 11 | In this assignment, you will implement  [breadth-first search](https://en.wikipedia.org/wiki/Breadth-first_search) (BFS). A good implementation of this assignment will be able to run this algorithm on graphs containing hundreds of millions of edges on a multi-core machine in only seconds.
 12 | 
 13 | ## Environment Setup ##
 14 | 
 15 | Final grading of this assignment will be performed on the Myth machines.
 16 | 
 17 | The assignment starter code is available on [Github](https://github.com/stanford-cs149/biggraphs-ec). Please clone the Assignment 5 starter code using:
 18 | 
 19 | ```
 20 | git clone https://github.com/stanford-cs149/biggraphs-ec.git
 21 | ```
 22 | 
 23 | #### Background: Learning OpenMP ####
 24 | 
 25 | In this assignment we'd like you to use [OpenMP](http://openmp.org/) for multi-core parallelization. OpenMP is an API and set of C-language extensions that provides compiler support for parallelism. You can also use OpenMP to tell the compiler to parallelize iterations of `for` loops, and to manage mutual exclusion. It is well documented online, but here is a brief example of parallelizing a `for` loop, with mutual exclusion.
 26 | ```c
 27 | /* The iterations of this for loop may be parallelized by the compiler */      
 28 | #pragma omp parallel for                                                      
 29 | for (int i = 0; i < 100; i++) {  
 30 | 
 31 |     /* different iterations of this part of the loop body may be
 32 |         run in parallel on different cores */
 33 | 
 34 |     #pragma omp critical                                                          
 35 |     {
 36 |     /* This block will be executed by at most one thread at a time. */
 37 |     printf("Thread %d got iteration %d\n", omp_get_thread_num(), i);
 38 |     }                                                                             
 39 | }
 40 | ``` 
 41 | Please see the OpenMP documentation for the syntax for telling OpenMP to use different forms of static or dynamic scheduling. (For example, `omp parallel for schedule(dynamic, 100)` distributes iterations to threads using dynamic scheduling with a chunk size of 100 iterations.)  You can think of the implementation as a dynamic work queue where threads in the thread pool pull off 100 iterations at once, like what [we talked about in these lecture slides](https://gfxcourses.stanford.edu/cs149/fall24/lecture/perfopt1/slide_11).
 42 |     
 43 | Here is an example for an atomic counter update in OpenMP.
 44 | ```c
 45 | int my_counter = 0;
 46 | #pragma omp parallel for                                                        
 47 | for (int i = 0; i < 100; i++) {                                                      
 48 |     if ( ... some condition ...) {
 49 |         #pragma omp atomic
 50 |         my_counter++;
 51 |     }
 52 | }
 53 | ```
 54 | We expect you to be able to read OpenMP documentation on your own (Google will be very helpful), but here are some useful links to get you started:
 55 | 
 56 |  * The OpenMP 3.0 specification: <http://www.openmp.org/mp-documents/spec30.pdf>.
 57 |  * An OpenMP cheat sheet <http://openmp.org/mp-documents/OpenMP3.1-CCard.pdf>.
 58 |  * OpenMP has support for reductions on shared variables, and for declaring thread-local copies of variables.
 59 |  * This is a nice guide for the `omp parallel for` directive: <http://www.inf.ufsc.br/~bosco.sobral/ensino/ine5645/OpenMP_Dynamic_Scheduling.pdf>
 60 | 
 61 | #### Background: Representing Graphs ####
 62 | 
 63 | The starter code operates on directed graphs, whose implementation you can find in `graph.h` and `graph_internal.h`.  We recommend you begin by understanding the graph representation in these files. A graph is represented by an array of edges (both `outgoing_edges` and `incoming_edges`), where each edge is represented by an integer describing the id of the destination vertex.  Edges are stored in the graph sorted by their source vertex, so the source vertex is implicit in the representation.  This makes for a compact representation of the graph, and also allows it to be stored contiguously in memory.  For example, to iterate over the outgoing edges for all nodes in the graph, you'd use the following code which makes use of convenient helper functions defined in `graph.h` (and implemented in `graph_internal.h`):
 64 | ```c
 65 | for (int i=0; i<num_nodes(g); i++) {
 66 |     // Vertex is typedef'ed to an int. Vertex* points into g.outgoing_edges[]
 67 |     const Vertex* start = outgoing_begin(g, i);
 68 |     const Vertex* end = outgoing_end(g, i);
 69 |     for (const Vertex* v=start; v!=end; v++)
 70 |         printf("Edge %d %d\n", i, *v);
 71 | }
 72 | ```
 73 | 
 74 | #### Dataset
 75 | 
 76 | In this project, you will use a large graph dataset to test the performance. The dataset can be found depending on your setup:
 77 | 
 78 | - If you are working on a myth machine, the path to graphs directory is `/afs/ir.stanford.edu/class/cs149/data/asst3_graphs/`
 79 | - If you are working on your local machine, the dataset can be downloaded from <http://cs149.stanford.edu/cs149asstdata/all_graphs.tgz>. You can download the dataset using `wget http://cs149.stanford.edu/cs149asstdata/all_graphs.tgz`, and then untar it with `tar -xzvf all_graphs.tgz`. Be careful, this is a 3 GB download.
 80 | 
 81 | Some interesting real-world graphs include:
 82 | 
 83 |  * com-orkut_117m.graph 
 84 |  * oc-pokec_30m.graph
 85 |  * soc-livejournal1_68m.graph
 86 |  
 87 | Some useful synthetic, but large, graphs include:
 88 | 
 89 |  * random_500m.graph
 90 |  * rmat_200m.graph
 91 | 
 92 | There are also some very small graphs for testing.  If you look in the `/tools` directory of the starter code, you'll notice a useful program called `graphTools.cpp` that can be used to make your own graphs as well.
 93 | 
 94 | ## Part 1: Parallel "Top Down" Breadth-First Search (20 points) ##
 95 | 
 96 | Breadth-first search (BFS) is a common algorithm that you might have seen in a prior algorithms class. (See [here](https://www.hackerearth.com/practice/algorithms/graphs/breadth-first-search/tutorial/) and [here](https://www.youtube.com/watch?v=oDqjPvD54Ss) for helpful references.)
 97 | Please familiarize yourself with the function `bfs_top_down()` in `bfs/bfs.cpp`, which contains a sequential implementation of BFS. The code uses BFS to compute the distance to vertex 0 for all vertices in the graph. You may wish to familiarize yourself with the graph structure defined in `common/graph.h` as well as the simple array data structure `vertex_set` (`bfs/bfs.h`), which is an array of vertices used to represent the current frontier of BFS.
 98 | 
 99 | You can run bfs using:
100 | 
101 |     ./bfs <PATH_TO_GRAPHS_DIRECTORY>/rmat_200m.graph
102 | 
103 | where `<PATH_TO_GRAPHS_DIRECTORY>` is the path to the directory containing the graph files (see the "Dataset" section above).
104 | 
105 | When you run `bfs`, you'll see execution time and the frontier size printed for each step in the algorithm.  Correctness will pass for the top-down version (since we've given you a correct sequential implementation), but it will be slow.  (Note that `bfs` will report failures for the "bottom up" and "hybrid" versions of the algorithm, which you will implement later in this assignment.)
106 | 
107 | In this part of the assignment your job is to parallelize top-down BFS. You'll need to focus on identifying parallelism, as well as inserting the appropriate synchronization to ensure correctness. We wish to remind you that you __should not__ expect to achieve near-perfect speedups on this problem (we'll leave it to you to think about why!). 
108 | 
109 | __Tips/Hints:__
110 | 
111 | * Always start by considering what work can be done in parallel.
112 | * Some parts of the computation may need to be synchronized, for example, by wrapping the appropriate code within a critical region using `#pragma omp critical` or `#pragma omp atomic`.  __However, in this problem you should think about how to make use of the simple atomic operation called `compare and swap`.__  You can read about [GCC's implementation of compare and swap](https://gcc.gnu.org/onlinedocs/gcc-9.4.0/gcc/_005f_005fsync-Builtins.html), which is exposed to C code as the function `__sync_bool_compare_and_swap`.  If you can figure out how to use compare-and-swap for this problem, you will achieve much higher performance than using a critical region. 
113 | * Updating a shared counter can be done efficiently using `#pragma omp atomic` before a line like `counter++;`. 
114 | * Are there conditions where it is possible to avoid using `compare_and_swap`?  In other words, when you *know* in advance that the comparison will fail?
115 | * There is a preprocessor macro `VERBOSE` that makes it easy to disable the useful per-step timing printouts in your solution (see the top of `bfs/bfs.cpp`).  In general, these printfs occur infrequently enough (only once per BFS step) that they do not notably impact performance, but if you want to disable the printfs during timing, you can use this `#define` as a convenience.
116 | 
117 | ## Part 2: "Bottom Up" BFS (25 points) ##
118 | 
119 | Think about what behavior might cause a performance problem in the BFS implementation from Part 1.  An alternative implementation of a breadth-first search step may be more efficient in these situations.  Instead of iterating over all vertices in the frontier and marking all vertices adjacent to the frontier, it is possible to implement BFS by having *each vertex check whether it should be added to the frontier!*  Basic pseudocode for the algorithm is as follows:
120 | 
121 | ```
122 |     for each vertex v in graph:
123 |         if v has not been visited AND 
124 |            v shares an incoming edge with a vertex u on the frontier:
125 |               add vertex v to frontier;
126 | ```
127 | 
128 | This algorithm is sometimes referred to as a "bottom up" implementation of BFS, since each vertex looks "up the BFS tree" to find its ancestor. (As opposed to being found by its ancestor in a "top down" fashion, as was done in Part 1.)
129 | 
130 | Please implement a bottom-up BFS to compute the shortest path to all the vertices in the graph from the root (see `bfs_bottom_up()` in `bfs/bfs.cpp`). Start by implementing a simple sequential version. Then parallelize your implementation.
131 | 
132 | __Tips/Hints:__
133 | 
134 | * It may be useful to think about how you represent the set of unvisited nodes.  Do the top-down and bottom-up versions of the code lend themselves to different implementations?  
135 | * How do the synchronization requirements of the bottom-up BFS change?
136 | 
137 | ## Part 3: Hybrid BFS (25 points) ##
138 | 
139 | Notice that in some steps of the BFS, the "bottom up" BFS is significantly faster than the top-down version.  In other steps, the top-down version is significantly faster.  This suggests a major performance improvement in your implementation, if __you could dynamically choose between your "top down" and "bottom up" formulations based on the size of the frontier or other properties of the graph!__  If you want a solution competitive with the reference one, your implementation will likely have to implement this dynamic optimization.  Please provide your solution in `bfs_hybrid()` in `bfs/bfs.cpp`.
140 | 
141 | __Tips/Hints:__
142 | 
143 | * If you used different representations of the frontier in Parts 1 and 2, you may have to convert between these representations in the hybrid solution.  How might you efficiently convert between them? Is there an overhead in doing so?
144 | 
145 | You can run our grading script via: `./bfs_grader <path to graphs directory>`, which will report correctness and a performance points score for a number of graphs.
146 | 
147 | ## Grading and Handin ##
148 | 
149 | Along with your code, we would like you to hand in a clear but concise high-level description of how your implementation works as well as a brief description of how you arrived at your solutions. Specifically address approaches you tried along the way, and how you went about determining how to optimize your code (For example, what measurements did you perform to guide your optimization efforts?).
150 | 
151 | Aspects of your work that you should mention in the write-up include:
152 | 
153 | 1. Include both partners' names at the top of your write-up.
154 | 2. Run bfs_grader on a Myth machine and insert a copy of the score table in your solutions. **We will use Myth machines to grade your code.**
155 | 3. Describe the process of optimizing your code:
156 |  * In Part 1 (Top Down) and 2 (Bottom Up), where is the synchronization in each of your solutions? Do you do anything to limit the overhead of synchronization?
157 |  * In Part 3 (Hybrid), did you decide to switch between the top-down and bottom-up BFS implementations dynamically? How did you decide which implementation to use?
158 |  * Why do you think your code (and the staff reference) is unable to achieve perfect speedup? (Is it workload imbalance? communication/synchronization? data movement?)
159 | 
160 | ## Points Distribution ##
161 | 
162 | The 84 points on this assignment are allotted as follows:
163 | 
164 | * 70 points:  BFS performance
165 | * 14 points:  Write-up
166 | 
167 | If you earn `x` points on this assignment, we will boost your grade on any prior programming assignment by `(x/84) * 10` points, rounded to the nearest tenth.
168 | 
169 | ## Hand-in Instructions ##
170 | 
171 | Please submit your work using Gradescope.
172 | 
173 | 1. __Please submit your writeup as a PDF in the Gradescope assignment Programming Assignment 5 (Writeup).__
174 | 2. __To submit your code, run `sh create_submission.sh` to generate a `tar.gz` file and submit it to Programming Assignment 5 (Code).__ We only look at your `bfs/bfs.cpp` and `bfs/bfs.h` files, so do not change any other files. Before submitting the source files, make sure that all code is compilable and runnable! We should be able to simply run `make`, then execute your programs in the `/bfs` directory without manual intervention.
175 | 
176 | Our grading scripts will rerun the checker code allowing us to verify your score matches what you submitted in your writeup.  We may also run your code on other datasets to further examine its correctness.
177 | 


--------------------------------------------------------------------------------