├── bfs ├── ref_bfs.o ├── Makefile ├── bfs.h ├── bfs.cpp ├── grade.cpp └── main.cpp ├── handout ├── AMI.png ├── storage.png ├── ip_address.png ├── instance_nav.png ├── instance_type.png └── new_key_pair.png ├── create_submission.sh ├── tools ├── Makefile ├── plaintext.graph └── graphTools.cpp ├── common ├── contracts.h ├── graph.h ├── graph_internal.h ├── grade.h ├── CycleTimer.h └── graph.cpp ├── cloud_readme.md └── README.md /bfs/ref_bfs.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/bfs/ref_bfs.o -------------------------------------------------------------------------------- /handout/AMI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/AMI.png -------------------------------------------------------------------------------- /handout/storage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/storage.png -------------------------------------------------------------------------------- /handout/ip_address.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/ip_address.png -------------------------------------------------------------------------------- /handout/instance_nav.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/instance_nav.png -------------------------------------------------------------------------------- /handout/instance_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/instance_type.png 
-------------------------------------------------------------------------------- /handout/new_key_pair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-cs149/biggraphs-ec/HEAD/handout/new_key_pair.png -------------------------------------------------------------------------------- /create_submission.sh: -------------------------------------------------------------------------------- 1 | # archive bfs/bfs.cpp and bfs/bfs.h into bfs.tar.gz 2 | 3 | tar -czvf asst5.tar.gz bfs/bfs.cpp bfs/bfs.h -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | BINARYNAME=graphTools 2 | 3 | main: 4 | g++ -std=c++11 -g -O3 -o ${BINARYNAME} graphTools.cpp ../common/graph.cpp 5 | clean: 6 | rm -rf pr *~ *.*~ ${BINARYNAME} 7 | -------------------------------------------------------------------------------- /tools/plaintext.graph: -------------------------------------------------------------------------------- 1 | AdjacencyGraph 2 | # num vertices 3 | 5 4 | # num edges 5 | 8 6 | # edge starts 7 | 0 4 6 7 8 8 | # all the outgoing edges (target vertex) 9 | 1 2 3 4 10 | 2 3 11 | 0 12 | 0 13 | -------------------------------------------------------------------------------- /bfs/Makefile: -------------------------------------------------------------------------------- 1 | all: default grade 2 | 3 | default: main.cpp bfs.cpp 4 | g++ -I../ -std=c++11 -fopenmp -O3 -g -o bfs main.cpp bfs.cpp ../common/graph.cpp ref_bfs.o 5 | grade: grade.cpp bfs.cpp 6 | g++ -I../ -std=c++11 -fopenmp -O3 -g -o bfs_grader grade.cpp bfs.cpp ../common/graph.cpp ref_bfs.o 7 | clean: 8 | rm -rf bfs_grader bfs *~ *.*~ 9 | -------------------------------------------------------------------------------- /bfs/bfs.h: -------------------------------------------------------------------------------- 1 | #ifndef __BFS_H__ 2 
| #define __BFS_H__ 3 | 4 | //#define DEBUG 5 | 6 | #include "common/graph.h" 7 | 8 | struct solution 9 | { 10 | int *distances; 11 | }; 12 | 13 | struct vertex_set { 14 | // # of vertices in the set 15 | int count; 16 | // max size of buffer vertices 17 | int max_vertices; 18 | // array of vertex ids in set 19 | int *vertices; 20 | }; 21 | 22 | 23 | void bfs_top_down(Graph graph, solution* sol); 24 | void bfs_bottom_up(Graph graph, solution* sol); 25 | void bfs_hybrid(Graph graph, solution* sol); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /common/contracts.h: -------------------------------------------------------------------------------- 1 | /* Debugging with contracts; simulating cc0 -d 2 | * Enable with gcc -DDEBUG ... 3 | * 4 | * 15-122 Principles of Imperative Computation 5 | * Frank Pfenning 6 | */ 7 | 8 | #include 9 | 10 | /* Unlike typical header files, "contracts.h" may be 11 | * included multiple times, with and without DEBUG defined. 12 | * For this to succeed we first undefine the macros in 13 | * question in order to avoid a redefinition warning. 
14 | */ 15 | 16 | #undef ASSERT 17 | #undef REQUIRES 18 | #undef ENSURES 19 | 20 | #ifdef DEBUG 21 | 22 | #define ASSERT(COND) assert(COND) 23 | #define REQUIRES(COND) assert(COND) 24 | #define ENSURES(COND) assert(COND) 25 | 26 | #else 27 | 28 | #define ASSERT(COND) ((void)0) 29 | #define REQUIRES(COND) ((void)0) 30 | #define ENSURES(COND) ((void)0) 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /common/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef __GRAPH_H__ 2 | #define __GRAPH_H__ 3 | 4 | using Vertex = int; 5 | 6 | struct graph 7 | { 8 | // Number of edges in the graph 9 | int num_edges; 10 | // Number of vertices in the graph 11 | int num_nodes; 12 | 13 | // The node reached by vertex i's first outgoing edge is given by 14 | // outgoing_edges[outgoing_starts[i]]. To iterate over all 15 | // outgoing edges, please see the top-down bfs implementation. 16 | int* outgoing_starts; 17 | Vertex* outgoing_edges; 18 | 19 | int* incoming_starts; 20 | Vertex* incoming_edges; 21 | }; 22 | 23 | using Graph = graph*; 24 | 25 | /* Getters */ 26 | static inline int num_nodes(const Graph); 27 | static inline int num_edges(const Graph); 28 | 29 | static inline const Vertex* outgoing_begin(const Graph, Vertex); 30 | static inline const Vertex* outgoing_end(const Graph, Vertex); 31 | static inline int outgoing_size(const Graph, Vertex); 32 | 33 | static inline const Vertex* incoming_begin(const Graph, Vertex); 34 | static inline const Vertex* incoming_end(const Graph, Vertex); 35 | static inline int incoming_size(const Graph, Vertex); 36 | 37 | 38 | /* IO */ 39 | Graph load_graph(const char* filename); 40 | Graph load_graph_binary(const char* filename); 41 | void store_graph_binary(const char* filename, Graph); 42 | 43 | void print_graph(const graph*); 44 | 45 | 46 | /* Deallocation */ 47 | void free_graph(Graph); 48 | 49 | 50 | /* Included here to enable inlining. 
Don't look. */ 51 | #include "graph_internal.h" 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /common/graph_internal.h: -------------------------------------------------------------------------------- 1 | #ifndef __GRAPH_INTERNAL_H__ 2 | #define __GRAPH_INTERNAL_H__ 3 | 4 | #include 5 | #include "contracts.h" 6 | 7 | static inline int num_nodes(const Graph graph) 8 | { 9 | REQUIRES(graph != NULL); 10 | return graph->num_nodes; 11 | } 12 | 13 | static inline int num_edges(const Graph graph) 14 | { 15 | REQUIRES(graph != NULL); 16 | return graph->num_edges; 17 | } 18 | 19 | static inline const Vertex* outgoing_begin(const Graph g, Vertex v) 20 | { 21 | REQUIRES(g != NULL); 22 | REQUIRES(0 <= v && v < num_nodes(g)); 23 | return g->outgoing_edges + g->outgoing_starts[v]; 24 | } 25 | 26 | static inline const Vertex* outgoing_end(const Graph g, Vertex v) 27 | { 28 | REQUIRES(g != NULL); 29 | REQUIRES(0 <= v && v < num_nodes(g)); 30 | int offset = (v == g->num_nodes - 1) ? g->num_edges : g->outgoing_starts[v + 1]; 31 | return g->outgoing_edges + offset; 32 | } 33 | 34 | static inline int outgoing_size(const Graph g, Vertex v) 35 | { 36 | REQUIRES(g != NULL); 37 | REQUIRES(0 <= v && v < num_nodes(g)); 38 | if (v == g->num_nodes - 1) { 39 | return g->num_edges - g->outgoing_starts[v]; 40 | } else { 41 | return g->outgoing_starts[v + 1] - g->outgoing_starts[v]; 42 | } 43 | } 44 | 45 | static inline const Vertex* incoming_begin(const Graph g, Vertex v) 46 | { 47 | REQUIRES(g != NULL); 48 | REQUIRES(0 <= v && v < num_nodes(g)); 49 | return g->incoming_edges + g->incoming_starts[v]; 50 | } 51 | 52 | static inline const Vertex* incoming_end(const Graph g, Vertex v) 53 | { 54 | REQUIRES(g != NULL); 55 | REQUIRES(0 <= v && v < num_nodes(g)); 56 | int offset = (v == g->num_nodes - 1) ? 
g->num_edges : g->incoming_starts[v + 1]; 57 | return g->incoming_edges + offset; 58 | } 59 | 60 | static inline int incoming_size(const Graph g, Vertex v) 61 | { 62 | REQUIRES(g != NULL); 63 | REQUIRES(0 <= v && v < num_nodes(g)); 64 | if (v == g->num_nodes - 1) { 65 | return g->num_edges - g->incoming_starts[v]; 66 | } else { 67 | return g->incoming_starts[v + 1] - g->incoming_starts[v]; 68 | } 69 | } 70 | 71 | #endif // __GRAPH_INTERNAL_H__ 72 | -------------------------------------------------------------------------------- /cloud_readme.md: -------------------------------------------------------------------------------- 1 | # AWS Setup Instructions # 2 | 3 | For performance testing, you will need to run your code on a VM instance on Amazon Web Services (AWS). Here are the steps for how to get setup for running on AWS. Note that if you are using the shared cluster, you do not need to do any setup. 4 | 5 | NOTE: __Please don't forget to SHUT DOWN your instances when you're done for the day to avoid burning through credits overnight!__ 6 | 7 | ### Creating a VM with 32 vCPU ### 8 | 9 | 1. Navigate to your "Instances" page in AWS by selecting the correct link in the left hand menu. 10 | ![Instance Page](handout/instance_nav.png?raw=true) 11 | 12 | 2. Click on the button that says `Launch Instances`. Choose the `Ubuntu Server 20.04 LTS (HVM), SSD Volume Type` AMI: 13 | ![AMI Selection](handout/AMI.png?raw=true) 14 | 15 | 3. Choose the `m5.8xlarge` Instance Type 16 | ![instance](handout/instance_type.png?raw=true) 17 | 18 | 4. Next, under `Key pair (login)`, choose a key pair. You can use the same key pair from assignment 3. Alternatively, you can create a new one. To create a new one, click `Create new key pair` and give it whatever name you'd like. This will download a keyfile to your computer called `.pem` which you will use to login to the VM instance you are about to create. 19 | ![Key Pair](handout/new_key_pair.png?raw=true) 20 | 21 | 5. 
Scroll down to `Configure storage` and change the size of the `Root volume` to 100 GiB to accommodate the packages we will need to install to make the instance functional for the assignment. 22 | ![Storage](handout/storage.png?raw=true) 23 | 24 | 6. Once you've done all previous steps, scroll to the bottom and click `Launch instance`. 25 | 26 | __Note: `m5.8xlarge` instances cost $1.792 / hour, so leaving one running for a whole day will consume $43 worth of your AWS coupon.__ 27 | 28 | 7. Now that you've created your VM, you should be able to __SSH__ into it. You need the public IP address to SSH into it, which you can find on the instance page by clicking the `View All Instances` button on the current page and then the instance ID for your created instance (note: it may take a moment for the instance to start up and be assigned an IP address): 29 | ![IP Address](handout/ip_address.png?raw=true) 30 | Once you have the IP address, you can log in to the instance by running this command (replace `<public_ip_address>` with the address shown on the instance page): 31 | ~~~~ 32 | ssh -i path/to/key_name.pem ubuntu@<public_ip_address> 33 | ~~~~ 34 | 35 | 8. Once you SSH into your VM instance, you'll want to install whatever software you need to make the machine a useful development environment for you. For example, we recommend: 36 | ~~~~ 37 | sudo apt update 38 | sudo apt install emacs25 39 | sudo apt install make 40 | sudo apt install g++ 41 | ~~~~ 42 | 43 | If you're confused about any of the steps, have problems setting up your account, or have any additional questions, reach out to us on Ed! 
44 | 45 | __Again, please don't forget to SHUT DOWN your instances when you're done with your work for the day!__ 46 | -------------------------------------------------------------------------------- /bfs/bfs.cpp: -------------------------------------------------------------------------------- 1 | #include "bfs.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "../common/CycleTimer.h" 10 | #include "../common/graph.h" 11 | 12 | #define ROOT_NODE_ID 0 13 | #define NOT_VISITED_MARKER -1 14 | 15 | void vertex_set_clear(vertex_set* list) { 16 | list->count = 0; 17 | } 18 | 19 | void vertex_set_init(vertex_set* list, int count) { 20 | list->max_vertices = count; 21 | list->vertices = (int*)malloc(sizeof(int) * list->max_vertices); 22 | vertex_set_clear(list); 23 | } 24 | 25 | // Take one step of "top-down" BFS. For each vertex on the frontier, 26 | // follow all outgoing edges, and add all neighboring vertices to the 27 | // new_frontier. 28 | void top_down_step( 29 | Graph g, 30 | vertex_set* frontier, 31 | vertex_set* new_frontier, 32 | int* distances) 33 | { 34 | 35 | for (int i=0; icount; i++) { 36 | 37 | int node = frontier->vertices[i]; 38 | 39 | int start_edge = g->outgoing_starts[node]; 40 | int end_edge = (node == g->num_nodes - 1) 41 | ? g->num_edges 42 | : g->outgoing_starts[node + 1]; 43 | 44 | // attempt to add all neighbors to the new frontier 45 | for (int neighbor=start_edge; neighboroutgoing_edges[neighbor]; 47 | 48 | if (distances[outgoing] == NOT_VISITED_MARKER) { 49 | distances[outgoing] = distances[node] + 1; 50 | int index = new_frontier->count++; 51 | new_frontier->vertices[index] = outgoing; 52 | } 53 | } 54 | } 55 | } 56 | 57 | // Implements top-down BFS. 58 | // 59 | // Result of execution is that, for each node in the graph, the 60 | // distance to the root is stored in sol.distances. 
61 | void bfs_top_down(Graph graph, solution* sol) { 62 | 63 | vertex_set list1; 64 | vertex_set list2; 65 | vertex_set_init(&list1, graph->num_nodes); 66 | vertex_set_init(&list2, graph->num_nodes); 67 | 68 | vertex_set* frontier = &list1; 69 | vertex_set* new_frontier = &list2; 70 | 71 | // initialize all nodes to NOT_VISITED 72 | for (int i=0; inum_nodes; i++) 73 | sol->distances[i] = NOT_VISITED_MARKER; 74 | 75 | // setup frontier with the root node 76 | frontier->vertices[frontier->count++] = ROOT_NODE_ID; 77 | sol->distances[ROOT_NODE_ID] = 0; 78 | 79 | while (frontier->count != 0) { 80 | 81 | #ifdef VERBOSE 82 | double start_time = CycleTimer::currentSeconds(); 83 | #endif 84 | 85 | vertex_set_clear(new_frontier); 86 | 87 | top_down_step(graph, frontier, new_frontier, sol->distances); 88 | 89 | #ifdef VERBOSE 90 | double end_time = CycleTimer::currentSeconds(); 91 | printf("frontier=%-10d %.4f sec\n", frontier->count, end_time - start_time); 92 | #endif 93 | 94 | // swap pointers 95 | vertex_set* tmp = frontier; 96 | frontier = new_frontier; 97 | new_frontier = tmp; 98 | } 99 | } 100 | 101 | void bfs_bottom_up(Graph graph, solution* sol) 102 | { 103 | // CS149 students: 104 | // 105 | // You will need to implement the "bottom up" BFS here as 106 | // described in the handout. 107 | // 108 | // As a result of your code's execution, sol.distances should be 109 | // correctly populated for all nodes in the graph. 110 | // 111 | // As was done in the top-down case, you may wish to organize your 112 | // code by creating subroutine bottom_up_step() that is called in 113 | // each step of the BFS process. 114 | } 115 | 116 | void bfs_hybrid(Graph graph, solution* sol) 117 | { 118 | // CS149 students: 119 | // 120 | // You will need to implement the "hybrid" BFS here as 121 | // described in the handout. 
122 | } 123 | -------------------------------------------------------------------------------- /common/grade.h: -------------------------------------------------------------------------------- 1 | #ifndef __GRADE_H__ 2 | #define __GRADE_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include "graph.h" 18 | #include "graph_internal.h" 19 | #include "contracts.h" 20 | 21 | // Epsilon for approximate float comparisons 22 | #define EPSILON 0.00000000001 23 | 24 | // Output column size 25 | #define COL_SIZE 15 26 | 27 | // Point value for apps that are not run. 28 | #define POINTS_NA -1 29 | 30 | // Point value for apps that yeilded incorrect results. 31 | #define POINTS_INCORRECT -2 32 | 33 | /* 34 | * Printing functions 35 | */ 36 | 37 | static void sep(std::ostream& out, char separator = '-', int length = 78) 38 | { 39 | for (int i = 0; i < length; i++) 40 | out << separator; 41 | out << std::endl; 42 | } 43 | 44 | static void printTimingApp(std::ostream& timing, const char* appName) 45 | { 46 | std::cout << std::endl; 47 | std::cout << "Timing results for " << appName << ":" << std::endl; 48 | sep(std::cout, '=', 75); 49 | 50 | timing << std::endl; 51 | timing << "Timing results for " << appName << ":" << std::endl; 52 | sep(timing, '=', 75); 53 | } 54 | 55 | /* 56 | * Correctness checkers 57 | */ 58 | 59 | template 60 | bool compareArrays(Graph graph, T* ref, T* stu) 61 | { 62 | for (int i = 0; i < graph->num_nodes; i++) { 63 | if (ref[i] != stu[i]) { 64 | std::cerr << "*** Results disagree at " << i << " expected " 65 | << ref[i] << " found " << stu[i] << std::endl; 66 | return false; 67 | } 68 | } 69 | return true; 70 | } 71 | 72 | template 73 | bool compareApprox(Graph graph, T* ref, T* stu) 74 | { 75 | for (int i = 0; i < graph->num_nodes; i++) { 76 | if (fabs(ref[i] - stu[i]) > EPSILON) { 77 | std::cerr << "*** Results disagree at " << i << " expected " 78 
| << ref[i] << " found " << stu[i] << std::endl; 79 | return false; 80 | } 81 | } 82 | return true; 83 | } 84 | 85 | template 86 | bool compareArraysAndDisplay(Graph graph, T* ref, T*stu) 87 | { 88 | printf("\n----------------------------------\n"); 89 | printf("Visualization of student results"); 90 | printf("\n----------------------------------\n\n"); 91 | 92 | int grid_dim = (int)sqrt(graph->num_nodes); 93 | for (int j=0; jnum_nodes); 104 | for (int j=0; j(graph, ref, stu); 112 | } 113 | 114 | template 115 | bool compareArraysAndRadiiEst(Graph graph, T* ref, T* stu) 116 | { 117 | bool isCorrect = true; 118 | for (int i = 0; i < graph->num_nodes; i++) { 119 | if (ref[i] != stu[i]) { 120 | std::cerr << "*** Results disagree at " << i << " expected " 121 | << ref[i] << " found " << stu[i] << std::endl; 122 | isCorrect = false; 123 | } 124 | } 125 | int stuMaxVal = -1; 126 | int refMaxVal = -1; 127 | #pragma omp parallel for schedule(dynamic, 512) reduction(max: stuMaxVal) 128 | for (int i = 0; i < graph->num_nodes; i++) { 129 | if (stu[i] > stuMaxVal) 130 | stuMaxVal = stu[i]; 131 | } 132 | #pragma omp parallel for schedule(dynamic, 512) reduction(max: refMaxVal) 133 | for (int i = 0; i < graph->num_nodes; i++) { 134 | if (ref[i] > refMaxVal) 135 | refMaxVal = ref[i]; 136 | } 137 | 138 | if (refMaxVal != stuMaxVal) { 139 | std::cerr << "*** Radius estimates differ. 
Expected: " << refMaxVal << " Got: " << stuMaxVal << std::endl; 140 | isCorrect = false; 141 | } 142 | return isCorrect; 143 | } 144 | 145 | #endif /* __GRADE_H__ */ 146 | -------------------------------------------------------------------------------- /common/CycleTimer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYRAH_CYCLE_TIMER_H_ 2 | #define _SYRAH_CYCLE_TIMER_H_ 3 | 4 | #if defined(__APPLE__) 5 | #if defined(__x86_64__) 6 | #include 7 | #else 8 | #include 9 | #include 10 | #endif // __x86_64__ or not 11 | 12 | #include // fprintf 13 | #include // exit 14 | 15 | #elif _WIN32 16 | # include 17 | # include 18 | #else 19 | # include 20 | # include 21 | # include 22 | # include 23 | #endif 24 | 25 | 26 | // This uses the cycle counter of the processor. Different 27 | // processors in the system will have different values for this. If 28 | // you process moves across processors, then the delta time you 29 | // measure will likely be incorrect. This is mostly for fine 30 | // grained measurements where the process is likely to be on the 31 | // same processor. For more global things you should use the 32 | // Time interface. 33 | 34 | // Also note that if you processors' speeds change (i.e. processors 35 | // scaling) or if you are in a heterogenous environment, you will 36 | // likely get spurious results. 37 | class CycleTimer { 38 | public: 39 | typedef unsigned long long SysClock; 40 | 41 | ////////// 42 | // Return the current CPU time, in terms of clock ticks. 43 | // Time zero is at some arbitrary point in the past. 
44 | static SysClock currentTicks() { 45 | #if defined(__APPLE__) && !defined(__x86_64__) 46 | return mach_absolute_time(); 47 | #elif defined(_WIN32) 48 | LARGE_INTEGER qwTime; 49 | QueryPerformanceCounter(&qwTime); 50 | return qwTime.QuadPart; 51 | #elif defined(__x86_64__) 52 | unsigned int a, d; 53 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 54 | return static_cast(a) | 55 | (static_cast(d) << 32); 56 | #elif defined(__ARM_NEON__) && 0 // mrc requires superuser. 57 | unsigned int val; 58 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); 59 | return val; 60 | #else 61 | timespec spec; 62 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec); 63 | return CycleTimer::SysClock(static_cast(spec.tv_sec) * 1e9 + static_cast(spec.tv_nsec)); 64 | #endif 65 | } 66 | 67 | ////////// 68 | // Return the current CPU time, in terms of seconds. 69 | // This is slower than currentTicks(). Time zero is at 70 | // some arbitrary point in the past. 71 | static double currentSeconds() { 72 | return currentTicks() * secondsPerTick(); 73 | } 74 | 75 | ////////// 76 | // Return the conversion from seconds to ticks. 77 | static double ticksPerSecond() { 78 | return 1.0/secondsPerTick(); 79 | } 80 | 81 | static const char* tickUnits() { 82 | #if defined(__APPLE__) && !defined(__x86_64__) 83 | return "ns"; 84 | #elif defined(__WIN32__) || defined(__x86_64__) 85 | return "cycles"; 86 | #else 87 | return "ns"; // clock_gettime 88 | #endif 89 | } 90 | 91 | ////////// 92 | // Return the conversion from ticks to seconds. 
// The scale is computed on the first call and cached in function-local
// statics; later calls return the cached value.
//
// On Apple and Windows the scale comes from the OS. Otherwise it is parsed
// from /proc/cpuinfo: first the nominal frequency after the '@' in the
// "model name" line, then the "cpu MHz" line. If neither yields a positive
// frequency the scale defaults to 1e-9 (one tick == one nanosecond).
// Exits the process if the frequency source cannot be queried.
static double secondsPerTick() {
  static bool initialized = false;
  static double secondsPerTick_val;
  if (initialized) return secondsPerTick_val;
#if defined(__APPLE__)
#ifdef __x86_64__
  int args[] = {CTL_HW, HW_CPU_FREQ};
  unsigned int Hz;
  size_t len = sizeof(Hz);
  if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) {
    fprintf(stderr, "Failed to initialize secondsPerTick_val!\n");
    exit(-1);
  }
  secondsPerTick_val = 1.0 / (double) Hz;
#else
  mach_timebase_info_data_t time_info;
  mach_timebase_info(&time_info);

  // Scales to nanoseconds without 1e-9f
  secondsPerTick_val = (1e-9*static_cast<double>(time_info.numer))/
                       static_cast<double>(time_info.denom);
#endif // x86_64 or not
#elif defined(_WIN32)
  LARGE_INTEGER qwTicksPerSec;
  QueryPerformanceFrequency(&qwTicksPerSec);
  secondsPerTick_val = 1.0/static_cast<double>(qwTicksPerSec.QuadPart);
#else
  FILE *fp = fopen("/proc/cpuinfo","r");
  char input[1024];
  if (!fp) {
    fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo.");
    exit(-1);
  }
  // In case we don't find it, e.g. on the N900
  secondsPerTick_val = 1e-9;
  while (!feof(fp) && fgets(input, 1024, fp)) {
    // NOTE(boulos): Because reading cpuinfo depends on dynamic
    // frequency scaling it's better to read the @ sign first
    float GHz = 0.0f;
    float MHz = 0.0f;
    if (strstr(input, "model name")) {
      char* at_sign = strstr(input, "@");
      if (at_sign) {
        char* after_at = at_sign + 1;
        char* GHz_str = strstr(after_at, "GHz");
        char* MHz_str = strstr(after_at, "MHz");
        if (GHz_str) {
          // Cut the line at the unit so sscanf sees only the number.
          *GHz_str = '\0';
          if (1 == sscanf(after_at, "%f", &GHz) && GHz > 0.0f) {
            secondsPerTick_val = 1e-9f / GHz;
            break;
          }
        } else if (MHz_str) {
          *MHz_str = '\0';
          // Divide by the value parsed on this branch (MHz); the GHz
          // variable is never assigned here.
          if (1 == sscanf(after_at, "%f", &MHz) && MHz > 0.0f) {
            secondsPerTick_val = 1e-6f / MHz;
            break;
          }
        }
      }
    } else if (1 == sscanf(input, "cpu MHz : %f", &MHz) && MHz > 0.0f) {
      // A non-positive reading is skipped so the scale can never become
      // infinite or negative; scanning continues with the next line.
      secondsPerTick_val = 1e-6f / MHz;
      break;
    }
  }
  fclose(fp);
#endif

  initialized = true;
  return secondsPerTick_val;
}

//////////
// Return the conversion from ticks to milliseconds.
169 | static double msPerTick() { 170 | return secondsPerTick() * 1000.0; 171 | } 172 | 173 | private: 174 | CycleTimer(); 175 | }; 176 | 177 | #endif // #ifndef _SYRAH_CYCLE_TIMER_H_ 178 | -------------------------------------------------------------------------------- /common/graph.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "graph.h" 8 | #include "graph_internal.h" 9 | 10 | #define GRAPH_HEADER_TOKEN ((int) 0xDEADBEEF) 11 | 12 | 13 | void free_graph(Graph graph) 14 | { 15 | free(graph->outgoing_starts); 16 | free(graph->outgoing_edges); 17 | 18 | free(graph->incoming_starts); 19 | free(graph->incoming_edges); 20 | free(graph); 21 | } 22 | 23 | 24 | void build_start(graph* graph, int* scratch) 25 | { 26 | int num_nodes = graph->num_nodes; 27 | graph->outgoing_starts = (int*)malloc(sizeof(int) * num_nodes); 28 | for(int i = 0; i < num_nodes; i++) 29 | { 30 | graph->outgoing_starts[i] = scratch[i]; 31 | } 32 | } 33 | 34 | void build_edges(graph* graph, int* scratch) 35 | { 36 | int num_nodes = graph->num_nodes; 37 | graph->outgoing_edges = (int*)malloc(sizeof(int) * graph->num_edges); 38 | for(int i = 0; i < graph->num_edges; i++) 39 | { 40 | graph->outgoing_edges[i] = scratch[num_nodes + i]; 41 | } 42 | } 43 | 44 | // Given an outgoing edge adjacency list representation for a directed 45 | // graph, build an incoming adjacency list representation 46 | void build_incoming_edges(graph* graph) { 47 | 48 | //printf("Beginning build_incoming... 
(%d nodes)\n", graph->num_nodes); 49 | 50 | int num_nodes = graph->num_nodes; 51 | int* node_counts = (int*)malloc(sizeof(int) * num_nodes); 52 | int* node_scatter = (int*)malloc(sizeof(int) * num_nodes); 53 | 54 | graph->incoming_starts = (int*)malloc(sizeof(int) * num_nodes); 55 | graph->incoming_edges = (int*)malloc(sizeof(int) * graph->num_edges); 56 | 57 | for (int i=0; ioutgoing_starts[i]; 64 | int end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1]; 65 | for (int j=start_edge; joutgoing_edges[j]; 67 | node_counts[target_node]++; 68 | total_edges++; 69 | } 70 | } 71 | //printf("Total edges: %d\n", total_edges); 72 | //printf("Computed incoming edge counts.\n"); 73 | 74 | // build the starts array 75 | graph->incoming_starts[0] = 0; 76 | for (int i=1; iincoming_starts[i] = graph->incoming_starts[i-1] + node_counts[i-1]; 78 | //printf("%d: %d ", i, graph->incoming_starts[i]); 79 | } 80 | //printf("\n"); 81 | //printf("Last edge=%d\n", graph->incoming_starts[num_nodes-1] + node_counts[num_nodes-1]); 82 | 83 | //printf("Computed per-node incoming starts.\n"); 84 | 85 | // now perform the scatter 86 | for (int i=0; ioutgoing_starts[i]; 88 | int end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1]; 89 | for (int j=start_edge; joutgoing_edges[j]; 91 | graph->incoming_edges[graph->incoming_starts[target_node] + node_scatter[target_node]] = i; 92 | node_scatter[target_node]++; 93 | } 94 | } 95 | 96 | /* 97 | // verify 98 | printf("Verifying graph...\n"); 99 | 100 | for (int i=0; ioutgoing_starts[i]; 102 | int end_node = (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1]; 103 | for (int j=outgoing_starts; joutgoing_edges[j]; 109 | int j_start_edge = graph->incoming_starts[target_node]; 110 | int j_end_edge = (target_node == graph->num_nodes-1) ? 
graph->num_edges : graph->incoming_starts[target_node+1]; 111 | for (int k=j_start_edge; kincoming_edges[k] == i) { 113 | verified = true; 114 | break; 115 | } 116 | } 117 | 118 | if (!verified) { 119 | fprintf(stderr,"Error: %d,%d did not verify\n", i, target_node); 120 | } 121 | } 122 | } 123 | 124 | printf("Done verifying\n"); 125 | */ 126 | 127 | free(node_counts); 128 | free(node_scatter); 129 | } 130 | 131 | void get_meta_data(std::ifstream& file, graph* graph) 132 | { 133 | // going back to the beginning of the file 134 | file.clear(); 135 | file.seekg(0, std::ios::beg); 136 | std::string buffer; 137 | std::getline(file, buffer); 138 | if ((buffer.compare(std::string("AdjacencyGraph")))) 139 | { 140 | std::cout << "Invalid input file" << buffer << std::endl; 141 | exit(1); 142 | } 143 | buffer.clear(); 144 | 145 | do { 146 | std::getline(file, buffer); 147 | } while (buffer.size() == 0 || buffer[0] == '#'); 148 | 149 | graph->num_nodes = atoi(buffer.c_str()); 150 | buffer.clear(); 151 | 152 | do { 153 | std::getline(file, buffer); 154 | } while (buffer.size() == 0 || buffer[0] == '#'); 155 | 156 | graph->num_edges = atoi(buffer.c_str()); 157 | 158 | } 159 | 160 | void read_graph_file(std::ifstream& file, int* scratch) 161 | { 162 | std::string buffer; 163 | int idx = 0; 164 | while(!file.eof()) 165 | { 166 | buffer.clear(); 167 | std::getline(file, buffer); 168 | 169 | if (buffer.size() > 0 && buffer[0] == '#') 170 | continue; 171 | 172 | std::stringstream parse(buffer); 173 | while (!parse.fail()) { 174 | int v; 175 | parse >> v; 176 | if (parse.fail()) 177 | { 178 | break; 179 | } 180 | scratch[idx] = v; 181 | idx++; 182 | } 183 | } 184 | } 185 | 186 | void print_graph(const graph* graph) 187 | { 188 | 189 | printf("Graph pretty print:\n"); 190 | printf("num_nodes=%d\n", graph->num_nodes); 191 | printf("num_edges=%d\n", graph->num_edges); 192 | 193 | for (int i=0; inum_nodes; i++) { 194 | 195 | int start_edge = graph->outgoing_starts[i]; 196 | int end_edge 
= (i == graph->num_nodes-1) ? graph->num_edges : graph->outgoing_starts[i+1]; 197 | printf("node %02d: out=%d: ", i, end_edge - start_edge); 198 | for (int j=start_edge; joutgoing_edges[j]; 200 | printf("%d ", target); 201 | } 202 | printf("\n"); 203 | 204 | start_edge = graph->incoming_starts[i]; 205 | end_edge = (i == graph->num_nodes-1) ? graph->num_edges : graph->incoming_starts[i+1]; 206 | printf(" in=%d: ", end_edge - start_edge); 207 | for (int j=start_edge; jincoming_edges[j]; 209 | printf("%d ", target); 210 | } 211 | printf("\n"); 212 | } 213 | } 214 | 215 | Graph load_graph(const char* filename) 216 | { 217 | graph* graph = (struct graph*)(malloc(sizeof(struct graph))); 218 | 219 | // open the file 220 | std::ifstream graph_file; 221 | graph_file.open(filename); 222 | get_meta_data(graph_file, graph); 223 | 224 | int* scratch = (int*) malloc(sizeof(int) * (graph->num_nodes + graph->num_edges)); 225 | read_graph_file(graph_file, scratch); 226 | 227 | build_start(graph, scratch); 228 | build_edges(graph, scratch); 229 | free(scratch); 230 | 231 | build_incoming_edges(graph); 232 | 233 | //print_graph(graph); 234 | 235 | return graph; 236 | } 237 | 238 | Graph load_graph_binary(const char* filename) 239 | { 240 | graph* graph = (struct graph*)(malloc(sizeof(struct graph))); 241 | 242 | FILE* input = fopen(filename, "rb"); 243 | 244 | if (!input) { 245 | fprintf(stderr, "Could not open: %s\n", filename); 246 | exit(1); 247 | } 248 | 249 | int header[3]; 250 | 251 | if (fread(header, sizeof(int), 3, input) != 3) { 252 | fprintf(stderr, "Error reading header.\n"); 253 | exit(1); 254 | } 255 | 256 | if (header[0] != GRAPH_HEADER_TOKEN) { 257 | fprintf(stderr, "Invalid graph file header. 
File may be corrupt.\n"); 258 | exit(1); 259 | } 260 | 261 | graph->num_nodes = header[1]; 262 | graph->num_edges = header[2]; 263 | 264 | graph->outgoing_starts = (int*)malloc(sizeof(int) * graph->num_nodes); 265 | graph->outgoing_edges = (int*)malloc(sizeof(int) * graph->num_edges); 266 | 267 | if (fread(graph->outgoing_starts, sizeof(int), graph->num_nodes, input) != (size_t) graph->num_nodes) { 268 | fprintf(stderr, "Error reading nodes.\n"); 269 | exit(1); 270 | } 271 | 272 | if (fread(graph->outgoing_edges, sizeof(int), graph->num_edges, input) != (size_t) graph->num_edges) { 273 | fprintf(stderr, "Error reading edges.\n"); 274 | exit(1); 275 | } 276 | 277 | fclose(input); 278 | 279 | build_incoming_edges(graph); 280 | //print_graph(graph); 281 | return graph; 282 | } 283 | 284 | void store_graph_binary(const char* filename, Graph graph) { 285 | 286 | FILE* output = fopen(filename, "wb"); 287 | 288 | if (!output) { 289 | fprintf(stderr, "Could not open: %s\n", filename); 290 | exit(1); 291 | } 292 | 293 | int header[3]; 294 | header[0] = GRAPH_HEADER_TOKEN; 295 | header[1] = graph->num_nodes; 296 | header[2] = graph->num_edges; 297 | 298 | if (fwrite(header, sizeof(int), 3, output) != 3) { 299 | fprintf(stderr, "Error writing header.\n"); 300 | exit(1); 301 | } 302 | 303 | if (fwrite(graph->outgoing_starts, sizeof(int), graph->num_nodes, output) != (size_t) graph->num_nodes) { 304 | fprintf(stderr, "Error writing nodes.\n"); 305 | exit(1); 306 | } 307 | 308 | if (fwrite(graph->outgoing_edges, sizeof(int), graph->num_edges, output) != (size_t) graph->num_edges) { 309 | fprintf(stderr, "Error writing edges.\n"); 310 | exit(1); 311 | } 312 | 313 | fclose(output); 314 | } 315 | -------------------------------------------------------------------------------- /tools/graphTools.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #include 
"../common/graph.h" 11 | 12 | #define CMD_TEXT2BIN "text2bin" 13 | #define CMD_INFO "info" 14 | #define CMD_PRINT "print" 15 | #define CMD_NOOUTEDGES "noout" 16 | #define CMD_NOINEDGES "noin" 17 | #define CMD_EDGESTATS "edgestats" 18 | 19 | 20 | void print_help(const char* binary_name) { 21 | std::cerr << "Usage: " << binary_name << " cmd args\n"; 22 | std::cerr << "Use '" << binary_name << " cmd' to get command-specific help.\n"; 23 | std::cerr << "\n"; 24 | std::cerr << "Valid cmds are:\n\n" 25 | << CMD_TEXT2BIN << ": text file to binary file conversion\n" 26 | << CMD_INFO << ": print graph metadata\n" 27 | << CMD_PRINT << ": print graph topology (careful with big graphs)\n" 28 | << CMD_NOOUTEDGES << ": detect vertices with no outgoing edges\n" 29 | << CMD_NOINEDGES << ": detect vertices with no incoming edges\n" 30 | << CMD_EDGESTATS << ": print stats on graph edges: e.g., min/max edges per node, etc.\n"; 31 | } 32 | 33 | int main(int argc, char** argv) { 34 | 35 | if (argc < 2) { 36 | print_help(argv[0]); 37 | exit(1); 38 | } 39 | 40 | std::string cmd = std::string(argv[1]); 41 | 42 | if (!cmd.compare(CMD_TEXT2BIN)) { 43 | 44 | if (argc < 4) { 45 | std::cerr << "Usage: " << argv[0] << " " << cmd << " textfilename binfilename\n"; 46 | std::cerr << "Converts a graph from text file format to binary file format\n"; 47 | exit(1); 48 | } 49 | 50 | std::string inputFilename = std::string(argv[2]); 51 | std::string outputFilename = std::string(argv[3]); 52 | 53 | Graph g; 54 | std::cout << "Loading graph: " << inputFilename << "\n"; 55 | g = load_graph(inputFilename.c_str()); 56 | std::cout << "Done loading.\n"; 57 | store_graph_binary(outputFilename.c_str(), g); 58 | delete g; 59 | 60 | } else if (!cmd.compare(CMD_INFO)) { 61 | if (argc < 3) { 62 | std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n"; 63 | std::cerr << "Pretty-prints graph info (num vertices, num edges)\n"; 64 | exit(1); 65 | } 66 | 67 | std::string inputFilename = std::string(argv[2]); 68 
| 69 | Graph g; 70 | std::cout << "Loading graph: " << inputFilename << "\n"; 71 | g = load_graph_binary(inputFilename.c_str()); 72 | std::cout << "Done loading.\n"; 73 | 74 | std::cout << "Num vertices: " << num_nodes(g) << "\n"; 75 | std::cout << "Num edges: " << num_edges(g) << "\n"; 76 | delete g; 77 | 78 | } else if (!cmd.compare(CMD_PRINT)) { 79 | 80 | if (argc < 3) { 81 | std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n"; 82 | std::cerr << "Pretty-prints graph, including edge information (be careful with large graphs)\n"; 83 | exit(1); 84 | } 85 | 86 | std::string inputFilename = std::string(argv[2]); 87 | 88 | Graph g; 89 | std::cout << "Loading graph: " << inputFilename << "\n"; 90 | g = load_graph_binary(inputFilename.c_str()); 91 | std::cout << "Done loading.\n"; 92 | print_graph(g); 93 | delete g; 94 | 95 | } else if (!cmd.compare(CMD_NOOUTEDGES)) { 96 | 97 | if (argc < 3) { 98 | std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n"; 99 | std::cerr << "Lists all vertices without outgoing edges.\n"; 100 | exit(1); 101 | } 102 | 103 | std::string inputFilename = std::string(argv[2]); 104 | 105 | Graph g; 106 | std::cout << "Loading graph: " << inputFilename << "\n"; 107 | g = load_graph_binary(inputFilename.c_str()); 108 | std::cout << "Done loading.\n"; 109 | 110 | std::vector zero_outgoing; 111 | 112 | for (int i=0; i(zero_outgoing.size())/num_nodes(g) << "\%).\n"; 126 | delete g; 127 | 128 | } else if (!cmd.compare(CMD_NOINEDGES)) { 129 | 130 | if (argc < 3) { 131 | std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n"; 132 | std::cerr << "Lists all edges without incoming edges.\n"; 133 | exit(1); 134 | } 135 | 136 | std::string inputFilename = std::string(argv[2]); 137 | 138 | Graph g; 139 | std::cout << "Loading graph: " << inputFilename << "\n"; 140 | g = load_graph_binary(inputFilename.c_str()); 141 | std::cout << "Done loading.\n"; 142 | 143 | std::vector zero_incoming; 144 | 145 | for (int i=0; 
i(zero_incoming.size())/num_nodes(g) << "\%).\n"; 159 | delete g; 160 | 161 | } else if (!cmd.compare(CMD_EDGESTATS)) { 162 | 163 | if (argc < 3) { 164 | std::cerr << "Usage: " << argv[0] << " " << cmd << " filename\n"; 165 | std::cerr << "Print basic stats about edges.\n"; 166 | exit(1); 167 | } 168 | 169 | std::string inputFilename = std::string(argv[2]); 170 | 171 | Graph g; 172 | std::cout << "Loading graph: " << inputFilename << "\n"; 173 | g = load_graph_binary(inputFilename.c_str()); 174 | std::cout << "Done loading. Now analyzing graph...\n"; 175 | 176 | unsigned int total_incoming = 0; 177 | unsigned int total_outgoing = 0; 178 | unsigned int min_outgoing = INT_MAX; 179 | unsigned int max_outgoing = 0; 180 | unsigned int min_incoming = INT_MAX; 181 | unsigned int max_incoming = 0; 182 | bool is_symmetric = true; 183 | 184 | for (int i=0; itarget), therefore target better have an 207 | // incoming edge from i. 208 | bool found_matching = false; 209 | const Vertex* sanity_begin = incoming_begin(g, target); 210 | const Vertex* sanity_end = incoming_end(g, target); 211 | for (const Vertex* v2=sanity_begin; v2!=sanity_end; v2++) { 212 | Vertex i2 = *v2; 213 | if (i == i2) { 214 | found_matching = true; 215 | break; 216 | } 217 | } 218 | if (!found_matching) { 219 | std::cerr << "GRAPH DID NOT PASS SANITY CHECK:\n" 220 | << "vertex " << i << " has outgoing edge to " << target << ",\n but " 221 | << "vertex " << target << " has no incoming edge from " << i << "\n"; 222 | 223 | // abort on a failed sanity check 224 | exit(1); 225 | } 226 | 227 | // symmetry test: vertex i has an outgoing edge to 228 | // target (i->target), so check to see if there's an 229 | // incoming edge from target as well (target->i). 
230 | bool found_symmetric = false; 231 | const Vertex* in_start = incoming_begin(g, i); 232 | const Vertex* in_end = incoming_end(g, i); 233 | 234 | for (const Vertex* v2=in_start; v2!=in_end; v2++) { 235 | 236 | Vertex target2 = *v2; 237 | 238 | if (target == target2) { 239 | found_symmetric = true; 240 | break; 241 | } 242 | } 243 | if (!found_symmetric) 244 | is_symmetric = false; 245 | 246 | } 247 | 248 | } 249 | 250 | float avg_outgoing = (float)total_outgoing / num_nodes(g); 251 | float avg_incoming = (float)total_incoming / num_nodes(g); 252 | 253 | std::cout << "=========================================================\n"; 254 | std::cout << "Edge statistics for this graph:\n"; 255 | std::cout << "=========================================================\n"; 256 | std::cout << "The graph " << ((is_symmetric) ? "IS " : "IS NOT ") << "symmetric.\n"; 257 | std::cout << "Outgoing edges: total=" << total_outgoing 258 | << " avg=" << avg_outgoing 259 | << " min=" << min_outgoing 260 | << " max=" << max_outgoing << "\n"; 261 | 262 | std::cout << "Incoming edges: total=" << total_incoming 263 | << " avg=" << avg_incoming 264 | << " min=" << min_incoming 265 | << " max=" << max_incoming << "\n"; 266 | } 267 | 268 | else { 269 | print_help(argv[0]); 270 | } 271 | 272 | return 0; 273 | } 274 | -------------------------------------------------------------------------------- /bfs/grade.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include "../common/CycleTimer.h" 13 | #include "../common/graph.h" 14 | #include "../common/grade.h" 15 | #include "bfs.h" 16 | 17 | #define USE_BINARY_GRAPH 1 18 | 19 | #define top_down 0 20 | #define bott_up 1 21 | #define hybrid 2 22 | 23 | void reference_bfs_bottom_up(Graph graph, solution* sol); 24 | void reference_bfs_top_down(Graph graph, solution* sol); 25 | void 
reference_bfs_hybrid(Graph graph, solution* sol); 26 | 27 | void usage(const char* binary_name) { 28 | std::cout << "Usage: " << binary_name << " [options] graphdir" << std::endl; 29 | std::cout << std::endl; 30 | std::cout << "Options:" << std::endl; 31 | std::cout << " -n INT number of threads" << std::endl; 32 | std::cout << " -r INT number of runs" << std::endl; 33 | std::cout << " -h this commandline help message" << std::endl; 34 | } 35 | 36 | graph* load_graph(std::string graph_filename) { 37 | graph* g; 38 | if (USE_BINARY_GRAPH) { 39 | g = load_graph_binary(graph_filename.c_str()); 40 | } else { 41 | g = load_graph(graph_filename); 42 | printf("storing binary form of graph!\n"); 43 | store_graph_binary(graph_filename.append(".bin").c_str(), g); 44 | delete g; 45 | exit(1); 46 | } 47 | return g; 48 | } 49 | 50 | double compute_score(std::string graph_name, bool correct, double ref_time, double stu_time) { 51 | double max_score = 1.0; 52 | double max_perf_score = 0.8 * max_score; 53 | double correctness_score = 0.2 * max_score; 54 | correctness_score = (correct) ? correctness_score : 0; 55 | 56 | double ratio = (ref_time/stu_time); 57 | 58 | double slope = max_perf_score/(0.7 - 0.3); 59 | double offset = 0.3 * slope; 60 | 61 | double perf_score = (correct) ? 
ratio*slope - offset : 0; 62 | 63 | if (perf_score < 0) perf_score = 0; 64 | if (perf_score > max_perf_score) perf_score = max_perf_score; 65 | 66 | return (correctness_score + perf_score); 67 | } 68 | 69 | void run_on_graph(int idx, graph* g, int num_threads, int num_runs, 70 | std::string graph_name, std::vector> &scores) { 71 | 72 | solution ref; 73 | ref.distances = new int[g->num_nodes]; 74 | solution stu; 75 | stu.distances = new int[g->num_nodes]; 76 | 77 | double start, time; 78 | 79 | omp_set_num_threads(num_threads); 80 | 81 | std::cout << "\nTop down bfs" << std::endl; 82 | double ref_top_down_time = std::numeric_limits::max(); 83 | for (int r = 0; r < num_runs; r++) { 84 | start = CycleTimer::currentSeconds(); 85 | reference_bfs_top_down(g, &ref); 86 | time = CycleTimer::currentSeconds() - start; 87 | ref_top_down_time = std::min(ref_top_down_time, time); 88 | } 89 | 90 | double stu_top_down_time = std::numeric_limits::max(); 91 | for (int r = 0; r < num_runs; r++) { 92 | start = CycleTimer::currentSeconds(); 93 | bfs_top_down(g, &stu); 94 | //reference_bfs_top_down(g, &stu); 95 | time = CycleTimer::currentSeconds() - start; 96 | stu_top_down_time = std::min(stu_top_down_time, time); 97 | } 98 | 99 | bool correct = compareArrays(g, ref.distances, stu.distances); 100 | 101 | if (!correct) { 102 | std::cout << "Top down bfs incorrect" << std::endl; 103 | std::cout << "ref_time: " << ref_top_down_time << "s" << std::endl; 104 | } else { 105 | std::cout << "ref_time: " << ref_top_down_time << "s" << std::endl; 106 | std::cout << "stu_time: " << stu_top_down_time << "s" << std::endl; 107 | } 108 | 109 | scores[idx][top_down] = compute_score(graph_name, correct, ref_top_down_time, stu_top_down_time); 110 | 111 | for (int i = 0; i < g->num_nodes; i++) { 112 | ref.distances[i] = -1; 113 | stu.distances[i] = -1; 114 | } 115 | 116 | double ref_bottom_up_time = std::numeric_limits::max(); 117 | for (int r = 0; r < num_runs; r++) { 118 | start = 
CycleTimer::currentSeconds(); 119 | reference_bfs_bottom_up(g, &ref); 120 | time = CycleTimer::currentSeconds() - start; 121 | ref_bottom_up_time = std::min(ref_bottom_up_time, time); 122 | } 123 | 124 | std::cout << "\nBottom up bfs" << std::endl; 125 | double stu_bottom_up_time = std::numeric_limits::max(); 126 | for (int r = 0; r < num_runs; r++) { 127 | start = CycleTimer::currentSeconds(); 128 | bfs_bottom_up(g, &stu); 129 | //reference_bfs_bottom_up(g, &stu); 130 | time = CycleTimer::currentSeconds() - start; 131 | stu_bottom_up_time = std::min(stu_bottom_up_time, time); 132 | } 133 | 134 | correct = compareArrays(g, ref.distances, stu.distances); 135 | 136 | if (!correct) { 137 | std::cout << "Bottom up bfs incorrect" << std::endl; 138 | std::cout << "ref_time: " << ref_bottom_up_time << "s" << std::endl; 139 | } else { 140 | std::cout << "ref_time: " << ref_bottom_up_time << "s" << std::endl; 141 | std::cout << "stu_time: " << stu_bottom_up_time << "s" << std::endl; 142 | } 143 | 144 | scores[idx][bott_up] = compute_score(graph_name, correct, ref_bottom_up_time, stu_bottom_up_time); 145 | 146 | 147 | for (int i = 0; i < g->num_nodes; i++) { 148 | ref.distances[i] = -1; 149 | stu.distances[i] = -1; 150 | } 151 | 152 | std::cout << "\nHybrid bfs" << std::endl; 153 | 154 | double ref_hybrid_time = std::numeric_limits::max(); 155 | for (int r = 0; r < num_runs; r++) { 156 | start = CycleTimer::currentSeconds(); 157 | reference_bfs_hybrid(g, &ref); 158 | time = CycleTimer::currentSeconds() - start; 159 | ref_hybrid_time = std::min(ref_hybrid_time, time); 160 | } 161 | 162 | double stu_hybrid_time = std::numeric_limits::max(); 163 | for (int r = 0; r < num_runs; r++) { 164 | start = CycleTimer::currentSeconds(); 165 | bfs_hybrid(g, &stu); 166 | //reference_bfs_hybrid(g, &stu); 167 | time = CycleTimer::currentSeconds() - start; 168 | stu_hybrid_time = std::min(stu_hybrid_time, time); 169 | } 170 | 171 | correct = compareArrays(g, ref.distances, stu.distances); 172 
| 173 | if (!correct) { 174 | std::cout << "Hybrid bfs incorrect" << std::endl; 175 | std::cout << "ref_time: " << ref_hybrid_time << "s" << std::endl; 176 | } else { 177 | std::cout << "ref_time: " << ref_hybrid_time << "s" << std::endl; 178 | std::cout << "stu_time: " << stu_hybrid_time << "s" << std::endl; 179 | } 180 | 181 | scores[idx][hybrid] = compute_score(graph_name, correct, ref_hybrid_time, stu_hybrid_time); 182 | 183 | delete(stu.distances); 184 | delete(ref.distances); 185 | } 186 | 187 | void print_separator_line() { 188 | for (int i = 0; i < 74; i++) { 189 | std::cout<<"-"; 190 | } 191 | std::cout< grade_graphs, std::vector> scores) { 195 | 196 | std::cout.precision(2); 197 | std::cout.setf(std::ios::fixed, std:: ios::floatfield); 198 | std::cout< grade_graphs = { "grid1000x1000.graph", 286 | "soc-livejournal1_68m.graph", 287 | "com-orkut_117m.graph", 288 | "random_500m.graph", 289 | "rmat_200m.graph"}; 290 | 291 | std::vector> scores(grade_graphs.size()); 292 | // top_down 0 293 | // bott_up 1 294 | // hybrid 2 295 | for (int i = 0; i < grade_graphs.size(); i++) { 296 | scores[i] = std::vector(3); 297 | } 298 | 299 | int idx = 0; 300 | for (auto& graph_name: grade_graphs) { 301 | graph* g = load_graph(graph_dir + '/' + graph_name); 302 | std::cout << "\nGraph: " << graph_name << std::endl; 303 | run_on_graph(idx, g, num_threads, num_runs, graph_name, scores); 304 | delete g; 305 | idx++; 306 | } 307 | 308 | print_scores(grade_graphs, scores); 309 | 310 | return 0; 311 | } 312 | -------------------------------------------------------------------------------- /bfs/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "common/CycleTimer.h" 12 | #include "common/graph.h" 13 | #include "bfs.h" 14 | 15 | #define USE_BINARY_GRAPH 1 16 | 17 | void reference_bfs_bottom_up(Graph graph, solution* 
sol); 18 | void reference_bfs_top_down(Graph graph, solution* sol); 19 | void reference_bfs_hybrid(Graph graph, solution* sol); 20 | 21 | int main(int argc, char** argv) { 22 | 23 | int num_threads = -1; 24 | std::string graph_filename; 25 | 26 | if (argc < 2) 27 | { 28 | std::cerr << "Usage: [num_threads]\n"; 29 | std::cerr << " To run results for all thread counts: \n"; 30 | std::cerr << " Run with a certain number of threads (no correctness run): \n"; 31 | exit(1); 32 | } 33 | 34 | int thread_count = -1; 35 | if (argc == 3) 36 | { 37 | thread_count = atoi(argv[2]); 38 | } 39 | 40 | graph_filename = argv[1]; 41 | 42 | Graph g; 43 | 44 | printf("----------------------------------------------------------\n"); 45 | printf("Max system threads = %d\n", omp_get_max_threads()); 46 | if (thread_count > 0) 47 | { 48 | thread_count = std::min(thread_count, omp_get_max_threads()); 49 | printf("Running with %d threads\n", thread_count); 50 | } 51 | printf("----------------------------------------------------------\n"); 52 | 53 | printf("Loading graph...\n"); 54 | if (USE_BINARY_GRAPH) { 55 | g = load_graph_binary(graph_filename.c_str()); 56 | } else { 57 | g = load_graph(argv[1]); 58 | printf("storing binary form of graph!\n"); 59 | store_graph_binary(graph_filename.append(".bin").c_str(), g); 60 | delete g; 61 | exit(1); 62 | } 63 | printf("\n"); 64 | printf("Graph stats:\n"); 65 | printf(" Edges: %d\n", g->num_edges); 66 | printf(" Nodes: %d\n", g->num_nodes); 67 | 68 | //If we want to run on all threads 69 | if (thread_count <= -1) 70 | { 71 | //Static assignment to get consistent usage across trials 72 | int max_threads = omp_get_max_threads(); 73 | 74 | //static num_threadss 75 | std::vector num_threads; 76 | 77 | //dynamic num_threads 78 | for (int i = 1; i < max_threads; i *= 2) { 79 | num_threads.push_back(i); 80 | } 81 | num_threads.push_back(max_threads); 82 | int n_usage = num_threads.size(); 83 | 84 | solution sol1; 85 | sol1.distances = (int*)malloc(sizeof(int) 
* g->num_nodes); 86 | solution sol2; 87 | sol2.distances = (int*)malloc(sizeof(int) * g->num_nodes); 88 | solution sol3; 89 | sol3.distances = (int*)malloc(sizeof(int) * g->num_nodes); 90 | 91 | //Solution sphere 92 | solution sol4; 93 | sol4.distances = (int*)malloc(sizeof(int) * g->num_nodes); 94 | 95 | double hybrid_base, top_base, bottom_base; 96 | double hybrid_time, top_time, bottom_time; 97 | 98 | double ref_hybrid_base, ref_top_base, ref_bottom_base; 99 | double ref_hybrid_time, ref_top_time, ref_bottom_time; 100 | 101 | double start; 102 | std::stringstream timing; 103 | std::stringstream ref_timing; 104 | std::stringstream relative_timing; 105 | 106 | bool tds_check = true, bus_check = true, hs_check = true; 107 | 108 | timing << "Threads Top Down Bottom Up Hybrid\n"; 109 | ref_timing << "Threads Top Down Bottom Up Hybrid\n"; 110 | relative_timing << "Threads Top Down Bottom Up Hybrid\n"; 111 | 112 | //Loop through assignment values; 113 | for (int i = 0; i < n_usage; i++) 114 | { 115 | printf("----------------------------------------------------------\n"); 116 | std::cout << "Running with " << num_threads[i] << " threads" << std::endl; 117 | //Set thread count 118 | omp_set_num_threads(num_threads[i]); 119 | 120 | //Run implementations 121 | start = CycleTimer::currentSeconds(); 122 | bfs_top_down(g, &sol1); 123 | top_time = CycleTimer::currentSeconds() - start; 124 | 125 | //Run reference implementation 126 | start = CycleTimer::currentSeconds(); 127 | reference_bfs_top_down(g, &sol4); 128 | ref_top_time = CycleTimer::currentSeconds() - start; 129 | 130 | std::cout << "Testing Correctness of Top Down\n"; 131 | for (int j=0; jnum_nodes; j++) { 132 | if (sol1.distances[j] != sol4.distances[j]) { 133 | fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol1.distances[j], sol4.distances[j]); 134 | tds_check = false; 135 | break; 136 | } 137 | } 138 | 139 | //Run implementations 140 | start = CycleTimer::currentSeconds(); 141 | bfs_bottom_up(g, 
&sol2); 142 | bottom_time = CycleTimer::currentSeconds() - start; 143 | 144 | //Run reference implementation 145 | start = CycleTimer::currentSeconds(); 146 | reference_bfs_bottom_up(g, &sol4); 147 | ref_bottom_time = CycleTimer::currentSeconds() - start; 148 | 149 | std::cout << "Testing Correctness of Bottom Up\n"; 150 | for (int j=0; jnum_nodes; j++) { 151 | if (sol2.distances[j] != sol4.distances[j]) { 152 | fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol2.distances[j], sol4.distances[j]); 153 | bus_check = false; 154 | break; 155 | } 156 | } 157 | 158 | start = CycleTimer::currentSeconds(); 159 | bfs_hybrid(g, &sol3); 160 | hybrid_time = CycleTimer::currentSeconds() - start; 161 | 162 | //Run reference implementation 163 | start = CycleTimer::currentSeconds(); 164 | reference_bfs_hybrid(g, &sol4); 165 | ref_hybrid_time = CycleTimer::currentSeconds() - start; 166 | 167 | std::cout << "Testing Correctness of Hybrid\n"; 168 | for (int j=0; jnum_nodes; j++) { 169 | if (sol3.distances[j] != sol4.distances[j]) { 170 | fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol3.distances[j], sol4.distances[j]); 171 | hs_check = false; 172 | break; 173 | } 174 | } 175 | 176 | if (i == 0) 177 | { 178 | hybrid_base = hybrid_time; 179 | ref_hybrid_base = ref_hybrid_time; 180 | top_base = top_time; 181 | bottom_base = bottom_time; 182 | ref_top_base = ref_top_time; 183 | ref_bottom_base = ref_bottom_time; 184 | 185 | } 186 | 187 | char buf[1024]; 188 | char ref_buf[1024]; 189 | char relative_buf[1024]; 190 | 191 | sprintf(buf, "%4d: %.2f (%.2fx) %.2f (%.2fx) %.2f (%.2fx)\n", 192 | num_threads[i], top_time, top_base/top_time, bottom_time, 193 | bottom_base/bottom_time, hybrid_time, hybrid_base/hybrid_time); 194 | sprintf(ref_buf, "%4d: %.2f (%.2fx) %.2f (%.2fx) %.2f (%.2fx)\n", 195 | num_threads[i], ref_top_time, ref_top_base/ref_top_time, ref_bottom_time, 196 | ref_bottom_base/ref_bottom_time, ref_hybrid_time, ref_hybrid_base/ref_hybrid_time); 197 | 
sprintf(relative_buf, "%4d: %14.2f %14.2f %14.2f\n", 198 | num_threads[i], ref_top_time/top_time, ref_bottom_time/bottom_time, ref_hybrid_time/hybrid_time); 199 | 200 | timing << buf; 201 | ref_timing << ref_buf; 202 | relative_timing << relative_buf; 203 | } 204 | 205 | printf("----------------------------------------------------------\n"); 206 | std::cout << "Your Code: Timing Summary" << std::endl; 207 | std::cout << timing.str(); 208 | printf("----------------------------------------------------------\n"); 209 | std::cout << "Reference: Timing Summary" << std::endl; 210 | std::cout << ref_timing.str(); 211 | printf("----------------------------------------------------------\n"); 212 | std::cout << "Correctness: " << std::endl; 213 | if (!tds_check) 214 | std::cout << "Top Down Search is not Correct" << std::endl; 215 | if (!bus_check) 216 | std::cout << "Bottom Up Search is not Correct" << std::endl; 217 | if (!hs_check) 218 | std::cout << "Hybrid Search is not Correct" << std::endl; 219 | std::cout << std::endl << "Speedup vs. 
Reference: " << std::endl << relative_timing.str(); 220 | } 221 | //Run the code with only one thread count and only report speedup 222 | else 223 | { 224 | bool tds_check = true, bus_check = true, hs_check = true; 225 | solution sol1; 226 | sol1.distances = (int*)malloc(sizeof(int) * g->num_nodes); 227 | solution sol2; 228 | sol2.distances = (int*)malloc(sizeof(int) * g->num_nodes); 229 | solution sol3; 230 | sol3.distances = (int*)malloc(sizeof(int) * g->num_nodes); 231 | 232 | //Solution sphere 233 | solution sol4; 234 | sol4.distances = (int*)malloc(sizeof(int) * g->num_nodes); 235 | 236 | double hybrid_time, top_time, bottom_time; 237 | double ref_hybrid_time, ref_top_time, ref_bottom_time; 238 | 239 | double start; 240 | std::stringstream timing; 241 | std::stringstream ref_timing; 242 | 243 | 244 | timing << "Threads Top Down Bottom Up Hybrid\n"; 245 | ref_timing << "Threads Top Down Bottom Up Hybrid\n"; 246 | 247 | //Loop through assignment values; 248 | std::cout << "Running with " << thread_count << " threads" << std::endl; 249 | //Set thread count 250 | omp_set_num_threads(thread_count); 251 | 252 | //Run implementations 253 | start = CycleTimer::currentSeconds(); 254 | bfs_top_down(g, &sol1); 255 | top_time = CycleTimer::currentSeconds() - start; 256 | 257 | //Run reference implementation 258 | start = CycleTimer::currentSeconds(); 259 | reference_bfs_top_down(g, &sol4); 260 | ref_top_time = CycleTimer::currentSeconds() - start; 261 | 262 | std::cout << "Testing Correctness of Top Down\n"; 263 | for (int j=0; jnum_nodes; j++) { 264 | if (sol1.distances[j] != sol4.distances[j]) { 265 | fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol1.distances[j], sol4.distances[j]); 266 | tds_check = false; 267 | break; 268 | } 269 | } 270 | 271 | 272 | //Run implementations 273 | start = CycleTimer::currentSeconds(); 274 | bfs_bottom_up(g, &sol2); 275 | bottom_time = CycleTimer::currentSeconds() - start; 276 | 277 | //Run reference implementation 278 | 
start = CycleTimer::currentSeconds(); 279 | reference_bfs_bottom_up(g, &sol4); 280 | ref_bottom_time = CycleTimer::currentSeconds() - start; 281 | 282 | std::cout << "Testing Correctness of Bottom Up\n"; 283 | for (int j=0; jnum_nodes; j++) { 284 | if (sol2.distances[j] != sol4.distances[j]) { 285 | fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol2.distances[j], sol4.distances[j]); 286 | bus_check = false; 287 | break; 288 | } 289 | } 290 | 291 | 292 | start = CycleTimer::currentSeconds(); 293 | bfs_hybrid(g, &sol3); 294 | hybrid_time = CycleTimer::currentSeconds() - start; 295 | 296 | //Run reference implementation 297 | start = CycleTimer::currentSeconds(); 298 | reference_bfs_hybrid(g, &sol4); 299 | ref_hybrid_time = CycleTimer::currentSeconds() - start; 300 | 301 | std::cout << "Testing Correctness of Hybrid\n"; 302 | for (int j=0; jnum_nodes; j++) { 303 | if (sol3.distances[j] != sol4.distances[j]) { 304 | fprintf(stderr, "*** Results disagree at %d: %d, %d\n", j, sol3.distances[j], sol4.distances[j]); 305 | hs_check = false; 306 | break; 307 | } 308 | } 309 | 310 | 311 | char buf[1024]; 312 | char ref_buf[1024]; 313 | 314 | sprintf(buf, "%4d: %8.2f %8.2f %8.2f\n", 315 | thread_count, top_time, bottom_time, hybrid_time); 316 | sprintf(ref_buf, "%4d: %8.2f %8.2f %8.2f\n", 317 | thread_count, ref_top_time, ref_bottom_time, ref_hybrid_time); 318 | 319 | timing << buf; 320 | ref_timing << ref_buf; 321 | if (!tds_check) 322 | std::cout << "Top Down Search is not Correct" << std::endl; 323 | if (!bus_check) 324 | std::cout << "Bottom Up Search is not Correct" << std::endl; 325 | if (!hs_check) 326 | std::cout << "Hybrid Search is not Correct" << std::endl; 327 | printf("----------------------------------------------------------\n"); 328 | std::cout << "Your Code: Timing Summary" << std::endl; 329 | std::cout << timing.str(); 330 | printf("----------------------------------------------------------\n"); 331 | std::cout << "Reference: Timing Summary" << 
std::endl; 332 | std::cout << ref_timing.str(); 333 | printf("----------------------------------------------------------\n"); 334 | } 335 | 336 | delete g; 337 | 338 | return 0; 339 | } 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Assignment 5: Big Graph Processing in OpenMP # 2 | 3 | **Due: Fri Dec 6th, 11:59PM PT (No late submission allowed)** 4 | 5 | **84 points total** 6 | 7 | If you complete this assignment, you will receive up to 10 bonus points on one of the regular programming assignments (PA1-PA4). However, this extra credit can only boost an assignment to 100 points. 8 | 9 | ## Overview ## 10 | 11 | In this assignment, you will implement [breadth-first search](https://en.wikipedia.org/wiki/Breadth-first_search) (BFS). A good implementation of this assignment will be able to run this algorithm on graphs containing hundreds of millions of edges on a multi-core machine in only seconds. 12 | 13 | ## Environment Setup ## 14 | 15 | Final grading of this assignment will be performed on the Myth machines. 16 | 17 | The assignment starter code is available on [Github](https://github.com/stanford-cs149/biggraphs-ec). Please clone the Assignment 5 starter code using: 18 | 19 | ``` 20 | git clone https://github.com/stanford-cs149/biggraphs-ec.git 21 | ``` 22 | 23 | #### Background: Learning OpenMP #### 24 | 25 | In this assignment we'd like you to use [OpenMP](http://openmp.org/) for multi-core parallelization. OpenMP is an API and set of C-language extensions that provides compiler support for parallelism. You can also use OpenMP to tell the compiler to parallelize iterations of `for` loops, and to manage mutual exclusion. It is well documented online, but here is a brief example of parallelizing a `for` loop, with mutual exclusion. 
26 | ```c 27 | /* The iterations of this for loop may be parallelized by the compiler */ 28 | #pragma omp parallel for 29 | for (int i = 0; i < 100; i++) { 30 | 31 | /* different iterations of this part of the loop body may be 32 | run in parallel on different cores */ 33 | 34 | #pragma omp critical 35 | { 36 | /* This block will be executed by at most one thread at a time. */ 37 | printf("Thread %d got iteration %d\n", omp_get_thread_num(), i); 38 | } 39 | } 40 | ``` 41 | Please see OpenMP documentation for the syntax for how to tell OpenMP to use different forms of static or dynamic scheduling. (For example, `omp parallel for schedule(dynamic, 100)` distributes iterations to threads using dynamic scheduling with a chunk size of 100 iterations). You can think of the implementation as a dynamic work queue where threads in the thread pool pull off 100 iterations at once, like what [we talked about in these lecture slides](https://gfxcourses.stanford.edu/cs149/fall24/lecture/perfopt1/slide_11). 42 | 43 | Here is an example for an atomic counter update in OpenMP. 44 | ```c 45 | int my_counter = 0; 46 | #pragma omp parallel for 47 | for (int i = 0; i < 100; i++) { 48 | if ( ... some condition ...) { 49 | #pragma omp atomic 50 | my_counter++; 51 | } 52 | } 53 | ``` 54 | We expect you to be able to read OpenMP documentation on your own (Google will be very helpful), but here are some useful links to get you started: 55 | 56 | * The OpenMP 3.0 specification: . 57 | * An OpenMP cheat sheet . 58 | * OpenMP has support for reductions on shared variables, and for declaring thread-local copies of variables. 59 | * This is a nice guide for the `omp parallel for` directives: 60 | 61 | #### Background: Representing Graphs #### 62 | 63 | The starter code operates on directed graphs, whose implementation you can find in `graph.h` and `graph_internal.h`. We recommend you begin by understanding the graph representation in these files.
A graph is represented by an array of edges (both `outgoing_edges` and `incoming_edges`), where each edge is represented by an integer describing the id of the destination vertex. Edges are stored in the graph sorted by their source vertex, so the source vertex is implicit in the representation. This makes for a compact representation of the graph, and also allows it to be stored contiguously in memory. For example, to iterate over the outgoing edges for all nodes in the graph, you'd use the following code which makes use of convenient helper functions defined in `graph.h` (and implemented in `graph_internal.h`): 64 | ```c 65 | for (int i=0; i. You can download the dataset using `wget http://cs149.stanford.edu/cs149asstdata/all_graphs.tgz`, and then untar it with `tar -xzvf all_graphs.tgz`. Be careful, this is a 3 GB download. 80 | 81 | Some interesting real-world graphs include: 82 | 83 | * com-orkut_117m.graph 84 | * soc-pokec_30m.graph 85 | * soc-livejournal1_68m.graph 86 | 87 | Some useful synthetic, but large, graphs include: 88 | 89 | * random_500m.graph 90 | * rmat_200m.graph 91 | 92 | There are also some very small graphs for testing. If you look in the `/tools` directory of the starter code, you'll notice a useful program called `graphTools.cpp` that can be used to make your own graphs as well. 93 | 94 | ## Part 1: Parallel "Top Down" Breadth-First Search (20 points) ## 95 | 96 | Breadth-first search (BFS) is a common algorithm that you might have seen in a prior algorithms class (See [here](https://www.hackerearth.com/practice/algorithms/graphs/breadth-first-search/tutorial/) and [here](https://www.youtube.com/watch?v=oDqjPvD54Ss) for helpful references.) 97 | Please familiarize yourself with the function `bfs_top_down()` in `bfs/bfs.cpp`, which contains a sequential implementation of BFS. The code uses BFS to compute the distance to vertex 0 for all vertices in the graph.
You may wish to familiarize yourself with the graph structure defined in `common/graph.h` as well as the simple array data structure `vertex_set` (`bfs/bfs.h`), which is an array of vertices used to represent the current frontier of BFS. 98 | 99 | You can run bfs using: 100 | 101 | ./bfs <PATH_TO_GRAPHS_DIRECTORY>/rmat_200m.graph 102 | 103 | where `<PATH_TO_GRAPHS_DIRECTORY>` is the path to the directory containing the graph files (see the "Dataset" section above). 104 | 105 | When you run `bfs`, you'll see execution time and the frontier size printed for each step in the algorithm. Correctness will pass for the top-down version (since we've given you a correct sequential implementation), but it will be slow. (Note that `bfs` will report failures for the "bottom up" and "hybrid" versions of the algorithm, which you will implement later in this assignment.) 106 | 107 | In this part of the assignment your job is to parallelize top-down BFS. You'll need to focus on identifying parallelism, as well as inserting the appropriate synchronization to ensure correctness. We wish to remind you that you __should not__ expect to achieve near-perfect speedups on this problem (we'll leave it to you to think about why!). 108 | 109 | __Tips/Hints:__ 110 | 111 | * Always start by considering what work can be done in parallel. 112 | * Some parts of the computation may need to be synchronized, for example, by wrapping the appropriate code within a critical region using `#pragma omp critical` or `#pragma omp atomic`. __However, in this problem you should think about how to make use of the simple atomic operation called `compare and swap`.__ You can read about [GCC's implementation of compare and swap](https://gcc.gnu.org/onlinedocs/gcc-9.4.0/gcc/_005f_005fsync-Builtins.html), which is exposed to C code as the function `__sync_bool_compare_and_swap`. If you can figure out how to use compare-and-swap for this problem, you will achieve much higher performance than using a critical region.
113 | * Updating a shared counter can be done efficiently using `#pragma omp atomic` before a line like `counter++;`. 114 | * Are there conditions where it is possible to avoid using `compare_and_swap`? In other words, when you *know* in advance that the comparison will fail? 115 | * There is a preprocessor macro `VERBOSE` to make it easy to disable the useful per-step timing printouts in your solution (see the top of `bfs/bfs.cpp`). In general, these printfs occur infrequently enough (only once per BFS step) that they do not notably impact performance, but if you want to disable the printfs during timing, you can use this `#define` as a convenience. 116 | 117 | ## Part 2: "Bottom Up" BFS (25 points) ## 118 | 119 | Think about what behavior might cause a performance problem in the BFS implementation from Part 1. An alternative implementation of a breadth-first search step may be more efficient in these situations. Instead of iterating over all vertices in the frontier and marking all vertices adjacent to the frontier, it is possible to implement BFS by having *each vertex check whether it should be added to the frontier!* Basic pseudocode for the algorithm is as follows: 120 | 121 | ``` 122 | for each vertex v in graph: 123 | if v has not been visited AND 124 | v shares an incoming edge with a vertex u on the frontier: 125 | add vertex v to frontier; 126 | ``` 127 | 128 | This algorithm is sometimes referred to as a "bottom up" implementation of BFS, since each vertex looks "up the BFS tree" to find its ancestor. (As opposed to being found by its ancestor in a "top down" fashion, as was done in Part 1.) 129 | 130 | Please implement a bottom-up BFS to compute the shortest path to all the vertices in the graph from the root (see `bfs_bottom_up()` in `bfs/bfs.cpp`). Start by implementing a simple sequential version. Then parallelize your implementation. 131 | 132 | __Tips/Hints:__ 133 | 134 | * It may be useful to think about how you represent the set of unvisited nodes.
Do the top-down and bottom-up versions of the code lend themselves to different implementations? 135 | * How do the synchronization requirements of the bottom-up BFS change? 136 | 137 | ## Part 3: Hybrid BFS (25 points) ## 138 | 139 | Notice that in some steps of the BFS, the "bottom up" BFS is significantly faster than the top-down version. In other steps, the top-down version is significantly faster. This suggests a major performance improvement in your implementation, if __you could dynamically choose between your "top down" and "bottom up" formulations based on the size of the frontier or other properties of the graph!__ If you want a solution competitive with the reference one, your implementation will likely have to implement this dynamic optimization. Please provide your solution in `bfs_hybrid()` in `bfs/bfs.cpp`. 140 | 141 | __Tips/Hints:__ 142 | 143 | * If you used different representations of the frontier in Parts 1 and 2, you may have to convert between these representations in the hybrid solution. How might you efficiently convert between them? Is there an overhead in doing so? 144 | 145 | You can run our grading script via: `./bfs_grader <PATH_TO_GRAPHS_DIRECTORY>`, which will report correctness and a performance points score for a number of graphs. 146 | 147 | ## Grading and Handin ## 148 | 149 | Along with your code, we would like you to hand in a clear but concise high-level description of how your implementation works as well as a brief description of how you arrived at your solutions. Specifically address approaches you tried along the way, and how you went about determining how to optimize your code (For example, what measurements did you perform to guide your optimization efforts?). 150 | 151 | Aspects of your work that you should mention in the write-up include: 152 | 153 | 1. Include both partners' names at the top of your write-up. 154 | 2. Run bfs_grader on a Myth machine and insert a copy of the score table in your solutions.
**We will use Myth machines to grade your code.** 155 | 3. Describe the process of optimizing your code: 156 | * In Parts 1 (Top Down) and 2 (Bottom Up), where is the synchronization in each of your solutions? Do you do anything to limit the overhead of synchronization? 157 | * In Part 3 (Hybrid), did you decide to switch between the top-down and bottom-up BFS implementations dynamically? How did you decide which implementation to use? 158 | * Why do you think your code (and the staff reference) is unable to achieve perfect speedup? (Is it workload imbalance? communication/synchronization? data movement?) 159 | 160 | ## Points Distribution ## 161 | 162 | The 84 points on this assignment are allotted as follows: 163 | 164 | * 70 points: BFS performance 165 | * 14 points: Write-up 166 | 167 | If you earn `x` points on this assignment, we will boost your grade on any prior programming assignment by `(x/84) * 10` points, rounded to the nearest tenth. 168 | 169 | ## Hand-in Instructions ## 170 | 171 | Please submit your work using Gradescope. 172 | 173 | 1. __Please submit your writeup as a PDF in the Gradescope assignment Programming Assignment 5 (Writeup).__ 174 | 2. __To submit your code, run `sh create_submission.sh` to generate a `tar.gz` file and submit it to Programming Assignment 5 (Code).__ We only look at your `bfs/bfs.cpp` and `bfs/bfs.h` files, so do not change any other files. Before submitting the source files, make sure that all code is compilable and runnable! We should be able to simply make, then execute your programs in the `/bfs` directory without manual intervention. 175 | 176 | Our grading scripts will rerun the checker code allowing us to verify your score matches what you submitted in your writeup. We may also run your code on other datasets to further examine its correctness. 177 | --------------------------------------------------------------------------------