├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── bin └── .gitkeep ├── core ├── atomic.hpp ├── bigvector.hpp ├── bitmap.hpp ├── constants.hpp ├── filesystem.hpp ├── graph.hpp ├── partition.hpp ├── queue.hpp ├── time.hpp └── type.hpp ├── examples ├── bfs.cpp ├── mis.cpp ├── pagerank.cpp ├── radii.cpp ├── spmv.cpp └── wcc.cpp └── tools ├── clear_cache.sh ├── preprocess.cpp └── raise_ulimit_n.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | ROOT_DIR= $(shell pwd) 3 | TARGETS= bin/preprocess bin/bfs bin/wcc bin/pagerank bin/spmv bin/mis bin/radii 4 | 5 | CXX?= g++ 6 | CXXFLAGS?= -O3 -Wall -std=c++11 -g -fopenmp -I$(ROOT_DIR) 7 | HEADERS= $(shell find . 
-name '*.hpp') 8 | 9 | all: $(TARGETS) 10 | 11 | bin/preprocess: tools/preprocess.cpp $(HEADERS) 12 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 13 | 14 | bin/bfs: examples/bfs.cpp $(HEADERS) 15 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 16 | 17 | bin/wcc: examples/wcc.cpp $(HEADERS) 18 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 19 | 20 | bin/pagerank: examples/pagerank.cpp $(HEADERS) 21 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 22 | 23 | bin/spmv: examples/spmv.cpp $(HEADERS) 24 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 25 | 26 | bin/mis: examples/mis.cpp $(HEADERS) 27 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 28 | 29 | bin/radii: examples/radii.cpp $(HEADERS) 30 | $(CXX) $(CXXFLAGS) -o $@ $< $(SYSLIBS) 31 | 32 | clean: 33 | rm -rf $(TARGETS) 34 | 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GridGraph 2 | A large scale graph processing framework on a single machine. 3 | 4 | ## Compilation 5 | Compilers supporting basic C++11 features (lambdas, threads, etc.) and OpenMP are required. 6 | 7 | To compile: 8 | ``` 9 | make 10 | ``` 11 | 12 | ## Preprocessing 13 | Before running applications on a graph, GridGraph needs to partition the original edge list into the grid format. 14 | 15 | Two types of edge list files are supported: 16 | - Unweighted. Edges are tuples of <4 byte source, 4 byte destination>. 17 | - Weighted. Edges are tuples of <4 byte source, 4 byte destination, 4 byte float typed weight>. 
18 | 19 | To partition the edge list: 20 | ``` 21 | ./bin/preprocess -i [input path] -o [output path] -v [vertices] -p [partitions] -t [edge type: 0=unweighted, 1=weighted] 22 | ``` 23 | For example, we want to partition the unweighted [LiveJournal](http://snap.stanford.edu/data/soc-LiveJournal1.html) graph into a 4x4 grid: 24 | ``` 25 | ./bin/preprocess -i /data/LiveJournal -o /data/LiveJournal_Grid -v 4847571 -p 4 -t 0 26 | ``` 27 | 28 | > You may need to raise the limit of maximum open file descriptors (./tools/raise\_ulimit\_n.sh). 29 | 30 | ## Running Applications 31 | To run the applications, just give the path of the grid format and the memory budget (unit in GB), as well as other necessary program parameters (e.g. the starting vertex of BFS, the number of iterations of PageRank, etc.): 32 | 33 | ### BFS 34 | ``` 35 | ./bin/bfs [path] [start vertex id] [memory budget] 36 | ``` 37 | 38 | ### WCC 39 | ``` 40 | ./bin/wcc [path] [memory budget] 41 | ``` 42 | 43 | ### SpMV 44 | ``` 45 | ./bin/spmv [path] [memory budget] 46 | ``` 47 | 48 | ### PageRank 49 | ``` 50 | ./bin/pagerank [path] [number of iterations] [memory budget] 51 | ``` 52 | 53 | For example, to run 20 iterations of PageRank on the (grid partitioned) [LiveJournal](http://snap.stanford.edu/data/soc-LiveJournal1.html) graph using a machine with 8 GB RAM: 54 | ``` 55 | ./bin/pagerank /data/LiveJournal_Grid 20 8 56 | ``` 57 | 58 | ## Resources 59 | Xiaowei Zhu, Wentao Han and Wenguang Chen. [GridGraph: Large-Scale Graph Processing on a Single Machine Using 2-Level Hierarchical Partitioning](https://www.usenix.org/system/files/conference/atc15/atc15-paper-zhu.pdf). Proceedings of the 2015 USENIX Annual Technical Conference, pages 375-386. 
60 | 61 | To cite GridGraph, you can use the following BibTeX entry: 62 | ``` 63 | @inproceedings {zhu2015gridgraph, 64 | author = {Xiaowei Zhu and Wentao Han and Wenguang Chen}, 65 | title = {GridGraph: Large-Scale Graph Processing on a Single Machine Using 2-Level Hierarchical Partitioning}, 66 | booktitle = {2015 USENIX Annual Technical Conference (USENIX ATC 15)}, 67 | year = {2015}, 68 | month = Jul, 69 | isbn = {978-1-931971-225}, 70 | address = {Santa Clara, CA}, 71 | pages = {375--386}, 72 | url = {https://www.usenix.org/conference/atc15/technical-session/presentation/zhu}, 73 | publisher = {USENIX Association}, 74 | } 75 | ``` 76 | -------------------------------------------------------------------------------- /bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thu-pacman/GridGraph/1f1a6262ef6ffbc61bcc229357e106843bd457ac/bin/.gitkeep -------------------------------------------------------------------------------- /core/atomic.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
// End of the Apache License 2.0 header of core/atomic.hpp. */
#ifndef ATOMIC_H
#define ATOMIC_H

// NOTE(review): the three original #include targets were stripped from this
// listing; <assert.h>, <stdio.h> and <stdlib.h> are presumed -- confirm upstream.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
// Needed by the fixed cas(): fixed-width integers and memcpy().
#include <stdint.h>
#include <string.h>

namespace atomic_detail {

// Maps an operand size in bytes to the equally sized integer type handed to
// the __sync builtins. Only 4 and 8 are defined, so any other size is
// rejected at compile time.
template <size_t Bytes> struct cas_word;
template <> struct cas_word<4> { typedef int32_t type; };
template <> struct cas_word<8> { typedef int64_t type; };

// Returns the object representation of `v` as an integer of type W.
// memcpy is used instead of a pointer cast so that float/double operands do
// not violate strict aliasing.
template <class W, class V>
inline W to_word(const volatile V & v) {
	static_assert(sizeof(W) == sizeof(V), "to_word() requires equally sized types");
	W w;
	memcpy(&w, const_cast<const V *>(&v), sizeof(W));
	return w;
}

} // namespace atomic_detail

// Atomic compare-and-swap on a 4- or 8-byte object.
//
// Stores `newv` into `*ptr` if and only if `*ptr` currently holds exactly the
// bit pattern of `oldv`. The comparison is bitwise, so for floating point
// types +0.0 and -0.0 differ and a NaN matches an identical NaN.
//
// Returns true when the swap was performed, false otherwise.
//
// Unsupported operand sizes are now a compile-time error; previously they hit
// assert(false) at run time and, with NDEBUG, fell off the end of the function
// without returning a value (undefined behaviour).
template <class ET>
inline bool cas(ET *ptr, ET oldv, ET newv) {
	static_assert(sizeof(ET) == 4 || sizeof(ET) == 8, "cas() only supports 4- and 8-byte types");
	typedef typename atomic_detail::cas_word<sizeof(ET)>::type word_t;
	const word_t expected = atomic_detail::to_word<word_t>(oldv);
	const word_t desired = atomic_detail::to_word<word_t>(newv);
	return __sync_bool_compare_and_swap(reinterpret_cast<volatile word_t *>(ptr), expected, desired);
}

// Atomically lowers `*a` to `b` when `b` is smaller than the current value.
//
// Retries while the stored value is still greater than `b` and the
// compare-and-swap loses a race. Returns true only if this call performed the
// store; false means `*a` was already <= `b`.
template <class ET>
inline bool write_min(ET *a, ET b) {
	ET c; bool r=0;
	do c = *a;
	while (c > b && !(r=cas(a,c,b)));
	return r;
}

// Atomically adds `b` to `*a` using a compare-and-swap retry loop.
//
// The locals are plain values: the full barrier implied by the __sync builtin
// inside cas() already forces `*a` to be re-read on every retry, so the
// `volatile` qualifiers of the original added nothing.
template <class ET>
inline void write_add(ET *a, ET b) {
	ET oldV, newV;
	do {
		oldV = *a;
		newV = oldV + b;
	} while (!cas(a, oldV, newV));
}

#endif
// -------------------------------------------------------------------------------- /core/bigvector.hpp: --------------------------------------------------------------------------------
// 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License.
15 | */ 16 | 17 | #ifndef BIGVECTOR_H 18 | #define BIGVECTOR_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "core/filesystem.hpp" 29 | #include "core/partition.hpp" 30 | 31 | template 32 | class BigVector { 33 | std::string path; 34 | bool is_open; 35 | bool in_memory = false; 36 | size_t begin_i = 0, end_i = 0; 37 | T * data_in_memory = NULL; 38 | static const long PAGESIZE = 4096; 39 | public: 40 | int fd; 41 | T * data; 42 | size_t length; 43 | BigVector() { 44 | is_open = false; 45 | data = NULL; 46 | length = 0; 47 | } 48 | BigVector(std::string path, size_t length) { 49 | init(path, length); 50 | } 51 | BigVector(std::string path) { 52 | init(path); 53 | } 54 | ~BigVector() { 55 | if (is_open && file_exists(path)) { 56 | close_mmap(); 57 | } 58 | } 59 | void init(std::string path) { 60 | assert(file_exists(path)); 61 | assert(file_size(path) % sizeof(T) == 0); 62 | init(path, file_size(path) / sizeof(T)); 63 | } 64 | void init(std::string path, size_t length) { 65 | this->path = path; 66 | this->length = length; 67 | if (!file_exists(path)) { 68 | FILE * fout = fopen(path.c_str(), "wb"); 69 | fclose(fout); 70 | } 71 | if (file_size(path) != sizeof(T) * length) { 72 | long file_length = sizeof(T) * length; 73 | assert(truncate(path.c_str(), file_length)!=-1); 74 | int fout = open(path.c_str(), O_WRONLY); 75 | void * buffer = memalign(PAGESIZE, PAGESIZE); 76 | for (long offset=0;offset PAGESIZE) { 78 | assert(write(fout, buffer, PAGESIZE)==PAGESIZE); 79 | offset += PAGESIZE; 80 | } 81 | else { 82 | assert(write(fout, buffer, file_length - offset)==file_length - offset); 83 | offset += file_length - offset; 84 | } 85 | } 86 | close(fout); 87 | } 88 | fd = open(path.c_str(), O_RDWR | O_DIRECT); 89 | assert(fd!=-1); 90 | open_mmap(); 91 | } 92 | void open_mmap() { 93 | int ret = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL); 94 | assert(ret==0); 95 | data = (T *)mmap(NULL, sizeof(T) * length, 
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 96 | assert(data!=MAP_FAILED); 97 | is_open = true; 98 | } 99 | void close_mmap() { 100 | is_open = false; 101 | int ret = munmap(data, sizeof(T) * length); 102 | assert(ret==0); 103 | } 104 | void fill(const T & value) { 105 | int parallelism = std::thread::hardware_concurrency(); 106 | #pragma omp parallel num_threads(parallelism) 107 | { 108 | size_t begin_i, end_i; 109 | std::tie(begin_i, end_i) = get_partition_range(length, omp_get_num_threads(), omp_get_thread_num()); 110 | for (size_t i=begin_i;i= begin_i && i <= end_i)) { 119 | printf("%s %lu %lu %lu\n", path.c_str(), begin_i, i, end_i); 120 | exit(-1); 121 | } 122 | return data_in_memory[i - begin_i]; 123 | } else { 124 | return data[i]; 125 | } 126 | } 127 | void sync() { 128 | assert(msync(data, sizeof(T) * length, MS_SYNC)==0); 129 | } 130 | void lock(size_t begin_i, size_t end_i) { 131 | assert(mlock(data + begin_i, (end_i - begin_i) * sizeof(T))==0); 132 | } 133 | void unlock(size_t begin_i, size_t end_i) { 134 | assert(munlock(data + begin_i, (end_i - begin_i) * sizeof(T))==0); 135 | } 136 | void load(size_t begin_i, size_t end_i) { 137 | close_mmap(); 138 | begin_i = begin_i * sizeof(T) / PAGESIZE * PAGESIZE / sizeof(T); 139 | this->begin_i = begin_i; 140 | this->end_i = end_i; 141 | in_memory = true; 142 | // data_in_memory = (T *)memalign(PAGESIZE, (end_i - begin_i) * sizeof(T) + PAGESIZE); 143 | // assert(data_in_memory!=NULL); 144 | data_in_memory = (T *)mmap(0, (end_i - begin_i) * sizeof(T) + PAGESIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 145 | assert(data_in_memory!=MAP_FAILED); 146 | long end_offset = end_i * sizeof(T); 147 | long offset = begin_i * sizeof(T); 148 | long bytes; 149 | while (offset < end_offset) { 150 | bytes = pread(fd, data_in_memory + (offset / sizeof(T) - begin_i), (end_offset - offset + PAGESIZE - 1) / PAGESIZE * PAGESIZE, offset); 151 | if (bytes==-1) { 152 | printf("%ld %ld\n", offset, end_offset); 153 
| printf("%s\n", strerror(errno)); 154 | getchar(); 155 | exit(-1); 156 | } 157 | offset += bytes; 158 | } 159 | } 160 | void save() { 161 | long end_offset = end_i * sizeof(T); 162 | long offset = begin_i * sizeof(T); 163 | long bytes; 164 | while (offset < end_offset) { 165 | bytes = pwrite(fd, data_in_memory + (offset / sizeof(T) - begin_i), (end_offset - offset + PAGESIZE - 1) / PAGESIZE * PAGESIZE, offset); 166 | if (bytes==-1) { 167 | printf("%ld %ld\n", offset, end_offset); 168 | printf("%s\n", strerror(errno)); 169 | getchar(); 170 | exit(-1); 171 | } 172 | offset += bytes; 173 | } 174 | int ret = munmap(data_in_memory, (end_i - begin_i) * sizeof(T) + PAGESIZE); 175 | assert(ret==0); 176 | in_memory = false; 177 | begin_i = 0; 178 | end_i = 0; 179 | open_mmap(); 180 | } 181 | }; 182 | 183 | #endif 184 | -------------------------------------------------------------------------------- /core/bitmap.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #ifndef BITMAP_H 18 | #define BITMAP_H 19 | 20 | #define WORD_OFFSET(i) (i >> 6) 21 | #define BIT_OFFSET(i) (i & 0x3f) 22 | 23 | class Bitmap { 24 | public: 25 | size_t size; 26 | unsigned long * data; 27 | Bitmap() { 28 | size = 0; 29 | data = NULL; 30 | } 31 | Bitmap(size_t size) { 32 | init(size); 33 | } 34 | void init(size_t size) { 35 | this->size = size; 36 | data = new unsigned long [WORD_OFFSET(size)+1]; 37 | } 38 | void clear() { 39 | size_t bm_size = WORD_OFFSET(size); 40 | #pragma omp parallel for 41 | for (size_t i=0;i<=bm_size;i++) { 42 | data[i] = 0; 43 | } 44 | #pragma omp barrier 45 | } 46 | void fill() { 47 | size_t bm_size = WORD_OFFSET(size); 48 | #pragma omp parallel for 49 | for (size_t i=0;i 21 | #include 22 | #include 23 | 24 | inline bool file_exists(std::string filename) { 25 | struct stat st; 26 | return stat(filename.c_str(), &st)==0; 27 | } 28 | 29 | inline long file_size(std::string filename) { 30 | struct stat st; 31 | assert(stat(filename.c_str(), &st)==0); 32 | return st.st_size; 33 | } 34 | 35 | inline void create_directory(std::string path) { 36 | assert(mkdir(path.c_str(), 0764)==0 || errno==EEXIST); 37 | } 38 | 39 | // TODO: only on unix-like systems 40 | inline void remove_directory(std::string path) { 41 | char command[1024]; 42 | sprintf(command, "rm -rf %s", path.c_str()); 43 | system(command); 44 | } 45 | 46 | #endif -------------------------------------------------------------------------------- /core/graph.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #ifndef GRAPH_H 18 | #define GRAPH_H 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | 31 | #include "core/constants.hpp" 32 | #include "core/type.hpp" 33 | #include "core/bitmap.hpp" 34 | #include "core/atomic.hpp" 35 | #include "core/queue.hpp" 36 | #include "core/partition.hpp" 37 | #include "core/bigvector.hpp" 38 | #include "core/time.hpp" 39 | 40 | bool f_true(VertexId v) { 41 | return true; 42 | } 43 | 44 | void f_none_1(std::pair vid_range) { 45 | 46 | } 47 | 48 | void f_none_2(std::pair source_vid_range, std::pair target_vid_range) { 49 | 50 | } 51 | 52 | class Graph { 53 | int parallelism; 54 | int edge_unit; 55 | bool * should_access_shard; 56 | long ** fsize; 57 | char ** buffer_pool; 58 | long * column_offset; 59 | long * row_offset; 60 | long memory_bytes; 61 | int partition_batch; 62 | long vertex_data_bytes; 63 | long PAGESIZE; 64 | public: 65 | std::string path; 66 | 67 | int edge_type; 68 | VertexId vertices; 69 | EdgeId edges; 70 | int partitions; 71 | 72 | Graph (std::string path) { 73 | PAGESIZE = 4096; 74 | parallelism = std::thread::hardware_concurrency(); 75 | buffer_pool = new char * [parallelism*1]; 76 | for (int i=0;imemory_bytes = memory_bytes; 86 | } 87 | 88 | void set_vertex_data_bytes(long vertex_data_bytes) { 89 | this->vertex_data_bytes = vertex_data_bytes; 90 | } 91 | 92 | void init(std::string path) { 93 | this->path = path; 94 | 95 | FILE * fin_meta = fopen((path+"/meta").c_str(), "r"); 96 | 
fscanf(fin_meta, "%d %d %ld %d", &edge_type, &vertices, &edges, &partitions); 97 | fclose(fin_meta); 98 | 99 | if (edge_type==0) { 100 | PAGESIZE = 4096; 101 | } else { 102 | PAGESIZE = 12288; 103 | } 104 | 105 | should_access_shard = new bool[partitions]; 106 | 107 | if (edge_type==0) { 108 | edge_unit = sizeof(VertexId) * 2; 109 | } else { 110 | edge_unit = sizeof(VertexId) * 2 + sizeof(Weight); 111 | } 112 | 113 | memory_bytes = 1024l*1024l*1024l*1024l; // assume RAM capacity is very large 114 | partition_batch = partitions; 115 | vertex_data_bytes = 0; 116 | 117 | char filename[1024]; 118 | fsize = new long * [partitions]; 119 | for (int i=0;i 147 | T stream_vertices(std::function process, Bitmap * bitmap = nullptr, T zero = 0, 148 | std::function)> pre = f_none_1, 149 | std::function)> post = f_none_1) { 150 | T value = zero; 151 | if (bitmap==nullptr && vertex_data_bytes > (0.8 * memory_bytes)) { 152 | for (int cur_partition=0;cur_partition=partitions) { 156 | end_vid = vertices; 157 | } else { 158 | end_vid = get_partition_range(vertices, partitions, cur_partition+partition_batch).first; 159 | } 160 | pre(std::make_pair(begin_vid, end_vid)); 161 | #pragma omp parallel for schedule(dynamic) num_threads(parallelism) 162 | for (int partition_id=cur_partition;partition_iddata[WORD_OFFSET(i)]; 190 | if (word==0) { 191 | i = (WORD_OFFSET(i) + 1) << 6; 192 | continue; 193 | } 194 | size_t j = BIT_OFFSET(i); 195 | word = word >> j; 196 | while (word!=0) { 197 | if (word & 1) { 198 | local_value += process(i); 199 | } 200 | i++; 201 | j++; 202 | word = word >> 1; 203 | if (i==end_vid) break; 204 | } 205 | i += (64 - j); 206 | } 207 | } 208 | write_add(&value, local_value); 209 | } 210 | #pragma omp barrier 211 | } 212 | return value; 213 | } 214 | 215 | void set_partition_batch(long bytes) { 216 | int x = (int)ceil(bytes / (0.8 * memory_bytes)); 217 | partition_batch = partitions / x; 218 | } 219 | 220 | template 221 | void hint(Args... 
args); 222 | 223 | template 224 | void hint(BigVector & a) { 225 | long bytes = sizeof(A) * a.length; 226 | set_partition_batch(bytes); 227 | } 228 | 229 | template 230 | void hint(BigVector & a, BigVector & b) { 231 | long bytes = sizeof(A) * a.length + sizeof(B) * b.length; 232 | set_partition_batch(bytes); 233 | } 234 | 235 | template 236 | void hint(BigVector & a, BigVector & b, BigVector & c) { 237 | long bytes = sizeof(A) * a.length + sizeof(B) * b.length + sizeof(C) * c.length; 238 | set_partition_batch(bytes); 239 | } 240 | 241 | template 242 | T stream_edges(std::function process, Bitmap * bitmap = nullptr, T zero = 0, int update_mode = 1, 243 | std::function vid_range)> pre_source_window = f_none_1, 244 | std::function vid_range)> post_source_window = f_none_1, 245 | std::function vid_range)> pre_target_window = f_none_1, 246 | std::function vid_range)> post_target_window = f_none_1) { 247 | if (bitmap==nullptr) { 248 | for (int i=0;idata[WORD_OFFSET(i)]; 262 | if (word!=0) { 263 | should_access_shard[partition_id] = true; 264 | break; 265 | } 266 | i = (WORD_OFFSET(i) + 1) << 6; 267 | } 268 | } 269 | #pragma omp barrier 270 | } 271 | 272 | T value = zero; 273 | Queue > tasks(65536); 274 | std::vector threads; 275 | long read_bytes = 0; 276 | 277 | long total_bytes = 0; 278 | for (int i=0;i0); 310 | local_read_bytes += bytes; 311 | // CHECK: start position should be offset % edge_unit 312 | for (long pos=offset % edge_unit;pos+edge_unit<=bytes;pos+=edge_unit) { 313 | Edge & e = *(Edge*)(buffer+pos); 314 | if (bitmap==nullptr || bitmap->get_bit(e.source)) { 315 | local_value += process(e); 316 | } 317 | } 318 | } 319 | write_add(&value, local_value); 320 | write_add(&read_bytes, local_read_bytes); 321 | }, ti); 322 | } 323 | fin = open((path+"/row").c_str(), read_mode); 324 | posix_fadvise(fin, 0, 0, POSIX_FADV_SEQUENTIAL); 325 | for (int i=0;i= PAGESIZE) { 330 | offset = begin_offset / PAGESIZE * PAGESIZE; 331 | } 332 | long end_offset = 
row_offset[i*partitions+j+1]; 333 | if (end_offset <= offset) continue; 334 | while (end_offset - offset >= IOSIZE) { 335 | tasks.push(std::make_tuple(fin, offset, IOSIZE)); 336 | offset += IOSIZE; 337 | } 338 | if (end_offset > offset) { 339 | tasks.push(std::make_tuple(fin, offset, (end_offset - offset + PAGESIZE - 1) / PAGESIZE * PAGESIZE)); 340 | offset += (end_offset - offset + PAGESIZE - 1) / PAGESIZE * PAGESIZE; 341 | } 342 | } 343 | } 344 | for (int i=0;i=partitions) { 359 | end_vid = vertices; 360 | } else { 361 | end_vid = get_partition_range(vertices, partitions, cur_partition+partition_batch).first; 362 | } 363 | pre_source_window(std::make_pair(begin_vid, end_vid)); 364 | // printf("pre %d %d\n", begin_vid, end_vid); 365 | threads.clear(); 366 | for (int ti=0;ti0); 378 | local_read_bytes += bytes; 379 | // CHECK: start position should be offset % edge_unit 380 | for (long pos=offset % edge_unit;pos+edge_unit<=bytes;pos+=edge_unit) { 381 | Edge & e = *(Edge*)(buffer+pos); 382 | if (e.source < begin_vid || e.source >= end_vid) { 383 | continue; 384 | } 385 | if (bitmap==nullptr || bitmap->get_bit(e.source)) { 386 | local_value += process(e); 387 | } 388 | } 389 | } 390 | write_add(&value, local_value); 391 | write_add(&read_bytes, local_read_bytes); 392 | }, ti); 393 | } 394 | offset = 0; 395 | for (int j=0;j=partitions) break; 398 | if (!should_access_shard[i]) continue; 399 | long begin_offset = column_offset[j*partitions+i]; 400 | if (begin_offset - offset >= PAGESIZE) { 401 | offset = begin_offset / PAGESIZE * PAGESIZE; 402 | } 403 | long end_offset = column_offset[j*partitions+i+1]; 404 | if (end_offset <= offset) continue; 405 | while (end_offset - offset >= IOSIZE) { 406 | tasks.push(std::make_tuple(fin, offset, IOSIZE)); 407 | offset += IOSIZE; 408 | } 409 | if (end_offset > offset) { 410 | tasks.push(std::make_tuple(fin, offset, (end_offset - offset + PAGESIZE - 1) / PAGESIZE * PAGESIZE)); 411 | offset += (end_offset - offset + PAGESIZE - 1) / 
PAGESIZE * PAGESIZE; 412 | } 413 | } 414 | } 415 | for (int i=0;i get_partition_range(const size_t vertices, const size_t partitions, const size_t partition_id) { 31 | const size_t split_partition = vertices % partitions; 32 | const size_t partition_size = vertices / partitions + 1; 33 | if (partition_id < split_partition) { 34 | const size_t begin = partition_id * partition_size; 35 | const size_t end = (partition_id + 1) * partition_size; 36 | return std::make_pair(begin, end); 37 | } 38 | const size_t split_point = split_partition * partition_size; 39 | const size_t begin = split_point + (partition_id - split_partition) * (partition_size - 1); 40 | const size_t end = split_point + (partition_id - split_partition + 1) * (partition_size - 1); 41 | return std::make_pair(begin, end); 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /core/queue.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
/* core/queue.hpp -- the Apache-2.0 license header ends here. */

#ifndef QUEUE_H
#define QUEUE_H

#include <condition_variable>
#include <cstddef>
#include <mutex>
#include <queue>
#include <utility>

/*
Bounded blocking FIFO queue for handing items from producer threads to
consumer threads.

- push() blocks while the queue already holds `capacity` items.
- pop() blocks while the queue is empty.
- Items leave the queue in the order they were inserted.

A capacity of 0 means the queue is always considered full, so every push()
would block forever; construct the queue with a capacity of at least 1.
*/
template <typename T>
class Queue {
	const size_t capacity;
	std::queue<T> queue;
	// mutable so that the const observers below can take the lock.
	mutable std::mutex mutex;
	std::condition_variable cond_full;   // signalled when a slot is freed
	std::condition_variable cond_empty;  // signalled when an item is added

	// Unsynchronised state checks; callers must already hold `mutex`.
	bool full_unlocked() const {
		return queue.size() >= capacity;
	}
	bool empty_unlocked() const {
		return queue.empty();
	}
public:
	Queue(const size_t capacity) : capacity(capacity) { }

	// Appends a copy of `item`, waiting until there is a free slot.
	void push(const T & item) {
		std::unique_lock<std::mutex> lock(mutex);
		cond_full.wait(lock, [this]{ return !full_unlocked(); });
		queue.push(item);
		// Release the lock before notifying so the woken consumer does not
		// immediately block on the mutex we still hold.
		lock.unlock();
		cond_empty.notify_one();
	}

	// Appends `item` by moving it, waiting until there is a free slot.
	// This overload lets temporaries and move-only types be enqueued
	// without a copy.
	void push(T && item) {
		std::unique_lock<std::mutex> lock(mutex);
		cond_full.wait(lock, [this]{ return !full_unlocked(); });
		queue.push(std::move(item));
		lock.unlock();
		cond_empty.notify_one();
	}

	// Removes and returns the oldest item, waiting until one is available.
	T pop() {
		std::unique_lock<std::mutex> lock(mutex);
		cond_empty.wait(lock, [this]{ return !empty_unlocked(); });
		// The front element is discarded right after, so move out of it
		// instead of copying.
		T item = std::move(queue.front());
		queue.pop();
		lock.unlock();
		cond_full.notify_one();
		return item;
	}

	// Snapshot of whether the queue currently holds `capacity` items.
	// Takes the lock, so it is safe to call from any thread; the answer may
	// be stale as soon as it is returned.
	bool is_full() const {
		std::lock_guard<std::mutex> guard(mutex);
		return full_unlocked();
	}

	// Snapshot of whether the queue currently holds no items (same caveat
	// as is_full()).
	bool is_empty() const {
		std::lock_guard<std::mutex> guard(mutex);
		return empty_unlocked();
	}
};

#endif

// --------------------------------------------------------------------------------
// /core/time.hpp:
// --------------------------------------------------------------------------------
/*
Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/* core/time.hpp -- the Apache-2.0 license header ends here. */

#ifndef TIME_H
#define TIME_H

#include <sys/time.h>

// Returns the current time of day, as reported by gettimeofday(), in
// seconds with the microsecond part as the fraction.
inline double get_time() {
	timeval now;
	gettimeofday(&now, nullptr);
	const double whole_seconds = now.tv_sec;
	const double fraction = now.tv_usec / 1e6;
	return whole_seconds + fraction;
}

#endif

// --------------------------------------------------------------------------------
// /core/type.hpp:
// --------------------------------------------------------------------------------
/*
Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#ifndef TYPE_H
#define TYPE_H

// Scalar types shared by the whole project.
using VertexId = int;   // vertex identifier
using EdgeId = long;    // edge count / edge index
using Weight = float;   // edge weight

// One edge: a source vertex, a target vertex and a weight.
struct Edge {
	VertexId source;
	VertexId target;
	Weight weight;
};

// An id together with a [begin_offset, end_offset) style pair of offsets.
// NOTE(review): no user of this struct is visible in this part of the
// dump -- confirm the exact meaning of the offsets against the code that
// fills it in.
struct MergeStatus {
	int id;
	long begin_offset;
	long end_offset;
};

#endif

// --------------------------------------------------------------------------------
// /examples/bfs.cpp:
// --------------------------------------------------------------------------------
/*
Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
*/
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #include "core/graph.hpp" 18 | 19 | int main(int argc, char ** argv) { 20 | if (argc<3) { 21 | fprintf(stderr, "usage: bfs [path] [start vertex id] [memory budget in GB]\n"); 22 | exit(-1); 23 | } 24 | std::string path = argv[1]; 25 | VertexId start_vid = atoi(argv[2]); 26 | long memory_bytes = (argc>=4)?atol(argv[3])*1024l*1024l*1024l:8l*1024l*1024l*1024l; 27 | 28 | Graph graph(path); 29 | graph.set_memory_bytes(memory_bytes); 30 | Bitmap * active_in = graph.alloc_bitmap(); 31 | Bitmap * active_out = graph.alloc_bitmap(); 32 | BigVector parent(graph.path+"/parent", graph.vertices); 33 | graph.set_vertex_data_bytes( graph.vertices * sizeof(VertexId) ); 34 | 35 | active_out->clear(); 36 | active_out->set_bit(start_vid); 37 | parent.fill(-1); 38 | parent[start_vid] = start_vid; 39 | VertexId active_vertices = 1; 40 | 41 | double start_time = get_time(); 42 | int iteration = 0; 43 | while (active_vertices!=0) { 44 | iteration++; 45 | printf("%7d: %d\n", iteration, active_vertices); 46 | std::swap(active_in, active_out); 47 | active_out->clear(); 48 | graph.hint(parent); 49 | active_vertices = graph.stream_edges([&](Edge & e){ 50 | if (parent[e.target]==-1) { 51 | if (cas(&parent[e.target], -1, e.source)) { 52 | active_out->set_bit(e.target); 53 | return 1; 54 | } 55 | } 56 | return 0; 57 | }, active_in); 58 | } 59 | double end_time = get_time(); 60 | 61 | int discovered_vertices = graph.stream_vertices([&](VertexId i){ 62 | return parent[i]!=-1; 63 | }); 64 | printf("discovered %d vertices from %d in %.2f seconds.\n", 
discovered_vertices, start_vid, end_time - start_time); 65 | 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /examples/mis.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #include "core/graph.hpp" 18 | 19 | int main(int argc, char ** argv) { 20 | if (argc<2) { 21 | fprintf(stderr, "usage: mis [path] [memory budget in GB]\n"); 22 | exit(-1); 23 | } 24 | std::string path = argv[1]; 25 | long memory_bytes = ((argc>=3)?atol(argv[2]):8l) * (1024l*1024l*1024l); 26 | 27 | Graph graph(path); 28 | graph.set_memory_bytes(memory_bytes); 29 | Bitmap * active_in = graph.alloc_bitmap(); 30 | Bitmap * active_out = graph.alloc_bitmap(); 31 | BigVector in_mis(graph.path+"/in_mis", graph.vertices); 32 | graph.set_vertex_data_bytes( graph.vertices * sizeof(bool) ); 33 | 34 | active_out->fill(); 35 | VertexId active_vertices = graph.stream_vertices([&](VertexId i){ 36 | in_mis[i] = true; 37 | return 1; 38 | }); 39 | 40 | double start_time = get_time(); 41 | int iteration = 0; 42 | while (true) { 43 | iteration++; 44 | printf("%7d: %d\n", iteration, active_vertices); 45 | std::swap(active_in, active_out); 46 | graph.stream_edges([&](Edge & e) { 47 | if (e.sourceclear(); 53 | VertexId next_active_vertices = graph.stream_vertices([&](VertexId i){ 
54 | if (in_mis[i]) { 55 | active_out->set_bit(i); 56 | return 1; 57 | } else { 58 | in_mis[i] = true; 59 | return 0; 60 | } 61 | }); 62 | if (active_vertices==next_active_vertices) break; 63 | active_vertices = next_active_vertices; 64 | } 65 | double end_time = get_time(); 66 | printf("in_mis: %d\n", active_vertices); 67 | printf("time: %.2f seconds\n", end_time - start_time); 68 | 69 | return 0; 70 | } 71 | 72 | -------------------------------------------------------------------------------- /examples/pagerank.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #include "core/graph.hpp" 18 | 19 | int main(int argc, char ** argv) { 20 | if (argc<3) { 21 | fprintf(stderr, "usage: pagerank [path] [iterations] [memory budget in GB]\n"); 22 | exit(-1); 23 | } 24 | std::string path = argv[1]; 25 | int iterations = atoi(argv[2]); 26 | long memory_bytes = (argc>=4)?atol(argv[3])*1024l*1024l*1024l:8l*1024l*1024l*1024l; 27 | 28 | Graph graph(path); 29 | graph.set_memory_bytes(memory_bytes); 30 | BigVector degree(graph.path+"/degree", graph.vertices); 31 | BigVector pagerank(graph.path+"/pagerank", graph.vertices); 32 | BigVector sum(graph.path+"/sum", graph.vertices); 33 | 34 | long vertex_data_bytes = (long)graph.vertices * ( sizeof(VertexId) + sizeof(float) + sizeof(float) ); 35 | graph.set_vertex_data_bytes(vertex_data_bytes); 36 | 37 | double begin_time = get_time(); 38 | 39 | degree.fill(0); 40 | graph.stream_edges( 41 | [&](Edge & e){ 42 | write_add(°ree[e.source], 1); 43 | return 0; 44 | }, nullptr, 0, 0 45 | ); 46 | printf("degree calculation used %.2f seconds\n", get_time() - begin_time); 47 | fflush(stdout); 48 | 49 | graph.hint(pagerank, sum); 50 | graph.stream_vertices( 51 | [&](VertexId i){ 52 | pagerank[i] = 1.f / degree[i]; 53 | sum[i] = 0; 54 | return 0; 55 | }, nullptr, 0, 56 | [&](std::pair vid_range){ 57 | pagerank.load(vid_range.first, vid_range.second); 58 | sum.load(vid_range.first, vid_range.second); 59 | }, 60 | [&](std::pair vid_range){ 61 | pagerank.save(); 62 | sum.save(); 63 | } 64 | ); 65 | 66 | for (int iter=0;iter( 69 | [&](Edge & e){ 70 | write_add(&sum[e.target], pagerank[e.source]); 71 | return 0; 72 | }, nullptr, 0, 1, 73 | [&](std::pair source_vid_range){ 74 | pagerank.lock(source_vid_range.first, source_vid_range.second); 75 | }, 76 | [&](std::pair source_vid_range){ 77 | pagerank.unlock(source_vid_range.first, source_vid_range.second); 78 | } 79 | ); 80 | graph.hint(pagerank, sum); 81 | if (iter==iterations-1) { 82 | graph.stream_vertices( 83 | [&](VertexId i){ 84 | 
pagerank[i] = 0.15f + 0.85f * sum[i]; 85 | return 0; 86 | }, nullptr, 0, 87 | [&](std::pair vid_range){ 88 | pagerank.load(vid_range.first, vid_range.second); 89 | }, 90 | [&](std::pair vid_range){ 91 | pagerank.save(); 92 | } 93 | ); 94 | } else { 95 | graph.stream_vertices( 96 | [&](VertexId i){ 97 | pagerank[i] = (0.15f + 0.85f * sum[i]) / degree[i]; 98 | sum[i] = 0; 99 | return 0; 100 | }, nullptr, 0, 101 | [&](std::pair vid_range){ 102 | pagerank.load(vid_range.first, vid_range.second); 103 | sum.load(vid_range.first, vid_range.second); 104 | }, 105 | [&](std::pair vid_range){ 106 | pagerank.save(); 107 | sum.save(); 108 | } 109 | ); 110 | } 111 | } 112 | 113 | double end_time = get_time(); 114 | printf("%d iterations of pagerank took %.2f seconds\n", iterations, end_time - begin_time); 115 | 116 | } 117 | -------------------------------------------------------------------------------- /examples/radii.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #include "core/graph.hpp" 18 | 19 | #define K 64 20 | 21 | int main(int argc, char ** argv) { 22 | if (argc<2) { 23 | fprintf(stderr, "usage: mis [path] [memory budget in GB]\n"); 24 | exit(-1); 25 | } 26 | std::string path = argv[1]; 27 | long memory_bytes = ((argc>=3)?atol(argv[2]):8l) * (1024l*1024l*1024l); 28 | 29 | Graph graph(path); 30 | graph.set_memory_bytes(memory_bytes); 31 | Bitmap * active_in = graph.alloc_bitmap(); 32 | Bitmap * active_out = graph.alloc_bitmap(); 33 | BigVector visited(graph.path+"/visited", graph.vertices); 34 | BigVector radii(graph.path+"/radii", graph.vertices); 35 | graph.set_vertex_data_bytes( graph.vertices * ( sizeof(VertexId) + sizeof(long) * 2 ) ); 36 | 37 | srand(time(NULL)); 38 | 39 | double start_time = get_time(); 40 | int iteration; 41 | VertexId active_vertices; 42 | VertexId max_radii; 43 | 44 | active_out->clear(); 45 | graph.stream_vertices([&](VertexId i){ 46 | visited[i][0] = 0ul; 47 | visited[i][1] = 0ul; 48 | radii[i] = -1; 49 | return 0; 50 | }); 51 | for (int k=0;kset_bit(vid); 56 | } 57 | iteration = 0; 58 | active_vertices = K; 59 | while (active_vertices > 0) { 60 | iteration++; 61 | printf("%7d: %d\n", iteration, active_vertices); 62 | int now = iteration % 2; 63 | int next = 1 - now; 64 | std::swap(active_in, active_out); 65 | active_out->clear(); 66 | active_vertices = graph.stream_edges([&](Edge & e) { 67 | if (visited[e.target][now] != visited[e.source][now]) { 68 | __sync_fetch_and_or( &visited[e.target][next], visited[e.source][now] ); 69 | VertexId old_radii = radii[e.target]; 70 | if (radii[e.target]!=iteration) { 71 | if (cas(&radii[e.target], old_radii, iteration)) { 72 | active_out->set_bit(e.target); 73 | return 1; 74 | } 75 | } 76 | } 77 | return 0; 78 | }, active_in); 79 | active_vertices = graph.stream_vertices([&](VertexId i){ 80 | visited[i][now] = visited[i][next]; 81 | return 1; 82 | }, active_out); // necessary? 
83 | } 84 | max_radii = 0; 85 | for (VertexId i=0;i candidates; 91 | VertexId threshold = 0; 92 | while (candidates.size()clear(); 101 | graph.stream_vertices([&](VertexId i){ 102 | visited[i][0] = 0ul; 103 | visited[i][1] = 0ul; 104 | radii[i] = -1; 105 | return 0; 106 | }); 107 | for (int k=0;kset_bit(vid); 112 | } 113 | iteration = 0; 114 | active_vertices = K; 115 | while (active_vertices > 0) { 116 | iteration++; 117 | printf("%7d: %d\n", iteration, active_vertices); 118 | int now = iteration % 2; 119 | int next = 1 - now; 120 | std::swap(active_in, active_out); 121 | active_out->clear(); 122 | active_vertices = graph.stream_edges([&](Edge & e) { 123 | if (visited[e.target][now] != visited[e.source][now]) { 124 | __sync_fetch_and_or( &visited[e.target][next], visited[e.source][now] ); 125 | VertexId old_radii = radii[e.target]; 126 | if (radii[e.target]!=iteration) { 127 | if (cas(&radii[e.target], old_radii, iteration)) { 128 | active_out->set_bit(e.target); 129 | return 1; 130 | } 131 | } 132 | } 133 | return 0; 134 | }, active_in); 135 | active_vertices = graph.stream_vertices([&](VertexId i){ 136 | visited[i][now] = visited[i][next]; 137 | return 1; 138 | }, active_out); // necessary? 
139 | } 140 | max_radii = 0; 141 | for (VertexId i=0;i=4)?atol(argv[3]):8l)*1024l*1024l*1024l; 26 | 27 | Graph graph(path); 28 | assert(graph.edge_type==1); 29 | graph.set_memory_bytes(memory_bytes); 30 | BigVector input(graph.path+"/input", graph.vertices); 31 | BigVector output(graph.path+"/output", graph.vertices); 32 | graph.set_vertex_data_bytes( (long) graph.vertices * ( sizeof(float) * 2 ) ); 33 | 34 | double begin_time = get_time(); 35 | graph.hint(input, output); 36 | graph.stream_vertices( 37 | [&](VertexId i){ 38 | input[i] = i; 39 | output[i] = 0; 40 | return 0; 41 | }, nullptr, 0, 42 | [&](std::pair vid_range){ 43 | input.load(vid_range.first, vid_range.second); 44 | output.load(vid_range.first, vid_range.second); 45 | }, 46 | [&](std::pair vid_range){ 47 | input.save(); 48 | output.save(); 49 | } 50 | ); 51 | graph.hint(input); 52 | graph.stream_edges( 53 | [&](Edge & e){ 54 | write_add(&output[e.target], input[e.source] * e.weight); 55 | return 0; 56 | }, nullptr, 0, 1, 57 | [&](std::pair source_vid_range){ 58 | input.lock(source_vid_range.first, source_vid_range.second); 59 | }, 60 | [&](std::pair source_vid_range){ 61 | input.unlock(source_vid_range.first, source_vid_range.second); 62 | } 63 | ); 64 | double end_time = get_time(); 65 | 66 | printf("spmv took %.2f seconds\n", end_time - begin_time); 67 | } 68 | -------------------------------------------------------------------------------- /examples/wcc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | #include "core/graph.hpp" 18 | 19 | int main(int argc, char ** argv) { 20 | if (argc<2) { 21 | fprintf(stderr, "usage: wcc [path] [memory budget in GB]\n"); 22 | exit(-1); 23 | } 24 | std::string path = argv[1]; 25 | long memory_bytes = (argc>=3)?atol(argv[2])*1024l*1024l*1024l:8l*1024l*1024l*1024l; 26 | 27 | Graph graph(path); 28 | graph.set_memory_bytes(memory_bytes); 29 | Bitmap * active_in = graph.alloc_bitmap(); 30 | Bitmap * active_out = graph.alloc_bitmap(); 31 | BigVector label(graph.path+"/label", graph.vertices); 32 | graph.set_vertex_data_bytes( graph.vertices * sizeof(VertexId) ); 33 | 34 | active_out->fill(); 35 | VertexId active_vertices = graph.stream_vertices([&](VertexId i){ 36 | label[i] = i; 37 | return 1; 38 | }); 39 | 40 | double start_time = get_time(); 41 | int iteration = 0; 42 | while (active_vertices!=0) { 43 | iteration++; 44 | printf("%7d: %d\n", iteration, active_vertices); 45 | std::swap(active_in, active_out); 46 | active_out->clear(); 47 | graph.hint(label); 48 | active_vertices = graph.stream_edges([&](Edge & e){ 49 | if (label[e.source]set_bit(e.target); 52 | return 1; 53 | } 54 | } 55 | return 0; 56 | }, active_in); 57 | } 58 | double end_time = get_time(); 59 | 60 | BigVector label_stat(graph.path+"/label_stat", graph.vertices); 61 | label_stat.fill(0); 62 | graph.stream_vertices([&](VertexId i){ 63 | write_add(&label_stat[label[i]], 1); 64 | return 1; 65 | }); 66 | VertexId components = graph.stream_vertices([&](VertexId i){ 67 | return label_stat[i]!=0; 68 | }); 69 | printf("%d components 
found in %.2f seconds\n", components, end_time - start_time); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tools/clear_cache.sh: -------------------------------------------------------------------------------- 1 | sync; echo 3 > /proc/sys/vm/drop_caches 2 | -------------------------------------------------------------------------------- /tools/preprocess.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014-2015 Xiaowei Zhu, Tsinghua University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include "core/constants.hpp" 31 | #include "core/type.hpp" 32 | #include "core/filesystem.hpp" 33 | #include "core/queue.hpp" 34 | #include "core/partition.hpp" 35 | #include "core/time.hpp" 36 | #include "core/atomic.hpp" 37 | 38 | long PAGESIZE = 4096; 39 | 40 | void generate_edge_grid(std::string input, std::string output, VertexId vertices, int partitions, int edge_type) { 41 | int parallelism = std::thread::hardware_concurrency(); 42 | int edge_unit; 43 | EdgeId edges; 44 | switch (edge_type) { 45 | case 0: 46 | edge_unit = sizeof(VertexId) * 2; 47 | edges = file_size(input) / edge_unit; 48 | break; 49 | case 1: 50 | edge_unit = sizeof(VertexId) * 2 + sizeof(Weight); 51 | edges = file_size(input) / edge_unit; 52 | break; 53 | default: 54 | fprintf(stderr, "edge type (%d) is not supported.\n", edge_type); 55 | exit(-1); 56 | } 57 | printf("vertices = %d, edges = %ld\n", vertices, edges); 58 | 59 | char ** buffers = new char * [parallelism*2]; 60 | bool * occupied = new bool [parallelism*2]; 61 | for (int i=0;i > tasks(parallelism); 66 | int ** fout; 67 | std::mutex ** mutexes; 68 | fout = new int * [partitions]; 69 | mutexes = new std::mutex * [partitions]; 70 | if (file_exists(output)) { 71 | remove_directory(output); 72 | } 73 | create_directory(output); 74 | 75 | const int grid_buffer_size = 768; // 12 * 8 * 8 76 | char * global_grid_buffer = (char *) memalign(PAGESIZE, grid_buffer_size * partitions * partitions); 77 | char *** grid_buffer = new char ** [partitions]; 78 | int ** grid_buffer_offset = new int * [partitions]; 79 | for (int i=0;i threads; 94 | for (int ti=0;ti lock(mutexes[i][j]); 141 | if (local_grid_offset[ij] - start > edge_unit) { 142 | write(fout[i][j], local_buffer+start, local_grid_offset[ij]-start); 143 | } else if (local_grid_offset[ij] - start == 
edge_unit) { 144 | memcpy(grid_buffer[i][j]+grid_buffer_offset[i][j], local_buffer+start, edge_unit); 145 | grid_buffer_offset[i][j] += edge_unit; 146 | if (grid_buffer_offset[i][j]==grid_buffer_size) { 147 | write(fout[i][j], grid_buffer[i][j], grid_buffer_size); 148 | grid_buffer_offset[i][j] = 0; 149 | } 150 | } 151 | start = local_grid_offset[ij]; 152 | } 153 | occupied[cursor] = false; 154 | } 155 | }); 156 | } 157 | 158 | int fin = open(input.c_str(), O_RDONLY); 159 | if (fin==-1) printf("%s\n", strerror(errno)); 160 | assert(fin!=-1); 161 | int cursor = 0; 162 | long total_bytes = file_size(input); 163 | long read_bytes = 0; 164 | double start_time = get_time(); 165 | while (true) { 166 | long bytes = read(fin, buffers[cursor], IOSIZE); 167 | assert(bytes!=-1); 168 | if (bytes==0) break; 169 | occupied[cursor] = true; 170 | tasks.push(std::make_tuple(cursor, bytes)); 171 | read_bytes += bytes; 172 | printf("progress: %.2f%%\r", 100. * read_bytes / total_bytes); 173 | fflush(stdout); 174 | while (occupied[cursor]) { 175 | cursor = (cursor + 1) % (parallelism * 2); 176 | } 177 | } 178 | close(fin); 179 | assert(read_bytes==edges*edge_unit); 180 | 181 | for (int ti=0;ti ", get_time() - start_time); 190 | long ts = 0; 191 | for (int i=0;i0) { 194 | ts += grid_buffer_offset[i][j]; 195 | write(fout[i][j], grid_buffer[i][j], grid_buffer_offset[i][j]); 196 | } 197 | } 198 | } 199 | printf("%lf (%ld)\n", get_time() - start_time, ts); 200 | 201 | for (int i=0;i