├── 00_baseline ├── nodeset.go └── search.go ├── 01_reuse_level ├── nodeset.go └── search.go ├── 02_sort ├── nodeset.go └── search.go ├── 03_inline_sort ├── nodeset.go ├── search.go └── sort.go ├── 04_radix_sort ├── nodeset.go └── search.go ├── 05_lift_level ├── nodeset.go └── search.go ├── 06_ordering ├── nodeset.go └── search.go ├── 07_fused ├── nodeset.go └── search.go ├── 07_fused_if ├── nodeset.go └── search.go ├── 08_cuckoo ├── cuckoof.go ├── nodeset.go └── search.go ├── 09_unroll_4 ├── nodeset.go └── search.go ├── 09_unroll_8 ├── nodeset.go └── search.go ├── 09_unroll_8_4 ├── nodeset.go └── search.go ├── 10_parallel ├── nodeset.go └── search.go ├── 10_parchan ├── nodeset.go └── search.go ├── 11_frontier ├── nodeset.go └── search.go ├── 12_almost ├── nodeset.go └── search.go ├── 13_marking ├── nodeset.go └── search.go ├── 14_early_2 ├── nodeset.go └── search.go ├── 14_early_3 ├── nodeset.go └── search.go ├── 14_early_4 ├── nodeset.go └── search.go ├── 14_early_r ├── nodeset.go └── search.go ├── 15_worker ├── nodeset.go └── search.go ├── 16_busy ├── nodeset.go └── search.go ├── README.md ├── data ├── .gitignore └── sg-10k-250k.txt ├── go.mod ├── go.sum ├── graph ├── graph.go ├── loaddat.go └── loadtext.go ├── main.go ├── plot ├── .gitignore ├── convert.sh └── plot.go └── results ├── Linux-Xeon-E5-2670v3-B.txt ├── Linux-Xeon-E5-2670v3.txt ├── Mac-i5-5257U-B.txt ├── Mac-i5-5257U.txt ├── Win-i7-2820QM-B.txt └── Win-i7-2820QM.txt /00_baseline/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | 
bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /00_baseline/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | ) 6 | 7 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 8 | if len(level) != g.Order() { 9 | panic("invalid level length") 10 | } 11 | 12 | visited := NewNodeSet(g.Order()) 13 | 14 | currentLevel := make([]graph.Node, 0, g.Order()) 15 | nextLevel := make([]graph.Node, 0, g.Order()) 16 | 17 | level[source] = 1 18 | visited.Add(source) 19 | currentLevel = append(currentLevel, source) 20 | 21 | levelNumber := 2 22 | 23 | for len(currentLevel) > 0 { 24 | for _, node := range currentLevel { 25 | for _, neighbor := range g.Neighbors(node) { 26 | if !visited.Contains(neighbor) { 27 | nextLevel = append(nextLevel, neighbor) 28 | level[neighbor] = levelNumber 29 | visited.Add(neighbor) 30 | } 31 | } 32 | } 33 | 34 | levelNumber++ 35 | currentLevel = currentLevel[:0:cap(currentLevel)] 36 | currentLevel, nextLevel = nextLevel, currentLevel 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /01_reuse_level/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 
17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /01_reuse_level/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | ) 6 | 7 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 8 | if len(level) != g.Order() { 9 | panic("invalid level length") 10 | } 11 | 12 | currentLevel := make([]graph.Node, 0, g.Order()) 13 | nextLevel := make([]graph.Node, 0, g.Order()) 14 | 15 | level[source] = 1 16 | currentLevel = append(currentLevel, source) 17 | 18 | levelNumber := 2 19 | 20 | for len(currentLevel) > 0 { 21 | for _, node := range currentLevel { 22 | for _, neighbor := range g.Neighbors(node) { 23 | if level[neighbor] == 0 { 24 | nextLevel = append(nextLevel, neighbor) 25 | level[neighbor] = levelNumber 26 | } 27 | } 28 | } 29 | 30 | levelNumber++ 31 | currentLevel = currentLevel[:0:cap(currentLevel)] 32 | currentLevel, nextLevel = nextLevel, currentLevel 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /02_sort/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 
15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /02_sort/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 10 | if len(level) != g.Order() { 11 | panic("invalid level length") 12 | } 13 | 14 | visited := NewNodeSet(g.Order()) 15 | 16 | currentLevel := make([]graph.Node, 0, g.Order()) 17 | nextLevel := make([]graph.Node, 0, g.Order()) 18 | 19 | level[source] = 1 20 | visited.Add(source) 21 | currentLevel = append(currentLevel, source) 22 | 23 | levelNumber := 2 24 | 25 | for len(currentLevel) > 0 { 26 | for _, node := range currentLevel { 27 | for _, neighbor := range g.Neighbors(node) { 28 | if !visited.Contains(neighbor) { 29 | nextLevel = append(nextLevel, neighbor) 30 | level[neighbor] = levelNumber 31 | visited.Add(neighbor) 32 | } 33 | } 34 | } 35 | 36 | sort.Slice(nextLevel, func(i, k int) bool { 37 | return nextLevel[i] < nextLevel[k] 38 | }) 39 | 40 | levelNumber++ 41 | currentLevel = currentLevel[:0:cap(currentLevel)] 42 | currentLevel, nextLevel = nextLevel, currentLevel 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /03_inline_sort/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import 
"github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /03_inline_sort/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | ) 6 | 7 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 8 | if len(level) != g.Order() { 9 | panic("invalid level length") 10 | } 11 | 12 | visited := NewNodeSet(g.Order()) 13 | 14 | currentLevel := make([]graph.Node, 0, g.Order()) 15 | nextLevel := make([]graph.Node, 0, g.Order()) 16 | 17 | level[source] = 1 18 | visited.Add(source) 19 | currentLevel = append(currentLevel, source) 20 | 21 | levelNumber := 2 22 | 23 | for len(currentLevel) > 0 { 24 | for _, node := range currentLevel { 25 | for _, neighbor := range g.Neighbors(node) { 26 | if !visited.Contains(neighbor) { 27 | nextLevel = append(nextLevel, neighbor) 28 | level[neighbor] = levelNumber 29 | visited.Add(neighbor) 30 | } 31 | } 32 | } 33 | 34 | Sort(nextLevel) 35 | 36 | levelNumber++ 37 | currentLevel = currentLevel[:0:cap(currentLevel)] 38 | currentLevel, nextLevel = nextLevel, currentLevel 39 | } 40 | } 41 | 
--------------------------------------------------------------------------------
/03_inline_sort/sort.go:
--------------------------------------------------------------------------------
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package search

import "github.com/egonelbre/a-tale-of-bfs/graph"

// lessSwap is a slice of nodes carrying the comparison and swap primitives
// used by the sorting routines in this file. The routines take this concrete
// type directly rather than an interface value.
type lessSwap []graph.Node

// Less reports whether the node at index i is smaller than the node at index k.
func (nodes lessSwap) Less(i, k int) bool { return nodes[i] < nodes[k] }

// Swap exchanges the nodes at indices i and k.
func (nodes lessSwap) Swap(i, k int) { nodes[i], nodes[k] = nodes[k], nodes[i] }

// Len returns the number of nodes in the slice.
func (nodes lessSwap) Len() int { return len(nodes) }

// Sort sorts nodes in place in ascending order.
//
// It runs quickSort over the whole slice with a recursion budget of
// maxDepth(len(nodes)); once the budget is used up quickSort falls back to
// heapSort.
func Sort(nodes []graph.Node) {
	n := len(nodes)
	quickSort(lessSwap(nodes), 0, n, maxDepth(n))
}

// maxDepth returns a threshold at which quicksort should switch
// to heapsort. It returns 2*ceil(lg(n+1)).
func maxDepth(n int) int {
	var depth int
	// Count the number of significant bits in n.
	for i := n; i > 0; i >>= 1 {
		depth++
	}
	return depth * 2
}

// Auto-generated variant of sort.go:insertionSort
//
// insertionSort sorts data[a:b] using insertion sort: each element is moved
// left by adjacent swaps until it is no longer smaller than its predecessor.
func insertionSort(data lessSwap, a, b int) {
	for i := a + 1; i < b; i++ {
		for j := i; j > a && data.Less(j, j-1); j-- {
			data.Swap(j, j-1)
		}
	}
}

// Auto-generated variant of sort.go:siftDown
//
// siftDown restores the max-heap property for the heap positions [lo, hi).
// Heap positions are relative: position p lives at data[first+p], so first is
// the offset into data where the root of the heap lies.
func siftDown(data lessSwap, lo, hi, first int) {
	root := lo
	for {
		child := 2*root + 1
		if child >= hi {
			break
		}
		// Pick the larger of the two children.
		if child+1 < hi && data.Less(first+child, first+child+1) {
			child++
		}
		// The root is already at least as large as its larger child: done.
		if !data.Less(first+root, first+child) {
			return
		}
		data.Swap(first+root, first+child)
		root = child
	}
}

// Auto-generated variant of sort.go:heapSort
//
// heapSort sorts data[a:b] in ascending order using heap sort.
func heapSort(data lessSwap, a, b int) {
	first := a
	lo := 0
	hi := b - a
	// Build heap with greatest element at top.
	for i := (hi - 1) / 2; i >= 0; i-- {
		siftDown(data, i, hi, first)
	}
	// Pop elements, largest first, into end of data.
	for i := hi - 1; i >= 0; i-- {
		data.Swap(first, first+i)
		siftDown(data, lo, i, first)
	}
}

// Auto-generated variant of sort.go:medianOfThree
//
// medianOfThree moves the median of the three values data[m0], data[m1],
// data[m2] into data[m1]. Afterwards data[m0] <= data[m1] <= data[m2].
func medianOfThree(data lessSwap, m1, m0, m2 int) {
	// Sort the three points with at most three compare-and-swap steps.
	if data.Less(m1, m0) {
		data.Swap(m1, m0)
	}
	// data[m0] <= data[m1]
	if data.Less(m2, m1) {
		data.Swap(m2, m1)
		// data[m0] <= data[m2] && data[m1] < data[m2]
		if data.Less(m1, m0) {
			data.Swap(m1, m0)
		}
	}
	// now data[m0] <= data[m1] <= data[m2]
}

// Auto-generated variant of sort.go:swapRange
//
// swapRange exchanges the n elements starting at index a with the n elements
// starting at index b, pairwise.
func swapRange(data lessSwap, a, b, n int) {
	for i := 0; i < n; i++ {
		data.Swap(a+i, b+i)
	}
}

// Auto-generated variant of sort.go:doPivot
//
// doPivot chooses a pivot in data[lo:hi] and partitions the range around it.
// On return data[lo:midlo] holds the elements partitioned to the left of the
// pivot run, data[midlo:midhi] the elements equal to the pivot, and
// data[midhi:hi] the elements greater than the pivot.
func doPivot(data lessSwap, lo, hi int) (midlo, midhi int) {
	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
	if hi-lo > 40 {
		// Tukey's "Ninther": median of three medians of three.
		s := (hi - lo) / 8
		medianOfThree(data, lo, lo+s, lo+2*s)
		medianOfThree(data, m, m-s, m+s)
		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
	}
	// Moves the median of data[lo], data[m], data[hi-1] into data[lo].
	medianOfThree(data, lo, m, hi-1)
	// Invariants are:
	//	data[lo] = pivot (set up by the median selection above)
	//	data[lo < i < a] < pivot
	//	data[a <= i < b] <= pivot
	//	data[b <= i < c] unexamined
	//	data[c <= i < hi-1] > pivot
	//	data[hi-1] >= pivot
	pivot := lo
	a, c := lo+1, hi-1
	for ; a < c && data.Less(a, pivot); a++ {
	}
	b := a
	for {
		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
		}
		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
		}
		if b >= c {
			break
		}
		// data[b] > pivot; data[c-1] <= pivot
		data.Swap(b, c-1)
		b++
		c--
	}
	// A very small right-hand side hints at many elements equal to the
	// pivot; the border is set conservatively at 5.
	protect := hi-c < 5
	if !protect && hi-c < (hi-lo)/4 {
		// Test a few points for equality to the pivot.
		dups := 0
		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
			data.Swap(c, hi-1)
			c++
			dups++
		}
		if !data.Less(b-1, pivot) { // data[b-1] = pivot
			b--
			dups++
		}
		if !data.Less(m, pivot) { // data[m] = pivot
			data.Swap(m, b-1)
			b--
			dups++
		}
		// If at least 2 points are equal to pivot, assume skewed distribution.
		protect = dups > 1
	}
	if protect {
		// Protect against a lot of duplicates.
		// Add invariant:
		//	data[a <= i < b] unexamined
		//	data[b <= i < c] = pivot
		for {
			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b-1] == pivot
			}
			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
			}
			if a >= b {
				break
			}
			// data[a] == pivot; data[b-1] < pivot
			data.Swap(a, b-1)
			a++
			b--
		}
	}
	// Swap pivot into middle.
	data.Swap(pivot, b-1)
	return b - 1, c
}

// Auto-generated variant of sort.go:quickSort
func quickSort(data lessSwap, a, b, maxDepth int) { 158 | for b-a > 12 { 159 | if maxDepth == 0 { 160 | heapSort(data, a, b) 161 | return 162 | } 163 | maxDepth-- 164 | mlo, mhi := doPivot(data, a, b) 165 | if mlo-a < b-mhi { 166 | quickSort(data, a, mlo, maxDepth) 167 | a = mhi 168 | } else { 169 | quickSort(data, mhi, b, maxDepth) 170 | b = mlo 171 | } 172 | } 173 | if b-a > 1 { 174 | for i := a + 6; i < b; i++ { 175 | if data.Less(i, i-6) { 176 | data.Swap(i, i-6) 177 | } 178 | } 179 | insertionSort(data, a, b) 180 | } 181 | } 182 | 183 | // Auto-generated variant of sort.go:symMerge 184 | func symMerge(data lessSwap, a, m, b int) { 185 | if m-a == 1 { 186 | i := m 187 | j := b 188 | for i < j { 189 | h := int(uint(i+j) >> 1) 190 | if data.Less(h, a) { 191 | i = h + 1 192 | } else { 193 | j = h 194 | } 195 | } 196 | for k := a; k < i-1; k++ { 197 | data.Swap(k, k+1) 198 | } 199 | return 200 | } 201 | if b-m == 1 { 202 | i := a 203 | j := m 204 | for i < j { 205 | h := int(uint(i+j) >> 1) 206 | if !data.Less(m, h) { 207 | i = h + 1 208 | } else { 209 | j = h 210 | } 211 | } 212 | for k := m; k > i; k-- { 213 | data.Swap(k, k-1) 214 | } 215 | return 216 | } 217 | mid := int(uint(a+b) >> 1) 218 | n := mid + m 219 | var start, r int 220 | if m > mid { 221 | start = n - b 222 | r = mid 223 | } else { 224 | start = a 225 | r = m 226 | } 227 | p := n - 1 228 | for start < r { 229 | c := int(uint(start+r) >> 1) 230 | if !data.Less(p-c, c) { 231 | start = c + 1 232 | } else { 233 | r = c 234 | } 235 | } 236 | end := n - start 237 | if start < m && m < end { 238 | rotate(data, start, m, end) 239 | } 240 | if a < start && start < mid { 241 | symMerge(data, a, start, mid) 242 | } 243 | if mid < end && end < b { 244 | symMerge(data, mid, end, b) 245 | } 246 | } 247 | 248 | // Auto-generated variant of sort.go:rotate 249 | func rotate(data lessSwap, a, m, b int) { 250 | i := m - a 251 | j := b - m 252 | for i != j { 253 | if i > j { 254 | swapRange(data, m-i, m, j) 255 | i -= 
j 256 | } else { 257 | swapRange(data, m-i, m+j-i, i) 258 | j -= i 259 | } 260 | } 261 | swapRange(data, m-i, m, i) 262 | } 263 | -------------------------------------------------------------------------------- /04_radix_sort/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /04_radix_sort/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | "github.com/shawnsmithdev/zermelo/zuint32" 6 | ) 7 | 8 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 9 | if len(level) != g.Order() { 10 | panic("invalid level length") 11 | } 12 | 13 | visited := NewNodeSet(g.Order()) 14 | 15 | currentLevel := make([]graph.Node, 0, g.Order()) 16 | nextLevel := make([]graph.Node, 0, g.Order()) 17 | 18 | level[source] = 1 19 | visited.Add(source) 20 | currentLevel = append(currentLevel, source) 21 | 22 | levelNumber := 2 23 | 24 | for len(currentLevel) > 0 { 25 | for _, node := range currentLevel { 26 | for _, neighbor := range g.Neighbors(node) { 27 | if 
!visited.Contains(neighbor) { 28 | nextLevel = append(nextLevel, neighbor) 29 | level[neighbor] = levelNumber 30 | visited.Add(neighbor) 31 | } 32 | } 33 | } 34 | 35 | zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)]) 36 | 37 | levelNumber++ 38 | currentLevel = currentLevel[:0:cap(currentLevel)] 39 | currentLevel, nextLevel = nextLevel, currentLevel 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /05_lift_level/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /05_lift_level/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | "github.com/shawnsmithdev/zermelo/zuint32" 6 | ) 7 | 8 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 9 | if len(level) != g.Order() { 10 | panic("invalid level length") 11 | } 12 | 13 | visited := NewNodeSet(g.Order()) 14 | 15 | currentLevel := make([]graph.Node, 0, g.Order()) 16 | nextLevel := make([]graph.Node, 0, g.Order()) 17 | 18 | level[source] = 1 
19 | visited.Add(source) 20 | currentLevel = append(currentLevel, source) 21 | 22 | levelNumber := 2 23 | 24 | for len(currentLevel) > 0 { 25 | for _, node := range currentLevel { 26 | for _, neighbor := range g.Neighbors(node) { 27 | if !visited.Contains(neighbor) { 28 | nextLevel = append(nextLevel, neighbor) 29 | visited.Add(neighbor) 30 | } 31 | } 32 | } 33 | 34 | zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)]) 35 | 36 | for _, neighbor := range nextLevel { 37 | level[neighbor] = levelNumber 38 | } 39 | 40 | levelNumber++ 41 | currentLevel = currentLevel[:0:cap(currentLevel)] 42 | currentLevel, nextLevel = nextLevel, currentLevel 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /06_ordering/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | -------------------------------------------------------------------------------- /06_ordering/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | "github.com/shawnsmithdev/zermelo/zuint32" 6 | ) 7 | 8 | func BreadthFirst(g *graph.Graph, 
source graph.Node, level []int) { 9 | if len(level) != g.Order() { 10 | panic("invalid level length") 11 | } 12 | 13 | visited := NewNodeSet(g.Order()) 14 | 15 | currentLevel := make([]graph.Node, 0, g.Order()) 16 | nextLevel := make([]graph.Node, 0, g.Order()) 17 | 18 | level[source] = 1 19 | visited.Add(source) 20 | currentLevel = append(currentLevel, source) 21 | 22 | levelNumber := 2 23 | 24 | for len(currentLevel) > 0 { 25 | for _, node := range currentLevel { 26 | for _, neighbor := range g.Neighbors(node) { 27 | if !visited.Contains(neighbor) { 28 | visited.Add(neighbor) 29 | nextLevel = append(nextLevel, neighbor) 30 | } 31 | } 32 | } 33 | 34 | zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)]) 35 | 36 | for _, neighbor := range nextLevel { 37 | level[neighbor] = levelNumber 38 | } 39 | 40 | levelNumber++ 41 | currentLevel = currentLevel[:0:cap(currentLevel)] 42 | currentLevel, nextLevel = nextLevel, currentLevel 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /07_fused/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | 33 | func (set NodeSet) TryAdd(node graph.Node) bool { 34 | bucket, bit := 
set.Offset(node) 35 | empty := set[bucket]&bit == 0 36 | set[bucket] |= bit 37 | return empty 38 | } 39 | -------------------------------------------------------------------------------- /07_fused/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | "github.com/shawnsmithdev/zermelo/zuint32" 6 | ) 7 | 8 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 9 | if len(level) != g.Order() { 10 | panic("invalid level length") 11 | } 12 | 13 | visited := NewNodeSet(g.Order()) 14 | 15 | currentLevel := make([]graph.Node, 0, g.Order()) 16 | nextLevel := make([]graph.Node, 0, g.Order()) 17 | 18 | level[source] = 1 19 | visited.Add(source) 20 | currentLevel = append(currentLevel, source) 21 | 22 | levelNumber := 2 23 | 24 | for len(currentLevel) > 0 { 25 | for _, node := range currentLevel { 26 | for _, neighbor := range g.Neighbors(node) { 27 | if visited.TryAdd(neighbor) { 28 | nextLevel = append(nextLevel, neighbor) 29 | } 30 | } 31 | } 32 | 33 | zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)]) 34 | 35 | for _, neighbor := range nextLevel { 36 | level[neighbor] = levelNumber 37 | } 38 | 39 | levelNumber++ 40 | currentLevel = currentLevel[:0:cap(currentLevel)] 41 | currentLevel, nextLevel = nextLevel, currentLevel 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /07_fused_if/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import "github.com/egonelbre/a-tale-of-bfs/graph" 4 | 5 | const ( 6 | bucket_bits = 5 7 | bucket_size = 1 << 5 8 | bucket_mask = bucket_size - 1 9 | ) 10 | 11 | type NodeSet []uint32 12 | 13 | func NewNodeSet(size int) NodeSet { 14 | return NodeSet(make([]uint32, (size+31)/32)) 15 | } 16 | 17 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 18 | bucket = 
uint32(node >> bucket_bits) 19 | bit = uint32(1 << (node & bucket_mask)) 20 | return bucket, bit 21 | } 22 | 23 | func (set NodeSet) Add(node graph.Node) { 24 | bucket, bit := set.Offset(node) 25 | set[bucket] |= bit 26 | } 27 | 28 | func (set NodeSet) Contains(node graph.Node) bool { 29 | bucket, bit := set.Offset(node) 30 | return set[bucket]&bit != 0 31 | } 32 | 33 | func (set NodeSet) TryAdd(node graph.Node) bool { 34 | bucket, bit := set.Offset(node) 35 | empty := set[bucket]&bit == 0 36 | if empty { 37 | set[bucket] |= bit 38 | } 39 | return empty 40 | } 41 | -------------------------------------------------------------------------------- /07_fused_if/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/egonelbre/a-tale-of-bfs/graph" 5 | "github.com/shawnsmithdev/zermelo/zuint32" 6 | ) 7 | 8 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 9 | if len(level) != g.Order() { 10 | panic("invalid level length") 11 | } 12 | 13 | visited := NewNodeSet(g.Order()) 14 | 15 | currentLevel := make([]graph.Node, 0, g.Order()) 16 | nextLevel := make([]graph.Node, 0, g.Order()) 17 | 18 | level[source] = 1 19 | visited.Add(source) 20 | currentLevel = append(currentLevel, source) 21 | 22 | levelNumber := 2 23 | 24 | for len(currentLevel) > 0 { 25 | for _, node := range currentLevel { 26 | for _, neighbor := range g.Neighbors(node) { 27 | if visited.TryAdd(neighbor) { 28 | nextLevel = append(nextLevel, neighbor) 29 | } 30 | } 31 | } 32 | 33 | zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)]) 34 | 35 | for _, neighbor := range nextLevel { 36 | level[neighbor] = levelNumber 37 | } 38 | 39 | levelNumber++ 40 | currentLevel = currentLevel[:0:cap(currentLevel)] 41 | currentLevel, nextLevel = nextLevel, currentLevel 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /08_cuckoo/cuckoof.go: 
-------------------------------------------------------------------------------- 1 | // based on https://github.com/dgryski/go-cuckoof 2 | package search 3 | 4 | import ( 5 | "math/rand" 6 | ) 7 | 8 | // CF is a cuckoo filter 9 | type CF struct { 10 | // TODO(dgryski): add larger fingerprints 11 | t [][4]byte 12 | occupied []byte 13 | rnd uint64 14 | } 15 | 16 | // New returns a new cuckoo filter with size hash table entries. Size must be a power of two 17 | func NewCuckoof(size int) *CF { 18 | 19 | if size&(size-1) != 0 { 20 | panic("cuckoof: size must be a power of two") 21 | } 22 | 23 | // TODO(dgryski): size is a power of two, all `%len(t)` should become bitmasks instead 24 | 25 | return &CF{ 26 | t: make([][4]byte, size), 27 | occupied: make([]byte, size/2), 28 | rnd: uint64(rand.Int63()), 29 | } 30 | } 31 | 32 | // Insert adds an element to the filter and returns if the insertion was successful. 33 | func (cf *CF) Insert(x uint32) bool { 34 | h := x 35 | 36 | i1 := uint32(h) % uint32(len(cf.t)) 37 | 38 | f := byte(h >> 32) 39 | 40 | i2 := (i1 ^ hashfp(f)) % uint32(len(cf.t)) 41 | 42 | if idx, ok := cf.hasSpace(i1); ok { 43 | cf.setOccupied(i1, idx, f) 44 | return true 45 | } 46 | 47 | if idx, ok := cf.hasSpace(i2); ok { 48 | cf.setOccupied(i2, idx, f) 49 | return true 50 | } 51 | 52 | i := i1 53 | cf.rnd = rnd(cf.rnd) 54 | if cf.rnd&1 == 1 { 55 | i = i2 56 | } 57 | 58 | for n := 0; n < 500; n++ { 59 | f = cf.evict(i, f) 60 | i = (i ^ hashfp(f)) % uint32(len(cf.t)) 61 | if idx, ok := cf.hasSpace(i); ok { 62 | cf.setOccupied(i, idx, f) 63 | return true 64 | } 65 | } 66 | 67 | return false 68 | } 69 | 70 | // Lookup queries the cuckoo filter for an item 71 | func (cf *CF) Lookup(x uint32) bool { 72 | h := x 73 | 74 | i1 := uint32(h) % uint32(len(cf.t)) 75 | 76 | f := byte(h >> 32) 77 | 78 | if cf.hasFP(i1, f) { 79 | return true 80 | } 81 | 82 | i2 := (i1 ^ hashfp(f)) % uint32(len(cf.t)) 83 | 84 | return cf.hasFP(i2, f) 85 | } 86 | 87 | // evict sets f in row and 
returns the evicted element 88 | func (cf *CF) evict(row uint32, f byte) byte { 89 | cf.rnd = rnd(cf.rnd) 90 | 91 | // random bucket 92 | bucket := cf.rnd & 3 93 | e := cf.t[row][bucket] 94 | cf.t[row][bucket] = f 95 | 96 | return e 97 | } 98 | 99 | // hasFP searches the row for the given fingerprint 100 | func (cf *CF) hasFP(row uint32, f byte) bool { 101 | b := cf.occupied[row/2] 102 | t := row & 1 103 | b = (b >> (uint(t) * 4)) & 0xF 104 | 105 | return false || 106 | b&0x01 == 0x01 && cf.t[row][0] == f || 107 | b&0x02 == 0x02 && cf.t[row][1] == f || 108 | b&0x04 == 0x04 && cf.t[row][2] == f || 109 | b&0x08 == 0x08 && cf.t[row][3] == f 110 | } 111 | 112 | // setOccupied puts the fingerprint at the given row/index and marks the slot as occupied 113 | func (cf *CF) setOccupied(row uint32, idx byte, f byte) { 114 | t := row & 1 115 | cf.t[row][idx] = f 116 | cf.occupied[row/2] |= (1 << idx) << (uint(t) * 4) 117 | } 118 | 119 | // freebits indicates the offset of the first 0 bit in the nybble 120 | var freebits = [16]byte{ 121 | 0, // 0000 122 | 1, // 0001 123 | 0, // 0010 124 | 2, // 0011 125 | 0, // 0100 126 | 1, // 0101 127 | 0, // 0110 128 | 3, // 0111 129 | 0, // 1000 130 | 1, // 1001 131 | 0, // 1010 132 | 2, // 1011 133 | 0, // 1100 134 | 1, // 1101 135 | 0, // 1110 136 | 0, // 1111 137 | } 138 | 139 | // hasSpace returns the index of a free entry in 'row' and a bool indicating if it was found 140 | func (cf *CF) hasSpace(row uint32) (byte, bool) { 141 | b := cf.occupied[row/2] 142 | t := row & 1 143 | b = (b >> (uint(t) * 4)) & 0xF 144 | return freebits[b], b != 0xF 145 | } 146 | 147 | // TODO(dgryski): make rnd a type that can respond with 1-4 *bits* 148 | 149 | // rnd is an xorshift/multiple random number generator 150 | func rnd(x uint64) uint64 { 151 | x ^= x >> 12 // a 152 | x ^= x << 25 // b 153 | x ^= x >> 27 // c 154 | x *= 2685821657736338717 155 | return x 156 | } 157 | 158 | // hashfp hashes a fingerprint with 2 rounds of an xorshift-mult rng 159 | 
func hashfp(b byte) uint32 {
	// Two generator steps over the fingerprint value.
	x := rnd(rnd(uint64(b)))
	// Fold the 64-bit result to 32 bits by XORing its two halves.
	return uint32(x) ^ uint32(x>>32)
}
--------------------------------------------------------------------------------
/08_cuckoo/nodeset.go:
--------------------------------------------------------------------------------
package search

import "github.com/egonelbre/a-tale-of-bfs/graph"

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// Add sets node's bit with a plain (non-atomic) read-modify-write.
func (set NodeSet) Add(node graph.Node) {
	bucket, bit := set.Offset(node)
	set[bucket] |= bit
}

// Contains reports whether node's bit is set.
func (set NodeSet) Contains(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	return set[bucket]&bit != 0
}
--------------------------------------------------------------------------------
/08_cuckoo/search.go:
--------------------------------------------------------------------------------
package search

import (
	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first reached while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
func BreadthFirst(g *graph.Graph, source graph.Node, level []int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	// NOTE(review): filter is only ever inserted into in this function; no
	// lookup against it is visible here.
	filter := NewCuckoof(1 << 10)

	visited := NewNodeSet(g.Order())

	// Both frontiers are allocated once with capacity for every node.
	currentLevel := make([]graph.Node, 0, g.Order())
	nextLevel := make([]graph.Node, 0, g.Order())

	level[source] = 1
	visited.Add(source)
	currentLevel = append(currentLevel, source)

	levelNumber := 2

	for len(currentLevel) > 0 {
		for _, node := range currentLevel {
			for _, neighbor := range g.Neighbors(node) {
				if !visited.Contains(neighbor) {
					visited.Add(neighbor)
					filter.Insert(neighbor)
					nextLevel = append(nextLevel, neighbor)
				}
			}
		}

		// Sort the new frontier, passing the full-capacity backing array of
		// the frontier that was just consumed as the buffer argument.
		zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)])

		// Levels are assigned in a separate pass over the sorted frontier.
		for _, neighbor := range nextLevel {
			level[neighbor] = levelNumber
		}

		levelNumber++
		// Empty the consumed frontier (keeping its storage) and swap roles.
		currentLevel = currentLevel[:0:cap(currentLevel)]
		currentLevel, nextLevel = nextLevel, currentLevel
	}
}
--------------------------------------------------------------------------------
/09_unroll_4/nodeset.go:
--------------------------------------------------------------------------------
package search

import "github.com/egonelbre/a-tale-of-bfs/graph"

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// Add sets node's bit with a plain (non-atomic) read-modify-write.
func (set NodeSet) Add(node graph.Node) {
	bucket, bit := set.Offset(node)
	set[bucket] |= bit
}

// Contains reports whether node's bit is set.
func (set NodeSet) Contains(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	return set[bucket]&bit != 0
}
--------------------------------------------------------------------------------
/09_unroll_4/search.go:
--------------------------------------------------------------------------------
package search

import (
	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first reached while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// This variant walks each adjacency list four neighbors at a time.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	// Both frontiers are allocated once with capacity for every node.
	currentLevel := make([]graph.Node, 0, g.Order())
	nextLevel := make([]graph.Node, 0, g.Order())

	level[source] = 1
	visited.Add(source)
	currentLevel = append(currentLevel, source)

	levelNumber := 2

	for len(currentLevel) > 0 {
		for _, node := range currentLevel {
			neighbors := g.Neighbors(node)
			i := 0

			// Manually unrolled: full groups of four neighbors.
			for ; i < len(neighbors)-3; i += 4 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				if !visited.Contains(n1) {
					visited.Add(n1)
					nextLevel = append(nextLevel, n1)
				}
				if !visited.Contains(n2) {
					visited.Add(n2)
					nextLevel = append(nextLevel, n2)
				}
				if !visited.Contains(n3) {
					visited.Add(n3)
					nextLevel = append(nextLevel, n3)
				}
				if !visited.Contains(n4) {
					visited.Add(n4)
					nextLevel = append(nextLevel, n4)
				}
			}

			// Remaining 0-3 neighbors that did not fill a group.
			for _, n := range neighbors[i:] {
				if !visited.Contains(n) {
					visited.Add(n)
					nextLevel = append(nextLevel, n)
				}
			}
		}

		// Sort the new frontier, passing the full-capacity backing array of
		// the frontier that was just consumed as the buffer argument.
		zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)])

		for _, neighbor := range nextLevel {
			level[neighbor] = levelNumber
		}

		levelNumber++
		// Empty the consumed frontier (keeping its storage) and swap roles.
		currentLevel = currentLevel[:0:cap(currentLevel)]
		currentLevel, nextLevel = nextLevel, currentLevel
	}
}
--------------------------------------------------------------------------------
/09_unroll_8/nodeset.go:
--------------------------------------------------------------------------------
package search

import "github.com/egonelbre/a-tale-of-bfs/graph"

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// Add sets node's bit with a plain (non-atomic) read-modify-write.
func (set NodeSet) Add(node graph.Node) {
	bucket, bit := set.Offset(node)
	set[bucket] |= bit
}

// Contains reports whether node's bit is set.
func (set NodeSet) Contains(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	return set[bucket]&bit != 0
}
--------------------------------------------------------------------------------
/09_unroll_8/search.go:
--------------------------------------------------------------------------------
package search

import (
	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first reached while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// This variant walks each adjacency list eight neighbors at a time.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	// Both frontiers are allocated once with capacity for every node.
	currentLevel := make([]graph.Node, 0, g.Order())
	nextLevel := make([]graph.Node, 0, g.Order())

	level[source] = 1
	visited.Add(source)
	currentLevel = append(currentLevel, source)

	levelNumber := 2

	for len(currentLevel) > 0 {
		for _, node := range currentLevel {
			neighbors := g.Neighbors(node)
			i := 0

			// Manually unrolled: full groups of eight neighbors.
			for ; i < len(neighbors)-7; i += 8 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				n5, n6, n7, n8 := neighbors[i+4], neighbors[i+5], neighbors[i+6], neighbors[i+7]
				if !visited.Contains(n1) {
					visited.Add(n1)
					nextLevel = append(nextLevel, n1)
				}
				if !visited.Contains(n2) {
					visited.Add(n2)
					nextLevel = append(nextLevel, n2)
				}
				if !visited.Contains(n3) {
					visited.Add(n3)
					nextLevel = append(nextLevel, n3)
				}
				if !visited.Contains(n4) {
					visited.Add(n4)
					nextLevel = append(nextLevel, n4)
				}
				if !visited.Contains(n5) {
					visited.Add(n5)
					nextLevel = append(nextLevel, n5)
				}
				if !visited.Contains(n6) {
					visited.Add(n6)
					nextLevel = append(nextLevel, n6)
				}
				if !visited.Contains(n7) {
					visited.Add(n7)
					nextLevel = append(nextLevel, n7)
				}
				if !visited.Contains(n8) {
					visited.Add(n8)
					nextLevel = append(nextLevel, n8)
				}
			}

			// Remaining 0-7 neighbors that did not fill a group.
			for _, n := range neighbors[i:] {
				if !visited.Contains(n) {
					visited.Add(n)
					nextLevel = append(nextLevel, n)
				}
			}
		}

		// Sort the new frontier, passing the full-capacity backing array of
		// the frontier that was just consumed as the buffer argument.
		zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)])

		for _, neighbor := range nextLevel {
			level[neighbor] = levelNumber
		}

		levelNumber++
		// Empty the consumed frontier (keeping its storage) and swap roles.
		currentLevel = currentLevel[:0:cap(currentLevel)]
		currentLevel, nextLevel = nextLevel, currentLevel
	}
}
--------------------------------------------------------------------------------
/09_unroll_8_4/nodeset.go:
--------------------------------------------------------------------------------
package search

import "github.com/egonelbre/a-tale-of-bfs/graph"

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// Add sets node's bit with a plain (non-atomic) read-modify-write.
func (set NodeSet) Add(node graph.Node) {
	bucket, bit := set.Offset(node)
	set[bucket] |= bit
}

// Contains reports whether node's bit is set.
func (set NodeSet) Contains(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	return set[bucket]&bit != 0
}
--------------------------------------------------------------------------------
/09_unroll_8_4/search.go:
--------------------------------------------------------------------------------
package search

import (
	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first reached while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// This variant walks each adjacency list in groups of eight, then groups of
// four, then one at a time.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	// Both frontiers are allocated once with capacity for every node.
	currentLevel := make([]graph.Node, 0, g.Order())
	nextLevel := make([]graph.Node, 0, g.Order())

	level[source] = 1
	visited.Add(source)
	currentLevel = append(currentLevel, source)

	levelNumber := 2

	for len(currentLevel) > 0 {
		for _, node := range currentLevel {
			neighbors := g.Neighbors(node)
			i := 0

			// Manually unrolled: full groups of eight neighbors.
			for ; i < len(neighbors)-7; i += 8 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				n5, n6, n7, n8 := neighbors[i+4], neighbors[i+5], neighbors[i+6], neighbors[i+7]
				if !visited.Contains(n1) {
					visited.Add(n1)
					nextLevel = append(nextLevel, n1)
				}
				if !visited.Contains(n2) {
					visited.Add(n2)
					nextLevel = append(nextLevel, n2)
				}
				if !visited.Contains(n3) {
					visited.Add(n3)
					nextLevel = append(nextLevel, n3)
				}
				if !visited.Contains(n4) {
					visited.Add(n4)
					nextLevel = append(nextLevel, n4)
				}
				if !visited.Contains(n5) {
					visited.Add(n5)
					nextLevel = append(nextLevel, n5)
				}
				if !visited.Contains(n6) {
					visited.Add(n6)
					nextLevel = append(nextLevel, n6)
				}
				if !visited.Contains(n7) {
					visited.Add(n7)
					nextLevel = append(nextLevel, n7)
				}
				if !visited.Contains(n8) {
					visited.Add(n8)
					nextLevel = append(nextLevel, n8)
				}
			}

			// At most one further group of four can remain after the loop above.
			for ; i < len(neighbors)-3; i += 4 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				if !visited.Contains(n1) {
					visited.Add(n1)
					nextLevel = append(nextLevel, n1)
				}
				if !visited.Contains(n2) {
					visited.Add(n2)
					nextLevel = append(nextLevel, n2)
				}
				if !visited.Contains(n3) {
					visited.Add(n3)
					nextLevel = append(nextLevel, n3)
				}
				if !visited.Contains(n4) {
					visited.Add(n4)
					nextLevel = append(nextLevel, n4)
				}
			}

			// Remaining 0-3 neighbors.
			for _, n := range neighbors[i:] {
				if !visited.Contains(n) {
					visited.Add(n)
					nextLevel = append(nextLevel, n)
				}
			}
		}

		// Sort the new frontier, passing the full-capacity backing array of
		// the frontier that was just consumed as the buffer argument.
		zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)])

		for _, neighbor := range nextLevel {
			level[neighbor] = levelNumber
		}

		levelNumber++
		// Empty the consumed frontier (keeping its storage) and swap roles.
		currentLevel = currentLevel[:0:cap(currentLevel)]
		currentLevel, nextLevel = nextLevel, currentLevel
	}
}
--------------------------------------------------------------------------------
/10_parallel/nodeset.go:
--------------------------------------------------------------------------------
package search

import (
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
)

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
// In this variant words are read and updated through sync/atomic.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// Add sets node's bit using a compare-and-swap loop. It returns as soon as
// the bit is observed to be set already or the swap succeeds; it does not
// tell the caller which of the two happened.
func (set NodeSet) Add(node graph.Node) {
	bucket, bit := set.Offset(node)
	addr := &set[bucket]
	for {
		old := atomic.LoadUint32(addr)
		if old&bit != 0 || atomic.CompareAndSwapUint32(addr, old, old|bit) {
			return
		}
	}
}

// Contains reports whether node's bit is set, using an atomic load.
func (set NodeSet) Contains(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	return atomic.LoadUint32(&set[bucket])&bit != 0
}
--------------------------------------------------------------------------------
/10_parallel/search.go:
--------------------------------------------------------------------------------
package search

import (
	"runtime"
	"sync"

	"github.com/egonelbre/a-tale-of-bfs/graph"
"github.com/shawnsmithdev/zermelo/zuint32" 9 | ) 10 | 11 | func process(ch chan<- []graph.Node, g *graph.Graph, block []graph.Node, visited *NodeSet) { 12 | for _, v := range block { 13 | neighbors := make([]graph.Node, 0) 14 | for _, neighbor := range g.Neighbors(v) { 15 | if !visited.Contains(neighbor) { 16 | visited.Add(neighbor) 17 | neighbors = append(neighbors, neighbor) 18 | } 19 | } 20 | ch <- neighbors 21 | } 22 | } 23 | 24 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int) { 25 | if len(level) != g.Order() { 26 | panic("invalid level length") 27 | } 28 | 29 | np := runtime.GOMAXPROCS(-1) / 4 30 | 31 | visited := NewNodeSet(g.Order()) 32 | 33 | currentLevel := make([]graph.Node, 0, g.Order()) 34 | nextLevel := make([]graph.Node, 0, g.Order()) 35 | 36 | level[source] = 1 37 | visited.Add(source) 38 | currentLevel = append(currentLevel, source) 39 | 40 | levelNumber := 2 41 | 42 | for len(currentLevel) > 0 { 43 | var wg sync.WaitGroup 44 | 45 | chunkSize := (len(currentLevel) + np - 1) / np 46 | var workblocks [][]graph.Node 47 | for i := 0; i < len(currentLevel); i += chunkSize { 48 | end := i + chunkSize 49 | if end > len(currentLevel) { 50 | end = len(currentLevel) 51 | } 52 | workblocks = append(workblocks, currentLevel[i:end]) 53 | } 54 | 55 | ch := make(chan []uint32, len(workblocks)) 56 | wg.Add(len(workblocks)) 57 | for _, block := range workblocks { 58 | go func(block []graph.Node) { 59 | process(ch, g, block, &visited) 60 | wg.Done() 61 | }(block) 62 | } 63 | go func() { 64 | wg.Wait() 65 | close(ch) 66 | }() 67 | 68 | for ns := range ch { 69 | nextLevel = append(nextLevel, ns...) 
70 | } 71 | 72 | zuint32.SortBYOB(nextLevel, currentLevel[:cap(currentLevel)]) 73 | 74 | for _, neighbor := range nextLevel { 75 | level[neighbor] = levelNumber 76 | } 77 | 78 | levelNumber++ 79 | currentLevel = currentLevel[:0:cap(currentLevel)] 80 | currentLevel, nextLevel = nextLevel, currentLevel 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /10_parchan/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 | const ( 10 | bucket_bits = 5 11 | bucket_size = 1 << 5 12 | bucket_mask = bucket_size - 1 13 | ) 14 | 15 | type NodeSet []uint32 16 | 17 | func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) Add(node graph.Node) { 28 | bucket, bit := set.Offset(node) 29 | addr := &set[bucket] 30 | for { 31 | old := atomic.LoadUint32(addr) 32 | if old&bit != 0 || atomic.CompareAndSwapUint32(addr, old, old|bit) { 33 | return 34 | } 35 | } 36 | } 37 | 38 | func (set NodeSet) Contains(node graph.Node) bool { 39 | bucket, bit := set.Offset(node) 40 | return atomic.LoadUint32(&set[bucket])&bit != 0 41 | } 42 | 43 | func (set NodeSet) TryAdd(node graph.Node) bool { 44 | bucket, bit := set.Offset(node) 45 | addr := &set[bucket] 46 | retry: 47 | old := atomic.LoadUint32(addr) 48 | if old&bit != 0 { 49 | return false 50 | } 51 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 52 | return true 53 | } 54 | goto retry 55 | } 56 | -------------------------------------------------------------------------------- /10_parchan/search.go: -------------------------------------------------------------------------------- 1 | 
package search

import (
	"runtime"

	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/egonelbre/async"
)

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first claimed while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// The two frontiers are buffered channels with capacity g.Order(); the
// worker function is handed to async.Run together with procs.
// NOTE(review): presumably async.Run starts procs goroutines and waits for
// all of them to return - confirm against github.com/egonelbre/async.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	currentLevel := make(chan graph.Node, g.Order())
	nextLevel := make(chan graph.Node, g.Order())

	level[source] = 1
	visited.Add(source)
	currentLevel <- source

	levelNumber := 2
	// len on a channel is the number of queued elements.
	for len(currentLevel) > 0 {
		async.Run(procs, func(gid int) {
			runtime.LockOSThread()
			for {
				select {
				case node := <-currentLevel:
					for _, neighbor := range g.Neighbors(node) {
						// TryAdd returns true for exactly one caller per
						// node, so each level entry is written once.
						if visited.TryAdd(neighbor) {
							level[neighbor] = levelNumber
							nextLevel <- neighbor
						}
					}
				default:
					// The current-level queue is empty: this worker is done.
					return
				}
			}
		})

		// The frontier lives in a channel, so unlike the slice-based
		// variants it cannot be sorted between levels.

		levelNumber++
		// The drained channel becomes the queue for the following level.
		currentLevel, nextLevel = nextLevel, currentLevel
	}
}
--------------------------------------------------------------------------------
/11_frontier/nodeset.go:
--------------------------------------------------------------------------------
package search

import (
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
)

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
// Words are read and updated through sync/atomic.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// TryAdd sets node's bit and reports whether this call was the one that set
// it: false if the bit was already set, true once the compare-and-swap
// succeeds. A failed swap re-reads the word and tries again.
func (set NodeSet) TryAdd(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	addr := &set[bucket]
retry:
	old := atomic.LoadUint32(addr)
	if old&bit != 0 {
		return false
	}
	if atomic.CompareAndSwapUint32(addr, old, old|bit) {
		return true
	}
	goto retry
}
--------------------------------------------------------------------------------
/11_frontier/search.go:
--------------------------------------------------------------------------------
package search

import (
	"runtime"
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/egonelbre/async"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

const (
	// ReadBlockSize is the number of frontier slots claimed per NextRead.
	ReadBlockSize = 256
	// WriteBlockSize is the number of frontier slots claimed per NextWrite.
	WriteBlockSize = 256
	// SentinelNode is the all-ones node value; process stores it in the
	// unused slots of a claimed write block.
	SentinelNode = ^graph.Node(0)
)

// Frontier is a slice of nodes together with a cursor that hands out
// fixed-size blocks of it.
type Frontier struct {
	Nodes []graph.Node
	// Head is advanced with atomic adds by NextRead and NextWrite.
	Head uint32
}

// NextRead claims the next ReadBlockSize slots and returns them as the
// half-open range [low, high), with high clamped to len(front.Nodes).
// Once the cursor has moved past the end, low >= high.
func (front *Frontier) NextRead() (low, high uint32) {
	high = atomic.AddUint32(&front.Head, ReadBlockSize)
	low = high - ReadBlockSize
	if high > uint32(len(front.Nodes)) {
		high = uint32(len(front.Nodes))
	}
	return
}

// NextWrite claims the next WriteBlockSize slots and returns them as the
// half-open range [low, high). No clamping against len(front.Nodes) is done.
func (front *Frontier) NextWrite() (low, high uint32) {
	high = atomic.AddUint32(&front.Head, WriteBlockSize)
	low = high - WriteBlockSize
	return
}

// Write stores v in the caller's private write block [*low, *high) and
// advances *low. When the block is used up (or was never claimed) a fresh
// block is claimed first.
func (front *Frontier) Write(low, high *uint32, v graph.Node) {
	if *low >= *high {
		*low, *high = front.NextWrite()
	}
	front.Nodes[*low] = v
	*low += 1
}

// process repeatedly claims a block of currentLevel, and for every node in
// it writes each neighbor it manages to claim in visited to nextLevel.
// It returns when currentLevel has no blocks left.
func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) {
	// Empty range: the first Write claims a block.
	writeLow, writeHigh := uint32(0), uint32(0)
	for {
		readLow, readHigh := currentLevel.NextRead()
		if readLow >= readHigh {
			break
		}

		for _, node := range currentLevel.Nodes[readLow:readHigh] {
			neighbors := g.Neighbors(node)
			i := 0

			// Manually unrolled: full groups of four neighbors.
			for ; i < len(neighbors)-3; i += 4 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				if visited.TryAdd(n1) {
					nextLevel.Write(&writeLow, &writeHigh, n1)
				}
				if visited.TryAdd(n2) {
					nextLevel.Write(&writeLow, &writeHigh, n2)
				}
				if visited.TryAdd(n3) {
					nextLevel.Write(&writeLow, &writeHigh, n3)
				}
				if visited.TryAdd(n4) {
					nextLevel.Write(&writeLow, &writeHigh, n4)
				}
			}

			// Remaining 0-3 neighbors.
			for _, n := range neighbors[i:] {
				if visited.TryAdd(n) {
					nextLevel.Write(&writeLow, &writeHigh, n)
				}
			}
		}
	}

	// Pad the unused tail of the last claimed write block with sentinels so
	// those slots do not hold stale nodes.
	for i := writeLow; i < writeHigh; i += 1 {
		nextLevel.Nodes[i] = SentinelNode
	}
}

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first claimed while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// The worker function is handed to async.Run together with procs.
// NOTE(review): presumably async.Run starts procs goroutines and waits for
// all of them to return - confirm against github.com/egonelbre/async.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	// Room for every node plus one write block per worker.
	maxSize := g.Order() + WriteBlockSize*procs

	// The read side starts empty; the write side is sliced to full length
	// so that Write can index into it directly.
	currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0}
	nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0}

	level[source] = 1
	visited.TryAdd(source)
	currentLevel.Nodes = append(currentLevel.Nodes, source)

	levelNumber := 2

	for len(currentLevel.Nodes) > 0 {
		async.Run(procs, func(i int) {
			runtime.LockOSThread()
			process(g, currentLevel, nextLevel, visited)
		})

		// Sort everything that was claimed for writing, passing the consumed
		// frontier's full-capacity storage as the buffer argument.
		zuint32.SortBYOB(nextLevel.Nodes[:nextLevel.Head], currentLevel.Nodes[:cap(currentLevel.Nodes)])

		// SentinelNode is the largest node value, so after sorting all
		// padding sits at the end; trim it off.
		for nextLevel.Head > 0 && nextLevel.Nodes[nextLevel.Head-1] == SentinelNode {
			nextLevel.Head--
		}
		for _, neighbor := range nextLevel.Nodes[:nextLevel.Head] {
			level[neighbor] = levelNumber
		}

		levelNumber++
		currentLevel, nextLevel = nextLevel, currentLevel

		// New read side: exactly the nodes written, cursor rewound.
		currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head]
		currentLevel.Head = 0

		// New write side: full-length storage, cursor rewound.
		nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)]
		nextLevel.Head = 0
	}
}
--------------------------------------------------------------------------------
/12_almost/nodeset.go:
--------------------------------------------------------------------------------
package search

import (
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
)

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
// Words are read and updated through sync/atomic.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// TryAdd sets node's bit and reports whether this call was the one that set
// it: false if the bit was already set, true once the compare-and-swap
// succeeds. A failed swap re-reads the word and tries again.
func (set NodeSet) TryAdd(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	addr := &set[bucket]
retry:
	old := atomic.LoadUint32(addr)
	if old&bit != 0 {
		return false
	}
	if atomic.CompareAndSwapUint32(addr, old, old|bit) {
		return true
	}
	goto retry
}
--------------------------------------------------------------------------------
/12_almost/search.go:
--------------------------------------------------------------------------------
package search

import (
	"runtime"
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/egonelbre/async"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

const (
	// ReadBlockSize is the number of frontier slots claimed per NextRead.
	ReadBlockSize = 256
	// WriteBlockSize is the number of frontier slots claimed per NextWrite.
	WriteBlockSize = 256
	// SentinelNode is the all-ones node value; process stores it in the
	// unused slots of a claimed write block and skips it when reading.
	SentinelNode = ^graph.Node(0)
)

// Frontier is a slice of nodes together with a cursor that hands out
// fixed-size blocks of it.
type Frontier struct {
	Nodes []graph.Node
	// Head is advanced with atomic adds by NextRead and NextWrite.
	Head uint32
}

// NextRead claims the next ReadBlockSize slots and returns them as the
// half-open range [low, high), with high clamped to len(front.Nodes).
// Once the cursor has moved past the end, low >= high.
func (front *Frontier) NextRead() (low, high uint32) {
	high = atomic.AddUint32(&front.Head, ReadBlockSize)
	low = high - ReadBlockSize
	if high > uint32(len(front.Nodes)) {
		high = uint32(len(front.Nodes))
	}
	return
}

// NextWrite claims the next WriteBlockSize slots and returns them as the
// half-open range [low, high). No clamping against len(front.Nodes) is done.
func (front *Frontier) NextWrite() (low, high uint32) {
	high = atomic.AddUint32(&front.Head, WriteBlockSize)
	low = high - WriteBlockSize
	return
}

// Write stores v in the caller's private write block [*low, *high) and
// advances *low. When the block is used up (or was never claimed) a fresh
// block is claimed first.
func (front *Frontier) Write(low, high *uint32, v graph.Node) {
	if *low >= *high {
		*low, *high = front.NextWrite()
	}
	front.Nodes[*low] = v
	*low += 1
}

// process repeatedly claims a block of currentLevel, and for every
// non-sentinel node in it writes each neighbor it manages to claim in
// visited to nextLevel. It returns when currentLevel has no blocks left.
func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) {
	// Empty range: the first Write claims a block.
	writeLow, writeHigh := uint32(0), uint32(0)
	for {
		readLow, readHigh := currentLevel.NextRead()
		if readLow >= readHigh {
			break
		}

		for _, node := range currentLevel.Nodes[readLow:readHigh] {
			// Padding left over from the previous level's write blocks.
			if node == SentinelNode {
				continue
			}

			neighbors := g.Neighbors(node)
			i := 0

			// Manually unrolled: full groups of four neighbors.
			for ; i < len(neighbors)-3; i += 4 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				if visited.TryAdd(n1) {
					nextLevel.Write(&writeLow, &writeHigh, n1)
				}
				if visited.TryAdd(n2) {
					nextLevel.Write(&writeLow, &writeHigh, n2)
				}
				if visited.TryAdd(n3) {
					nextLevel.Write(&writeLow, &writeHigh, n3)
				}
				if visited.TryAdd(n4) {
					nextLevel.Write(&writeLow, &writeHigh, n4)
				}
			}

			// Remaining 0-3 neighbors.
			for _, n := range neighbors[i:] {
				if visited.TryAdd(n) {
					nextLevel.Write(&writeLow, &writeHigh, n)
				}
			}
		}
	}

	// Pad the unused tail of the last claimed write block with sentinels so
	// those slots do not hold stale nodes.
	for i := writeLow; i < writeHigh; i += 1 {
		nextLevel.Nodes[i] = SentinelNode
	}
}

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first claimed while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// NOTE(review): presumably async.Run starts procs goroutines and waits for
// them, and async.BlockIter splits [0, n) into ranges handled concurrently -
// confirm against github.com/egonelbre/async.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	// Room for every node plus one write block per worker.
	maxSize := g.Order() + WriteBlockSize*procs

	// The read side starts empty; the write side is sliced to full length
	// so that Write can index into it directly.
	currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0}
	nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0}

	level[source] = 1
	visited.TryAdd(source)
	currentLevel.Nodes = append(currentLevel.Nodes, source)

	levelNumber := 2

	for len(currentLevel.Nodes) > 0 {
		async.Run(procs, func(i int) {
			runtime.LockOSThread()
			process(g, currentLevel, nextLevel, visited)
		})

		// Each range is sorted on its own, with the matching range of the
		// consumed frontier's storage as the buffer argument. The frontier
		// as a whole is therefore not sorted, and sentinels can remain in
		// the middle of it.
		async.BlockIter(int(nextLevel.Head), procs, func(low, high int) {
			runtime.LockOSThread()
			zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high])
		})

		for _, neighbor := range nextLevel.Nodes[:nextLevel.Head] {
			if neighbor == SentinelNode {
				continue
			}
			level[neighbor] = levelNumber
		}

		levelNumber++
		currentLevel, nextLevel = nextLevel, currentLevel

		// New read side: every claimed slot (sentinels included), cursor
		// rewound.
		currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head]
		currentLevel.Head = 0

		// New write side: full-length storage, cursor rewound.
		nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)]
		nextLevel.Head = 0
	}
}
--------------------------------------------------------------------------------
/13_marking/nodeset.go:
--------------------------------------------------------------------------------
package search

import (
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
)

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
// Words are read and updated through sync/atomic.
type NodeSet []uint32

// NewNodeSet returns an empty set with room for size bits; the word count is
// size/32 rounded up.
func NewNodeSet(size int) NodeSet {
	return NodeSet(make([]uint32, (size+31)/32))
}

// Offset splits node into the index of the word holding its bit (bucket) and
// the single-bit mask inside that word (bit).
func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) {
	bucket = uint32(node >> bucket_bits)
	bit = uint32(1 << (node & bucket_mask))
	return bucket, bit
}

// TryAdd sets node's bit and reports whether this call was the one that set
// it: false if the bit was already set, true once the compare-and-swap
// succeeds. A failed swap re-reads the word and tries again.
func (set NodeSet) TryAdd(node graph.Node) bool {
	bucket, bit := set.Offset(node)
	addr := &set[bucket]
retry:
	old := atomic.LoadUint32(addr)
	if old&bit != 0 {
		return false
	}
	if atomic.CompareAndSwapUint32(addr, old, old|bit) {
		return true
	}
	goto retry
}
--------------------------------------------------------------------------------
/13_marking/search.go:
--------------------------------------------------------------------------------
package search

import (
	"runtime"
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
	"github.com/egonelbre/async"
	"github.com/shawnsmithdev/zermelo/zuint32"
)

const (
	// ReadBlockSize is the number of frontier slots claimed per NextRead.
	ReadBlockSize = 256
	// WriteBlockSize is the number of frontier slots claimed per NextWrite.
	WriteBlockSize = 256
	// SentinelNode is the all-ones node value; process stores it in the
	// unused slots of a claimed write block and skips it when reading.
	SentinelNode = ^graph.Node(0)
)

// Frontier is a slice of nodes together with a cursor that hands out
// fixed-size blocks of it.
type Frontier struct {
	Nodes []graph.Node
	// Head is advanced with atomic adds by NextRead and NextWrite.
	Head uint32
}

// NextRead claims the next ReadBlockSize slots and returns them as the
// half-open range [low, high), with high clamped to len(front.Nodes).
// Once the cursor has moved past the end, low >= high.
func (front *Frontier) NextRead() (low, high uint32) {
	high = atomic.AddUint32(&front.Head, ReadBlockSize)
	low = high - ReadBlockSize
	if high > uint32(len(front.Nodes)) {
		high = uint32(len(front.Nodes))
	}
	return
}

// NextWrite claims the next WriteBlockSize slots and returns them as the
// half-open range [low, high). No clamping against len(front.Nodes) is done.
func (front *Frontier) NextWrite() (low, high uint32) {
	high = atomic.AddUint32(&front.Head, WriteBlockSize)
	low = high - WriteBlockSize
	return
}

// Write stores v in the caller's private write block [*low, *high) and
// advances *low. When the block is used up (or was never claimed) a fresh
// block is claimed first.
func (front *Frontier) Write(low, high *uint32, v graph.Node) {
	if *low >= *high {
		*low, *high = front.NextWrite()
	}
	front.Nodes[*low] = v
	*low += 1
}

// process repeatedly claims a block of currentLevel, and for every
// non-sentinel node in it writes each neighbor it manages to claim in
// visited to nextLevel. It returns when currentLevel has no blocks left.
func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) {
	// Empty range: the first Write claims a block.
	writeLow, writeHigh := uint32(0), uint32(0)
	for {
		readLow, readHigh := currentLevel.NextRead()
		if readLow >= readHigh {
			break
		}

		for _, node := range currentLevel.Nodes[readLow:readHigh] {
			// Padding left over from the previous level's write blocks.
			if node == SentinelNode {
				continue
			}

			neighbors := g.Neighbors(node)
			i := 0

			// Manually unrolled: full groups of four neighbors.
			for ; i < len(neighbors)-3; i += 4 {
				n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3]
				if visited.TryAdd(n1) {
					nextLevel.Write(&writeLow, &writeHigh, n1)
				}
				if visited.TryAdd(n2) {
					nextLevel.Write(&writeLow, &writeHigh, n2)
				}
				if visited.TryAdd(n3) {
					nextLevel.Write(&writeLow, &writeHigh, n3)
				}
				if visited.TryAdd(n4) {
					nextLevel.Write(&writeLow, &writeHigh, n4)
				}
			}

			// Remaining 0-3 neighbors.
			for _, n := range neighbors[i:] {
				if visited.TryAdd(n) {
					nextLevel.Write(&writeLow, &writeHigh, n)
				}
			}
		}
	}

	// Pad the unused tail of the last claimed write block with sentinels so
	// those slots do not hold stale nodes.
	for i := writeLow; i < writeHigh; i += 1 {
		nextLevel.Nodes[i] = SentinelNode
	}
}

// BreadthFirst writes breadth-first levels, starting from source, into level:
// level[source] is 1 and every node first claimed while expanding level k
// gets k+1. Entries of nodes that are never reached are left untouched.
// It panics if len(level) differs from g.Order().
//
// NOTE(review): presumably async.Run starts procs goroutines and waits for
// them, and async.BlockIter splits [0, n) into ranges handled concurrently -
// confirm against github.com/egonelbre/async.
func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) {
	if len(level) != g.Order() {
		panic("invalid level length")
	}

	visited := NewNodeSet(g.Order())

	// Room for every node plus one write block per worker.
	maxSize := g.Order() + WriteBlockSize*procs

	// The read side starts empty; the write side is sliced to full length
	// so that Write can index into it directly.
	currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0}
	nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0}

	level[source] = 1
	visited.TryAdd(source)
	currentLevel.Nodes = append(currentLevel.Nodes, source)

	levelNumber := 2

	for len(currentLevel.Nodes) > 0 {
		async.Run(procs, func(i int) {
			runtime.LockOSThread()
			process(g, currentLevel, nextLevel, visited)
		})

		// Sorting and level marking are done per range in the same callback.
		async.BlockIter(int(nextLevel.Head), procs, func(low, high int) {
			runtime.LockOSThread()
			zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high])
			for _, neighbor := range nextLevel.Nodes[low:high] {
				// SentinelNode is the largest node value, so within a sorted
				// range everything from the first sentinel on is padding.
				if neighbor == SentinelNode {
					break
				}
				level[neighbor] = levelNumber
			}
		})

		levelNumber++
		currentLevel, nextLevel = nextLevel, currentLevel

		// New read side: every claimed slot (sentinels included), cursor
		// rewound.
		currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head]
		currentLevel.Head = 0

		// New write side: full-length storage, cursor rewound.
		nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)]
		nextLevel.Head = 0
	}
}
--------------------------------------------------------------------------------
/14_early_2/nodeset.go:
--------------------------------------------------------------------------------
package search

import (
	"sync/atomic"

	"github.com/egonelbre/a-tale-of-bfs/graph"
)

// Each bucket is one uint32 word, i.e. 32 membership bits.
const (
	bucket_bits = 5
	bucket_size = 1 << 5
	bucket_mask = bucket_size - 1
)

// NodeSet is a bitset over node ids, packed 32 bits per uint32 word.
type NodeSet []uint32

func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) GetBuckets1(a graph.Node) (x uint32) { 28 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 29 | return 30 | } 31 | 32 | func (set NodeSet) GetBuckets2(a, b graph.Node) (x, y uint32) { 33 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 34 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 35 | return 36 | } 37 | 38 | func (set NodeSet) GetBuckets3(a, b, c graph.Node) (x, y, z uint32) { 39 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 40 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 41 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 42 | return 43 | } 44 | 45 | func (set NodeSet) GetBuckets4(a, b, c, d graph.Node) (x, y, z, w uint32) { 46 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 47 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 48 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 49 | w = atomic.LoadUint32(&set[d>>bucket_bits]) 50 | return 51 | } 52 | 53 | func (set NodeSet) TryAdd(node graph.Node) bool { 54 | bucket, bit := set.Offset(node) 55 | addr := &set[bucket] 56 | retry: 57 | old := atomic.LoadUint32(addr) 58 | if old&bit != 0 { 59 | return false 60 | } 61 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 62 | return true 63 | } 64 | goto retry 65 | } 66 | 67 | func (set NodeSet) TryAddFrom(old uint32, node graph.Node) bool { 68 | bucket, bit := set.Offset(node) 69 | if old&bit != 0 { 70 | return false 71 | } 72 | addr := &set[bucket] 73 | retry: 74 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 75 | return true 76 | } 77 | old = atomic.LoadUint32(addr) 78 | if old&bit != 0 { 79 | return false 80 | } 81 | goto retry 82 | } 83 | -------------------------------------------------------------------------------- /14_early_2/search.go: 
-------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "runtime" 5 | "sync/atomic" 6 | 7 | "github.com/egonelbre/a-tale-of-bfs/graph" 8 | "github.com/egonelbre/async" 9 | "github.com/shawnsmithdev/zermelo/zuint32" 10 | ) 11 | 12 | const ( 13 | ReadBlockSize = 256 14 | WriteBlockSize = 256 15 | SentinelNode = ^graph.Node(0) 16 | ) 17 | 18 | type Frontier struct { 19 | Nodes []graph.Node 20 | Head uint32 21 | } 22 | 23 | func (front *Frontier) NextRead() (low, high uint32) { 24 | high = atomic.AddUint32(&front.Head, ReadBlockSize) 25 | low = high - ReadBlockSize 26 | if high > uint32(len(front.Nodes)) { 27 | high = uint32(len(front.Nodes)) 28 | } 29 | return 30 | } 31 | 32 | func (front *Frontier) NextWrite() (low, high uint32) { 33 | high = atomic.AddUint32(&front.Head, WriteBlockSize) 34 | low = high - WriteBlockSize 35 | return 36 | } 37 | 38 | func (front *Frontier) Write(low, high *uint32, v graph.Node) { 39 | if *low >= *high { 40 | *low, *high = front.NextWrite() 41 | } 42 | front.Nodes[*low] = v 43 | *low += 1 44 | } 45 | 46 | func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) { 47 | writeLow, writeHigh := uint32(0), uint32(0) 48 | for { 49 | readLow, readHigh := currentLevel.NextRead() 50 | if readLow >= readHigh { 51 | break 52 | } 53 | 54 | for _, node := range currentLevel.Nodes[readLow:readHigh] { 55 | if node == SentinelNode { 56 | continue 57 | } 58 | 59 | neighbors := g.Neighbors(node) 60 | i := 0 61 | 62 | for ; i < len(neighbors)-3; i += 4 { 63 | n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3] 64 | x3, x4 := visited.GetBuckets2(n3, n4) 65 | if visited.TryAdd(n1) { 66 | nextLevel.Write(&writeLow, &writeHigh, n1) 67 | } 68 | if visited.TryAdd(n2) { 69 | nextLevel.Write(&writeLow, &writeHigh, n2) 70 | } 71 | if visited.TryAddFrom(x3, n3) { 72 | nextLevel.Write(&writeLow, &writeHigh, n3) 73 | } 74 | if visited.TryAddFrom(x4, n4) 
{ 75 | nextLevel.Write(&writeLow, &writeHigh, n4) 76 | } 77 | } 78 | 79 | for _, n := range neighbors[i:] { 80 | if visited.TryAdd(n) { 81 | nextLevel.Write(&writeLow, &writeHigh, n) 82 | } 83 | } 84 | } 85 | } 86 | 87 | for i := writeLow; i < writeHigh; i += 1 { 88 | nextLevel.Nodes[i] = SentinelNode 89 | } 90 | } 91 | 92 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) { 93 | if len(level) != g.Order() { 94 | panic("invalid level length") 95 | } 96 | 97 | visited := NewNodeSet(g.Order()) 98 | 99 | maxSize := g.Order() + WriteBlockSize*procs 100 | 101 | currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0} 102 | nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0} 103 | 104 | level[source] = 1 105 | visited.TryAdd(source) 106 | currentLevel.Nodes = append(currentLevel.Nodes, source) 107 | 108 | levelNumber := 2 109 | 110 | for len(currentLevel.Nodes) > 0 { 111 | async.Run(procs, func(i int) { 112 | runtime.LockOSThread() 113 | process(g, currentLevel, nextLevel, visited) 114 | }) 115 | 116 | async.BlockIter(int(nextLevel.Head), procs, func(low, high int) { 117 | runtime.LockOSThread() 118 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 119 | for _, neighbor := range nextLevel.Nodes[low:high] { 120 | if neighbor == SentinelNode { 121 | break 122 | } 123 | level[neighbor] = levelNumber 124 | } 125 | }) 126 | 127 | levelNumber++ 128 | currentLevel, nextLevel = nextLevel, currentLevel 129 | 130 | currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head] 131 | currentLevel.Head = 0 132 | 133 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 134 | nextLevel.Head = 0 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /14_early_3/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 
| const ( 10 | bucket_bits = 5 11 | bucket_size = 1 << 5 12 | bucket_mask = bucket_size - 1 13 | ) 14 | 15 | type NodeSet []uint32 16 | 17 | func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) GetBuckets1(a graph.Node) (x uint32) { 28 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 29 | return 30 | } 31 | 32 | func (set NodeSet) GetBuckets2(a, b graph.Node) (x, y uint32) { 33 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 34 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 35 | return 36 | } 37 | 38 | func (set NodeSet) GetBuckets3(a, b, c graph.Node) (x, y, z uint32) { 39 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 40 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 41 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 42 | return 43 | } 44 | 45 | func (set NodeSet) GetBuckets4(a, b, c, d graph.Node) (x, y, z, w uint32) { 46 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 47 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 48 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 49 | w = atomic.LoadUint32(&set[d>>bucket_bits]) 50 | return 51 | } 52 | 53 | func (set NodeSet) TryAdd(node graph.Node) bool { 54 | bucket, bit := set.Offset(node) 55 | addr := &set[bucket] 56 | retry: 57 | old := atomic.LoadUint32(addr) 58 | if old&bit != 0 { 59 | return false 60 | } 61 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 62 | return true 63 | } 64 | goto retry 65 | } 66 | 67 | func (set NodeSet) TryAddFrom(old uint32, node graph.Node) bool { 68 | bucket, bit := set.Offset(node) 69 | if old&bit != 0 { 70 | return false 71 | } 72 | addr := &set[bucket] 73 | retry: 74 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 75 | return true 76 | } 77 | old = atomic.LoadUint32(addr) 78 | if old&bit != 0 { 79 | return false 80 | } 81 | goto 
retry 82 | } 83 | -------------------------------------------------------------------------------- /14_early_3/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "runtime" 5 | "sync/atomic" 6 | 7 | "github.com/egonelbre/a-tale-of-bfs/graph" 8 | "github.com/egonelbre/async" 9 | "github.com/shawnsmithdev/zermelo/zuint32" 10 | ) 11 | 12 | const ( 13 | ReadBlockSize = 256 14 | WriteBlockSize = 256 15 | SentinelNode = ^graph.Node(0) 16 | ) 17 | 18 | type Frontier struct { 19 | Nodes []graph.Node 20 | Head uint32 21 | } 22 | 23 | func (front *Frontier) NextRead() (low, high uint32) { 24 | high = atomic.AddUint32(&front.Head, ReadBlockSize) 25 | low = high - ReadBlockSize 26 | if high > uint32(len(front.Nodes)) { 27 | high = uint32(len(front.Nodes)) 28 | } 29 | return 30 | } 31 | 32 | func (front *Frontier) NextWrite() (low, high uint32) { 33 | high = atomic.AddUint32(&front.Head, WriteBlockSize) 34 | low = high - WriteBlockSize 35 | return 36 | } 37 | 38 | func (front *Frontier) Write(low, high *uint32, v graph.Node) { 39 | if *low >= *high { 40 | *low, *high = front.NextWrite() 41 | } 42 | front.Nodes[*low] = v 43 | *low += 1 44 | } 45 | 46 | func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) { 47 | writeLow, writeHigh := uint32(0), uint32(0) 48 | for { 49 | readLow, readHigh := currentLevel.NextRead() 50 | if readLow >= readHigh { 51 | break 52 | } 53 | 54 | for _, node := range currentLevel.Nodes[readLow:readHigh] { 55 | if node == SentinelNode { 56 | continue 57 | } 58 | 59 | neighbors := g.Neighbors(node) 60 | i := 0 61 | 62 | for ; i < len(neighbors)-3; i += 4 { 63 | n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3] 64 | x2, x3, x4 := visited.GetBuckets3(n2, n3, n4) 65 | if visited.TryAdd(n1) { 66 | nextLevel.Write(&writeLow, &writeHigh, n1) 67 | } 68 | if visited.TryAddFrom(x2, n2) { 69 | nextLevel.Write(&writeLow, &writeHigh, 
n2) 70 | } 71 | if visited.TryAddFrom(x3, n3) { 72 | nextLevel.Write(&writeLow, &writeHigh, n3) 73 | } 74 | if visited.TryAddFrom(x4, n4) { 75 | nextLevel.Write(&writeLow, &writeHigh, n4) 76 | } 77 | } 78 | 79 | for _, n := range neighbors[i:] { 80 | if visited.TryAdd(n) { 81 | nextLevel.Write(&writeLow, &writeHigh, n) 82 | } 83 | } 84 | } 85 | } 86 | 87 | for i := writeLow; i < writeHigh; i += 1 { 88 | nextLevel.Nodes[i] = SentinelNode 89 | } 90 | } 91 | 92 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) { 93 | if len(level) != g.Order() { 94 | panic("invalid level length") 95 | } 96 | 97 | visited := NewNodeSet(g.Order()) 98 | 99 | maxSize := g.Order() + WriteBlockSize*procs 100 | 101 | currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0} 102 | nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0} 103 | 104 | level[source] = 1 105 | visited.TryAdd(source) 106 | currentLevel.Nodes = append(currentLevel.Nodes, source) 107 | 108 | levelNumber := 2 109 | 110 | for len(currentLevel.Nodes) > 0 { 111 | async.Run(procs, func(i int) { 112 | runtime.LockOSThread() 113 | process(g, currentLevel, nextLevel, visited) 114 | }) 115 | 116 | async.BlockIter(int(nextLevel.Head), procs, func(low, high int) { 117 | runtime.LockOSThread() 118 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 119 | for _, neighbor := range nextLevel.Nodes[low:high] { 120 | if neighbor == SentinelNode { 121 | break 122 | } 123 | level[neighbor] = levelNumber 124 | } 125 | }) 126 | 127 | levelNumber++ 128 | currentLevel, nextLevel = nextLevel, currentLevel 129 | 130 | currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head] 131 | currentLevel.Head = 0 132 | 133 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 134 | nextLevel.Head = 0 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /14_early_4/nodeset.go: 
-------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 | const ( 10 | bucket_bits = 5 11 | bucket_size = 1 << 5 12 | bucket_mask = bucket_size - 1 13 | ) 14 | 15 | type NodeSet []uint32 16 | 17 | func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) GetBuckets1(a graph.Node) (x uint32) { 28 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 29 | return 30 | } 31 | 32 | func (set NodeSet) GetBuckets2(a, b graph.Node) (x, y uint32) { 33 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 34 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 35 | return 36 | } 37 | 38 | func (set NodeSet) GetBuckets3(a, b, c graph.Node) (x, y, z uint32) { 39 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 40 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 41 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 42 | return 43 | } 44 | 45 | func (set NodeSet) GetBuckets4(a, b, c, d graph.Node) (x, y, z, w uint32) { 46 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 47 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 48 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 49 | w = atomic.LoadUint32(&set[d>>bucket_bits]) 50 | return 51 | } 52 | 53 | func (set NodeSet) TryAdd(node graph.Node) bool { 54 | bucket, bit := set.Offset(node) 55 | addr := &set[bucket] 56 | retry: 57 | old := atomic.LoadUint32(addr) 58 | if old&bit != 0 { 59 | return false 60 | } 61 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 62 | return true 63 | } 64 | goto retry 65 | } 66 | 67 | func (set NodeSet) TryAddFrom(old uint32, node graph.Node) bool { 68 | bucket, bit := set.Offset(node) 69 | if old&bit != 0 { 70 | return false 71 | } 72 | addr := 
&set[bucket] 73 | retry: 74 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 75 | return true 76 | } 77 | old = atomic.LoadUint32(addr) 78 | if old&bit != 0 { 79 | return false 80 | } 81 | goto retry 82 | } 83 | -------------------------------------------------------------------------------- /14_early_4/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "runtime" 5 | "sync/atomic" 6 | 7 | "github.com/egonelbre/a-tale-of-bfs/graph" 8 | "github.com/egonelbre/async" 9 | "github.com/shawnsmithdev/zermelo/zuint32" 10 | ) 11 | 12 | const ( 13 | ReadBlockSize = 256 14 | WriteBlockSize = 256 15 | SentinelNode = ^graph.Node(0) 16 | ) 17 | 18 | type Frontier struct { 19 | Nodes []graph.Node 20 | Head uint32 21 | } 22 | 23 | func (front *Frontier) NextRead() (low, high uint32) { 24 | high = atomic.AddUint32(&front.Head, ReadBlockSize) 25 | low = high - ReadBlockSize 26 | if high > uint32(len(front.Nodes)) { 27 | high = uint32(len(front.Nodes)) 28 | } 29 | return 30 | } 31 | 32 | func (front *Frontier) NextWrite() (low, high uint32) { 33 | high = atomic.AddUint32(&front.Head, WriteBlockSize) 34 | low = high - WriteBlockSize 35 | return 36 | } 37 | 38 | func (front *Frontier) Write(low, high *uint32, v graph.Node) { 39 | if *low >= *high { 40 | *low, *high = front.NextWrite() 41 | } 42 | front.Nodes[*low] = v 43 | *low += 1 44 | } 45 | 46 | func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) { 47 | writeLow, writeHigh := uint32(0), uint32(0) 48 | for { 49 | readLow, readHigh := currentLevel.NextRead() 50 | if readLow >= readHigh { 51 | break 52 | } 53 | 54 | for _, node := range currentLevel.Nodes[readLow:readHigh] { 55 | if node == SentinelNode { 56 | continue 57 | } 58 | 59 | neighbors := g.Neighbors(node) 60 | i := 0 61 | 62 | for ; i < len(neighbors)-3; i += 4 { 63 | n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3] 64 | x1, x2, 
x3, x4 := visited.GetBuckets4(n1, n2, n3, n4) 65 | if visited.TryAddFrom(x1, n1) { 66 | nextLevel.Write(&writeLow, &writeHigh, n1) 67 | } 68 | if visited.TryAddFrom(x2, n2) { 69 | nextLevel.Write(&writeLow, &writeHigh, n2) 70 | } 71 | if visited.TryAddFrom(x3, n3) { 72 | nextLevel.Write(&writeLow, &writeHigh, n3) 73 | } 74 | if visited.TryAddFrom(x4, n4) { 75 | nextLevel.Write(&writeLow, &writeHigh, n4) 76 | } 77 | } 78 | 79 | for _, n := range neighbors[i:] { 80 | if visited.TryAdd(n) { 81 | nextLevel.Write(&writeLow, &writeHigh, n) 82 | } 83 | } 84 | } 85 | } 86 | 87 | for i := writeLow; i < writeHigh; i += 1 { 88 | nextLevel.Nodes[i] = SentinelNode 89 | } 90 | } 91 | 92 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) { 93 | if len(level) != g.Order() { 94 | panic("invalid level length") 95 | } 96 | 97 | visited := NewNodeSet(g.Order()) 98 | 99 | maxSize := g.Order() + WriteBlockSize*procs 100 | 101 | currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0} 102 | nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0} 103 | 104 | level[source] = 1 105 | visited.TryAdd(source) 106 | currentLevel.Nodes = append(currentLevel.Nodes, source) 107 | 108 | levelNumber := 2 109 | 110 | for len(currentLevel.Nodes) > 0 { 111 | async.Run(procs, func(i int) { 112 | runtime.LockOSThread() 113 | process(g, currentLevel, nextLevel, visited) 114 | }) 115 | 116 | async.BlockIter(int(nextLevel.Head), procs, func(low, high int) { 117 | runtime.LockOSThread() 118 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 119 | for _, neighbor := range nextLevel.Nodes[low:high] { 120 | if neighbor == SentinelNode { 121 | break 122 | } 123 | level[neighbor] = levelNumber 124 | } 125 | }) 126 | 127 | levelNumber++ 128 | currentLevel, nextLevel = nextLevel, currentLevel 129 | 130 | currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head] 131 | currentLevel.Head = 0 132 | 133 | nextLevel.Nodes = 
nextLevel.Nodes[:cap(nextLevel.Nodes)] 134 | nextLevel.Head = 0 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /14_early_r/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 | const ( 10 | bucket_bits = 5 11 | bucket_size = 1 << 5 12 | bucket_mask = bucket_size - 1 13 | ) 14 | 15 | type NodeSet []uint32 16 | 17 | func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) GetBuckets1(a graph.Node) (x uint32) { 28 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 29 | return 30 | } 31 | 32 | func (set NodeSet) GetBuckets2(a, b graph.Node) (x, y uint32) { 33 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 34 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 35 | return 36 | } 37 | 38 | func (set NodeSet) GetBuckets3(a, b, c graph.Node) (x, y, z uint32) { 39 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 40 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 41 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 42 | return 43 | } 44 | 45 | func (set NodeSet) GetBuckets4(a, b, c, d graph.Node) (x, y, z, w uint32) { 46 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 47 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 48 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 49 | w = atomic.LoadUint32(&set[d>>bucket_bits]) 50 | return 51 | } 52 | 53 | func (set NodeSet) TryAdd(node graph.Node) bool { 54 | bucket, bit := set.Offset(node) 55 | addr := &set[bucket] 56 | retry: 57 | old := atomic.LoadUint32(addr) 58 | if old&bit != 0 { 59 | return false 60 | } 61 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 62 | return true 63 | } 64 | 
goto retry 65 | } 66 | 67 | func (set NodeSet) TryAddFrom(old uint32, node graph.Node) bool { 68 | bucket, bit := set.Offset(node) 69 | if old&bit != 0 { 70 | return false 71 | } 72 | addr := &set[bucket] 73 | retry: 74 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 75 | return true 76 | } 77 | old = atomic.LoadUint32(addr) 78 | if old&bit != 0 { 79 | return false 80 | } 81 | goto retry 82 | } 83 | -------------------------------------------------------------------------------- /14_early_r/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "runtime" 5 | "sync/atomic" 6 | 7 | "github.com/egonelbre/a-tale-of-bfs/graph" 8 | "github.com/egonelbre/async" 9 | "github.com/shawnsmithdev/zermelo/zuint32" 10 | ) 11 | 12 | const ( 13 | ReadBlockSize = 256 14 | WriteBlockSize = 256 15 | SentinelNode = ^graph.Node(0) 16 | ) 17 | 18 | type Frontier struct { 19 | Nodes []graph.Node 20 | Head uint32 21 | } 22 | 23 | func (front *Frontier) NextRead() (low, high uint32) { 24 | high = atomic.AddUint32(&front.Head, ReadBlockSize) 25 | low = high - ReadBlockSize 26 | if high > uint32(len(front.Nodes)) { 27 | high = uint32(len(front.Nodes)) 28 | } 29 | return 30 | } 31 | 32 | func (front *Frontier) NextWrite() (low, high uint32) { 33 | high = atomic.AddUint32(&front.Head, WriteBlockSize) 34 | low = high - WriteBlockSize 35 | return 36 | } 37 | 38 | func (front *Frontier) Write(low, high *uint32, v graph.Node) { 39 | if *low >= *high { 40 | *low, *high = front.NextWrite() 41 | } 42 | front.Nodes[*low] = v 43 | *low += 1 44 | } 45 | 46 | func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) { 47 | writeLow, writeHigh := uint32(0), uint32(0) 48 | for { 49 | readLow, readHigh := currentLevel.NextRead() 50 | if readLow >= readHigh { 51 | break 52 | } 53 | 54 | for _, node := range currentLevel.Nodes[readLow:readHigh] { 55 | if node == SentinelNode { 56 | continue 57 | } 58 | 59 
| neighbors := g.Neighbors(node) 60 | i := 0 61 | 62 | for ; i < len(neighbors)-3; i += 4 { 63 | n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3] 64 | 65 | x2 := visited.GetBuckets1(n2) 66 | if visited.TryAdd(n1) { 67 | nextLevel.Write(&writeLow, &writeHigh, n1) 68 | } 69 | x3 := visited.GetBuckets1(n3) 70 | if visited.TryAddFrom(x2, n2) { 71 | nextLevel.Write(&writeLow, &writeHigh, n2) 72 | } 73 | x4 := visited.GetBuckets1(n4) 74 | if visited.TryAddFrom(x3, n3) { 75 | nextLevel.Write(&writeLow, &writeHigh, n3) 76 | } 77 | if visited.TryAddFrom(x4, n4) { 78 | nextLevel.Write(&writeLow, &writeHigh, n4) 79 | } 80 | } 81 | 82 | for _, n := range neighbors[i:] { 83 | if visited.TryAdd(n) { 84 | nextLevel.Write(&writeLow, &writeHigh, n) 85 | } 86 | } 87 | } 88 | } 89 | 90 | for i := writeLow; i < writeHigh; i += 1 { 91 | nextLevel.Nodes[i] = SentinelNode 92 | } 93 | } 94 | 95 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) { 96 | if len(level) != g.Order() { 97 | panic("invalid level length") 98 | } 99 | 100 | visited := NewNodeSet(g.Order()) 101 | 102 | maxSize := g.Order() + WriteBlockSize*procs 103 | 104 | currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0} 105 | nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0} 106 | 107 | level[source] = 1 108 | visited.TryAdd(source) 109 | currentLevel.Nodes = append(currentLevel.Nodes, source) 110 | 111 | levelNumber := 2 112 | 113 | for len(currentLevel.Nodes) > 0 { 114 | async.Run(procs, func(i int) { 115 | runtime.LockOSThread() 116 | process(g, currentLevel, nextLevel, visited) 117 | }) 118 | 119 | async.BlockIter(int(nextLevel.Head), procs, func(low, high int) { 120 | runtime.LockOSThread() 121 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 122 | for _, neighbor := range nextLevel.Nodes[low:high] { 123 | if neighbor == SentinelNode { 124 | break 125 | } 126 | level[neighbor] = levelNumber 127 | } 128 | }) 129 | 
130 | levelNumber++ 131 | currentLevel, nextLevel = nextLevel, currentLevel 132 | 133 | currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head] 134 | currentLevel.Head = 0 135 | 136 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 137 | nextLevel.Head = 0 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /15_worker/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 | const ( 10 | bucket_bits = 5 11 | bucket_size = 1 << 5 12 | bucket_mask = bucket_size - 1 13 | ) 14 | 15 | type NodeSet []uint32 16 | 17 | func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) GetBuckets1(a graph.Node) (x uint32) { 28 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 29 | return 30 | } 31 | 32 | func (set NodeSet) GetBuckets2(a, b graph.Node) (x, y uint32) { 33 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 34 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 35 | return 36 | } 37 | 38 | func (set NodeSet) GetBuckets3(a, b, c graph.Node) (x, y, z uint32) { 39 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 40 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 41 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 42 | return 43 | } 44 | 45 | func (set NodeSet) GetBuckets4(a, b, c, d graph.Node) (x, y, z, w uint32) { 46 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 47 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 48 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 49 | w = atomic.LoadUint32(&set[d>>bucket_bits]) 50 | return 51 | } 52 | 53 | func (set NodeSet) TryAdd(node graph.Node) bool { 54 | bucket, bit := set.Offset(node) 55 | 
addr := &set[bucket] 56 | retry: 57 | old := atomic.LoadUint32(addr) 58 | if old&bit != 0 { 59 | return false 60 | } 61 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 62 | return true 63 | } 64 | goto retry 65 | } 66 | 67 | func (set NodeSet) TryAddFrom(old uint32, node graph.Node) bool { 68 | bucket, bit := set.Offset(node) 69 | if old&bit != 0 { 70 | return false 71 | } 72 | addr := &set[bucket] 73 | retry: 74 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 75 | return true 76 | } 77 | old = atomic.LoadUint32(addr) 78 | if old&bit != 0 { 79 | return false 80 | } 81 | goto retry 82 | } 83 | -------------------------------------------------------------------------------- /15_worker/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "runtime" 5 | "sync" 6 | "sync/atomic" 7 | 8 | "github.com/egonelbre/a-tale-of-bfs/graph" 9 | "github.com/egonelbre/async" 10 | "github.com/shawnsmithdev/zermelo/zuint32" 11 | ) 12 | 13 | const ( 14 | ReadBlockSize = 256 15 | WriteBlockSize = 256 16 | SentinelNode = ^graph.Node(0) 17 | ) 18 | 19 | type Frontier struct { 20 | Nodes []graph.Node 21 | Head uint32 22 | } 23 | 24 | func (front *Frontier) NextRead() (low, high uint32) { 25 | high = atomic.AddUint32(&front.Head, ReadBlockSize) 26 | low = high - ReadBlockSize 27 | if high > uint32(len(front.Nodes)) { 28 | high = uint32(len(front.Nodes)) 29 | } 30 | return 31 | } 32 | 33 | func (front *Frontier) NextWrite() (low, high uint32) { 34 | high = atomic.AddUint32(&front.Head, WriteBlockSize) 35 | low = high - WriteBlockSize 36 | return 37 | } 38 | 39 | func (front *Frontier) Write(low, high *uint32, v graph.Node) { 40 | if *low >= *high { 41 | *low, *high = front.NextWrite() 42 | } 43 | front.Nodes[*low] = v 44 | *low += 1 45 | } 46 | 47 | func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) { 48 | writeLow, writeHigh := uint32(0), uint32(0) 49 | for { 50 | 
readLow, readHigh := currentLevel.NextRead() 51 | if readLow >= readHigh { 52 | break 53 | } 54 | 55 | for _, node := range currentLevel.Nodes[readLow:readHigh] { 56 | if node == SentinelNode { 57 | continue 58 | } 59 | 60 | neighbors := g.Neighbors(node) 61 | i := 0 62 | 63 | for ; i < len(neighbors)-3; i += 4 { 64 | n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3] 65 | x1, x2, x3, x4 := visited.GetBuckets4(n1, n2, n3, n4) 66 | if visited.TryAddFrom(x1, n1) { 67 | nextLevel.Write(&writeLow, &writeHigh, n1) 68 | } 69 | if visited.TryAddFrom(x2, n2) { 70 | nextLevel.Write(&writeLow, &writeHigh, n2) 71 | } 72 | if visited.TryAddFrom(x3, n3) { 73 | nextLevel.Write(&writeLow, &writeHigh, n3) 74 | } 75 | if visited.TryAddFrom(x4, n4) { 76 | nextLevel.Write(&writeLow, &writeHigh, n4) 77 | } 78 | } 79 | 80 | for _, n := range neighbors[i:] { 81 | if visited.TryAdd(n) { 82 | nextLevel.Write(&writeLow, &writeHigh, n) 83 | } 84 | } 85 | } 86 | } 87 | 88 | for i := writeLow; i < writeHigh; i += 1 { 89 | nextLevel.Nodes[i] = SentinelNode 90 | } 91 | } 92 | 93 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) { 94 | if len(level) != g.Order() { 95 | panic("invalid level length") 96 | } 97 | 98 | visited := NewNodeSet(g.Order()) 99 | 100 | maxSize := g.Order() + WriteBlockSize*procs 101 | 102 | currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0} 103 | nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0} 104 | 105 | level[source] = 1 106 | visited.TryAdd(source) 107 | currentLevel.Nodes = append(currentLevel.Nodes, source) 108 | 109 | levelNumber := 2 110 | 111 | var waitForLast1, waitForLast2 sync.WaitGroup 112 | doneProcessingCounter := int32(procs) 113 | waitForLast1.Add(1) 114 | 115 | allDone := uint32(0) 116 | 117 | worker := func(gid int) { 118 | runtime.LockOSThread() 119 | 120 | for atomic.LoadUint32(&allDone) == 0 { 121 | { 122 | // process the current level in parallel 123 | process(g, 
currentLevel, nextLevel, visited) 124 | } 125 | 126 | // use a counter to see how many are still processing 127 | if atomic.AddInt32(&doneProcessingCounter, -1) == 0 { 128 | // the last one updates the Nodes size 129 | { 130 | nextLevel.Nodes = nextLevel.Nodes[:nextLevel.Head] 131 | nextLevel.Head = 0 132 | } 133 | 134 | // reset counters 135 | atomic.StoreInt32(&doneProcessingCounter, int32(procs)) 136 | waitForLast2.Add(1) 137 | // ... and release the routines 138 | waitForLast1.Done() 139 | } else { 140 | // wait for the last one finishing processing to setup for the next phase 141 | waitForLast1.Wait() 142 | } 143 | 144 | { 145 | // sort a part of the nextLevel in equal chunks 146 | blockSize := (len(nextLevel.Nodes) + procs - 1) / procs 147 | 148 | low := blockSize * gid 149 | high := low + blockSize 150 | if high > len(nextLevel.Nodes) { 151 | high = len(nextLevel.Nodes) 152 | } 153 | 154 | if low < len(nextLevel.Nodes) { 155 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 156 | // update the vertLevels 157 | // sentinels are sorted to the end of the array, 158 | // so we can break when we find the first one 159 | for _, v := range nextLevel.Nodes[low:high] { 160 | if v == SentinelNode { 161 | break 162 | } 163 | level[v] = levelNumber 164 | } 165 | } 166 | } 167 | 168 | // similarly to before, the last one finishing, does the setup for next phase 169 | if atomic.AddInt32(&doneProcessingCounter, -1) == 0 { 170 | { 171 | levelNumber++ 172 | currentLevel, nextLevel = nextLevel, currentLevel 173 | 174 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 175 | nextLevel.Head = 0 176 | 177 | // if we are done, set the allDone flag 178 | if len(currentLevel.Nodes) == 0 { 179 | atomic.StoreUint32(&allDone, 1) 180 | } 181 | } 182 | 183 | // reset counters 184 | atomic.StoreInt32(&doneProcessingCounter, int32(procs)) 185 | waitForLast1.Add(1) 186 | // release the hounds 187 | waitForLast2.Done() 188 | } else { 189 | // wait for the 
last one to finish 190 | waitForLast2.Wait() 191 | } 192 | } 193 | } 194 | 195 | for len(currentLevel.Nodes) > 0 { 196 | async.Run(procs, func(i int) { 197 | runtime.LockOSThread() 198 | process(g, currentLevel, nextLevel, visited) 199 | }) 200 | 201 | async.BlockIter(int(nextLevel.Head), procs, func(low, high int) { 202 | runtime.LockOSThread() 203 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 204 | for _, neighbor := range nextLevel.Nodes[low:high] { 205 | if neighbor == SentinelNode { 206 | break 207 | } 208 | level[neighbor] = levelNumber 209 | } 210 | }) 211 | 212 | levelNumber++ 213 | currentLevel, nextLevel = nextLevel, currentLevel 214 | 215 | currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head] 216 | currentLevel.Head = 0 217 | 218 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 219 | nextLevel.Head = 0 220 | } 221 | 222 | for gid := 1; gid < procs; gid++ { 223 | go worker(gid) 224 | } 225 | worker(0) 226 | } 227 | -------------------------------------------------------------------------------- /16_busy/nodeset.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/egonelbre/a-tale-of-bfs/graph" 7 | ) 8 | 9 | const ( 10 | bucket_bits = 5 11 | bucket_size = 1 << 5 12 | bucket_mask = bucket_size - 1 13 | ) 14 | 15 | type NodeSet []uint32 16 | 17 | func NewNodeSet(size int) NodeSet { 18 | return NodeSet(make([]uint32, (size+31)/32)) 19 | } 20 | 21 | func (set NodeSet) Offset(node graph.Node) (bucket, bit uint32) { 22 | bucket = uint32(node >> bucket_bits) 23 | bit = uint32(1 << (node & bucket_mask)) 24 | return bucket, bit 25 | } 26 | 27 | func (set NodeSet) GetBuckets1(a graph.Node) (x uint32) { 28 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 29 | return 30 | } 31 | 32 | func (set NodeSet) GetBuckets2(a, b graph.Node) (x, y uint32) { 33 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 34 | y = 
atomic.LoadUint32(&set[b>>bucket_bits]) 35 | return 36 | } 37 | 38 | func (set NodeSet) GetBuckets3(a, b, c graph.Node) (x, y, z uint32) { 39 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 40 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 41 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 42 | return 43 | } 44 | 45 | func (set NodeSet) GetBuckets4(a, b, c, d graph.Node) (x, y, z, w uint32) { 46 | x = atomic.LoadUint32(&set[a>>bucket_bits]) 47 | y = atomic.LoadUint32(&set[b>>bucket_bits]) 48 | z = atomic.LoadUint32(&set[c>>bucket_bits]) 49 | w = atomic.LoadUint32(&set[d>>bucket_bits]) 50 | return 51 | } 52 | 53 | func (set NodeSet) TryAdd(node graph.Node) bool { 54 | bucket, bit := set.Offset(node) 55 | addr := &set[bucket] 56 | retry: 57 | old := atomic.LoadUint32(addr) 58 | if old&bit != 0 { 59 | return false 60 | } 61 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 62 | return true 63 | } 64 | goto retry 65 | } 66 | 67 | func (set NodeSet) TryAddFrom(old uint32, node graph.Node) bool { 68 | bucket, bit := set.Offset(node) 69 | if old&bit != 0 { 70 | return false 71 | } 72 | addr := &set[bucket] 73 | retry: 74 | if atomic.CompareAndSwapUint32(addr, old, old|bit) { 75 | return true 76 | } 77 | old = atomic.LoadUint32(addr) 78 | if old&bit != 0 { 79 | return false 80 | } 81 | goto retry 82 | } 83 | -------------------------------------------------------------------------------- /16_busy/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "runtime" 5 | "sync/atomic" 6 | 7 | "github.com/egonelbre/a-tale-of-bfs/graph" 8 | "github.com/egonelbre/async" 9 | "github.com/shawnsmithdev/zermelo/zuint32" 10 | ) 11 | 12 | const ( 13 | ReadBlockSize = 256 14 | WriteBlockSize = 256 15 | SentinelNode = ^graph.Node(0) 16 | ) 17 | 18 | type Frontier struct { 19 | Nodes []graph.Node 20 | Head uint32 21 | } 22 | 23 | func (front *Frontier) NextRead() (low, high uint32) { 24 | high = 
atomic.AddUint32(&front.Head, ReadBlockSize) 25 | low = high - ReadBlockSize 26 | if high > uint32(len(front.Nodes)) { 27 | high = uint32(len(front.Nodes)) 28 | } 29 | return 30 | } 31 | 32 | func (front *Frontier) NextWrite() (low, high uint32) { 33 | high = atomic.AddUint32(&front.Head, WriteBlockSize) 34 | low = high - WriteBlockSize 35 | return 36 | } 37 | 38 | func (front *Frontier) Write(low, high *uint32, v graph.Node) { 39 | if *low >= *high { 40 | *low, *high = front.NextWrite() 41 | } 42 | front.Nodes[*low] = v 43 | *low += 1 44 | } 45 | 46 | func process(g *graph.Graph, currentLevel, nextLevel *Frontier, visited NodeSet) { 47 | writeLow, writeHigh := uint32(0), uint32(0) 48 | for { 49 | readLow, readHigh := currentLevel.NextRead() 50 | if readLow >= readHigh { 51 | break 52 | } 53 | 54 | for _, node := range currentLevel.Nodes[readLow:readHigh] { 55 | if node == SentinelNode { 56 | continue 57 | } 58 | 59 | neighbors := g.Neighbors(node) 60 | i := 0 61 | 62 | for ; i < len(neighbors)-3; i += 4 { 63 | n1, n2, n3, n4 := neighbors[i], neighbors[i+1], neighbors[i+2], neighbors[i+3] 64 | x1, x2, x3, x4 := visited.GetBuckets4(n1, n2, n3, n4) 65 | if visited.TryAddFrom(x1, n1) { 66 | nextLevel.Write(&writeLow, &writeHigh, n1) 67 | } 68 | if visited.TryAddFrom(x2, n2) { 69 | nextLevel.Write(&writeLow, &writeHigh, n2) 70 | } 71 | if visited.TryAddFrom(x3, n3) { 72 | nextLevel.Write(&writeLow, &writeHigh, n3) 73 | } 74 | if visited.TryAddFrom(x4, n4) { 75 | nextLevel.Write(&writeLow, &writeHigh, n4) 76 | } 77 | } 78 | 79 | for _, n := range neighbors[i:] { 80 | if visited.TryAdd(n) { 81 | nextLevel.Write(&writeLow, &writeHigh, n) 82 | } 83 | } 84 | } 85 | } 86 | 87 | for i := writeLow; i < writeHigh; i += 1 { 88 | nextLevel.Nodes[i] = SentinelNode 89 | } 90 | } 91 | 92 | func BreadthFirst(g *graph.Graph, source graph.Node, level []int, procs int) { 93 | if len(level) != g.Order() { 94 | panic("invalid level length") 95 | } 96 | 97 | visited := 
NewNodeSet(g.Order()) 98 | 99 | maxSize := g.Order() + WriteBlockSize*procs 100 | 101 | currentLevel := &Frontier{make([]graph.Node, 0, maxSize), 0} 102 | nextLevel := &Frontier{make([]graph.Node, maxSize, maxSize), 0} 103 | 104 | level[source] = 1 105 | visited.TryAdd(source) 106 | currentLevel.Nodes = append(currentLevel.Nodes, source) 107 | 108 | levelNumber := 2 109 | 110 | var waitForLast1, waitForLast2 BusyGroup 111 | doneProcessingCounter := int32(procs) 112 | waitForLast1.Add(1) 113 | 114 | allDone := uint32(0) 115 | 116 | worker := func(gid int) { 117 | runtime.LockOSThread() 118 | 119 | for atomic.LoadUint32(&allDone) == 0 { 120 | { 121 | // process the current level in parallel 122 | process(g, currentLevel, nextLevel, visited) 123 | } 124 | 125 | // use a counter to see how many are still processing 126 | if atomic.AddInt32(&doneProcessingCounter, -1) == 0 { 127 | // the last one updates the Nodes size 128 | { 129 | nextLevel.Nodes = nextLevel.Nodes[:nextLevel.Head] 130 | nextLevel.Head = 0 131 | } 132 | 133 | // reset counters 134 | atomic.StoreInt32(&doneProcessingCounter, int32(procs)) 135 | waitForLast2.Add(1) 136 | // ... 
and release the routines 137 | waitForLast1.Done() 138 | } else { 139 | // wait for the last one finishing processing to setup for the next phase 140 | waitForLast1.Wait() 141 | } 142 | 143 | { 144 | // sort a part of the nextLevel in equal chunks 145 | blockSize := (len(nextLevel.Nodes) + procs - 1) / procs 146 | 147 | low := blockSize * gid 148 | high := low + blockSize 149 | if high > len(nextLevel.Nodes) { 150 | high = len(nextLevel.Nodes) 151 | } 152 | 153 | if low < len(nextLevel.Nodes) { 154 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 155 | // update the vertLevels 156 | // sentinels are sorted to the end of the array, 157 | // so we can break when we find the first one 158 | for _, v := range nextLevel.Nodes[low:high] { 159 | if v == SentinelNode { 160 | break 161 | } 162 | level[v] = levelNumber 163 | } 164 | } 165 | } 166 | 167 | // similarly to before, the last one finishing, does the setup for next phase 168 | if atomic.AddInt32(&doneProcessingCounter, -1) == 0 { 169 | { 170 | levelNumber++ 171 | currentLevel, nextLevel = nextLevel, currentLevel 172 | 173 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 174 | nextLevel.Head = 0 175 | 176 | // if we are done, set the allDone flag 177 | if len(currentLevel.Nodes) == 0 { 178 | atomic.StoreUint32(&allDone, 1) 179 | } 180 | } 181 | 182 | // reset counters 183 | atomic.StoreInt32(&doneProcessingCounter, int32(procs)) 184 | waitForLast1.Add(1) 185 | // release the hounds 186 | waitForLast2.Done() 187 | } else { 188 | // wait for the last one to finish 189 | waitForLast2.Wait() 190 | } 191 | } 192 | } 193 | 194 | for len(currentLevel.Nodes) > 0 { 195 | async.Run(procs, func(i int) { 196 | runtime.LockOSThread() 197 | process(g, currentLevel, nextLevel, visited) 198 | }) 199 | 200 | async.BlockIter(int(nextLevel.Head), procs, func(low, high int) { 201 | runtime.LockOSThread() 202 | zuint32.SortBYOB(nextLevel.Nodes[low:high], currentLevel.Nodes[low:high]) 203 | for _, 
neighbor := range nextLevel.Nodes[low:high] { 204 | if neighbor == SentinelNode { 205 | break 206 | } 207 | level[neighbor] = levelNumber 208 | } 209 | }) 210 | 211 | levelNumber++ 212 | currentLevel, nextLevel = nextLevel, currentLevel 213 | 214 | currentLevel.Nodes = currentLevel.Nodes[:currentLevel.Head] 215 | currentLevel.Head = 0 216 | 217 | nextLevel.Nodes = nextLevel.Nodes[:cap(nextLevel.Nodes)] 218 | nextLevel.Head = 0 219 | } 220 | 221 | for gid := 1; gid < procs; gid++ { 222 | go worker(gid) 223 | } 224 | worker(0) 225 | } 226 | 227 | type BusyGroup struct{ sema int32 } 228 | 229 | func (bg *BusyGroup) Add(v int) { atomic.AddInt32(&bg.sema, int32(v)) } 230 | func (bg *BusyGroup) Done() { bg.Add(-1) } 231 | 232 | func (bg *BusyGroup) Wait() { 233 | for atomic.LoadInt32(&bg.sema) != 0 { 234 | runtime.Gosched() 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Tale of Breadth First Search 2 | 3 | This repository contains code for articles: 4 | 5 | * [A Tale of BFS](https://medium.com/@egonelbre/a-tale-of-bfs-4ea1b8ab5eeb) 6 | * [A Tale of BFS - Going Parallel](https://medium.com/@egonelbre/a-tale-of-bfs-going-parallel-cdca89b9b295) 7 | 8 | All of this is based on http://github.com/sbromberger/gographs 9 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | *.dat 2 | sg-5m* -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/egonelbre/a-tale-of-bfs 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/edsrzf/mmap-go v1.0.0 7 | github.com/egonelbre/async v0.0.0-20180428125808-52e5b9c6d8ca 8 | github.com/egonelbre/exp 
v0.0.0-20200120101203-27ad60c1a24a 9 | github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac // indirect 10 | github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 // indirect 11 | github.com/gonum/integrate v0.0.0-20181209220457-a422b5c0fdf2 // indirect 12 | github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 // indirect 13 | github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9 // indirect 14 | github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9 // indirect 15 | github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b 16 | github.com/loov/csvcolumn v0.0.0-20200205102021-4bdc5d8d53d6 17 | github.com/loov/diagram v0.0.0-20200205133358-3c00c8e48506 18 | github.com/montanaflynn/stats v0.5.0 // indirect 19 | github.com/shawnsmithdev/zermelo v0.0.0-20190712023933-72892ed011e9 20 | golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 // indirect 21 | gonum.org/v1/gonum v0.6.2 22 | ) 23 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= 2 | github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= 3 | github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= 4 | github.com/egonelbre/async v0.0.0-20180428125808-52e5b9c6d8ca h1:itwMF54ep0rjYe4GtkjCQVfV3imgEOKfGBECeLC6kvk= 5 | github.com/egonelbre/async v0.0.0-20180428125808-52e5b9c6d8ca/go.mod h1:rl9mUiifNmlmfTguAMdyZnEBjDQngNWsxaMT4TTx7s8= 6 | github.com/egonelbre/exp v0.0.0-20200120101203-27ad60c1a24a h1:fsosP1ahdQ6N5lWTySyf1ELye2z9UwE8vsGdxH56/zA= 7 | github.com/egonelbre/exp v0.0.0-20200120101203-27ad60c1a24a/go.mod h1:Tjj9DHdvD+X9Qr+htYbYu5hLzMWRRDcV9bapSZH5JP0= 8 | github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= 9 | 
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= 10 | github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac h1:Q0Jsdxl5jbxouNs1TQYt0gxesYMU4VXRbsTlgDloZ50= 11 | github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac/go.mod h1:P32wAyui1PQ58Oce/KYkOqQv8cVw1zAapXOl+dRFGbc= 12 | github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82 h1:EvokxLQsaaQjcWVWSV38221VAK7qc2zhaO17bKys/18= 13 | github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82/go.mod h1:PxC8OnwL11+aosOB5+iEPoV3picfs8tUpkVd0pDo+Kg= 14 | github.com/gonum/integrate v0.0.0-20181209220457-a422b5c0fdf2 h1:GUSkTcIe1SlregbHNUKbYDhBsS8lNgYfIp4S4cToUyU= 15 | github.com/gonum/integrate v0.0.0-20181209220457-a422b5c0fdf2/go.mod h1:pDgmNM6seYpwvPos3q+zxlXMsbve6mOIPucUnUOrI7Y= 16 | github.com/gonum/internal v0.0.0-20181124074243-f884aa714029 h1:8jtTdc+Nfj9AR+0soOeia9UZSvYBvETVHZrugUowJ7M= 17 | github.com/gonum/internal v0.0.0-20181124074243-f884aa714029/go.mod h1:Pu4dmpkhSyOzRwuXkOgAvijx4o+4YMUJJo9OvPYMkks= 18 | github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9 h1:7qnwS9+oeSiOIsiUMajT+0R7HR6hw5NegnKPmn/94oI= 19 | github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9/go.mod h1:XA3DeT6rxh2EAE789SSiSJNqxPaC0aE9J8NTOI0Jo/A= 20 | github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9 h1:V2IgdyerlBa/MxaEFRbV5juy/C3MGdj4ePi+g6ePIp4= 21 | github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9/go.mod h1:0EXg4mc1CNP0HCqCz+K4ts155PXIlUywf0wqN+GfPZw= 22 | github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b h1:fbskpz/cPqWH8VqkQ7LJghFkl2KPAiIFUHrTJ2O3RGk= 23 | github.com/gonum/stat v0.0.0-20181125101827-41a0da705a5b/go.mod h1:Z4GIJBJO3Wa4gD4vbwQxXXZ+WHmW6E9ixmNrwvs0iZs= 24 | github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 25 | github.com/loov/csvcolumn v0.0.0-20200105141458-28902ec2389c h1:FeWdFjpm6+N+Utmd7tB3mVkDmtwa1drQy1jlFbzWYpY= 26 | 
github.com/loov/csvcolumn v0.0.0-20200105141458-28902ec2389c/go.mod h1:auQUQ1pduNvwT/wIxb7jGPuNyjqBBqhmkdXlnEJsekE= 27 | github.com/loov/csvcolumn v0.0.0-20200205102021-4bdc5d8d53d6 h1:Fx4wOrJBdw5R6l6QuPOX83R0xfTkj6jNh52WDt7MKKI= 28 | github.com/loov/csvcolumn v0.0.0-20200205102021-4bdc5d8d53d6/go.mod h1:auQUQ1pduNvwT/wIxb7jGPuNyjqBBqhmkdXlnEJsekE= 29 | github.com/loov/diagram v0.0.0-20200122154408-84ad7d84ad82 h1:WUMAclKei2sEhZzPZ8NGlCQPSOa8hdvuVb/Z18CYKEI= 30 | github.com/loov/diagram v0.0.0-20200122154408-84ad7d84ad82/go.mod h1:PsxpRCbuqmmAESOKmphOvBz4q6FLAFFPNdlXb6mcxN8= 31 | github.com/loov/diagram v0.0.0-20200205133358-3c00c8e48506 h1:uYsds/zvvTeY1+2Ne2NWDuSPQf+UAbaeJhHgHpU65/8= 32 | github.com/loov/diagram v0.0.0-20200205133358-3c00c8e48506/go.mod h1:PsxpRCbuqmmAESOKmphOvBz4q6FLAFFPNdlXb6mcxN8= 33 | github.com/montanaflynn/stats v0.5.0 h1:2EkzeTSqBB4V4bJwWrt5gIIrZmpJBcoIRGS2kWLgzmk= 34 | github.com/montanaflynn/stats v0.5.0/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= 35 | github.com/shawnsmithdev/zermelo v0.0.0-20190712023933-72892ed011e9 h1:QtguuPQV4FoI/CE63SfH0tYTOM4FTPWMYivPON3FVS0= 36 | github.com/shawnsmithdev/zermelo v0.0.0-20190712023933-72892ed011e9/go.mod h1:jy93IfRFTG4cjaKOrOqCc0zUIr81AufosGdjxZWIY5E= 37 | golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 38 | golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 39 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2 h1:y102fOLFqhV41b+4GPiJoa0k/x+pJcEi2/HB1Y5T6fU= 40 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 41 | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= 42 | golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0= 43 | golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 44 | golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 45 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 46 | gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= 47 | gonum.org/v1/gonum v0.6.2 h1:4r+yNT0+8SWcOkXP+63H2zQbN+USnC73cjGUxnDF94Q= 48 | gonum.org/v1/gonum v0.6.2/go.mod h1:9mxDZsDKxgMAuccQkewq682L+0eCu4dCN2yonUJTCLU= 49 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc= 50 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= 51 | gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= 52 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 53 | -------------------------------------------------------------------------------- /graph/graph.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | type Node = uint32 4 | 5 | type Graph struct { 6 | List []Node 7 | Span []uint64 8 | } 9 | 10 | func (graph *Graph) Neighbors(n Node) []Node { 11 | start, end := graph.Span[n], graph.Span[n+1] 12 | return graph.List[start:end] 13 | } 14 | 15 | func (graph *Graph) Order() int { 16 | return len(graph.List) 17 | } 18 | -------------------------------------------------------------------------------- /graph/loaddat.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "os" 5 | "unsafe" 6 | 7 | mmap "github.com/edsrzf/mmap-go" 8 | ) 9 | 10 | func LoadDAT(filename string) (*Graph, error) { 11 | file, err := os.OpenFile(filename, os.O_RDONLY, 0644) 12 | if err != nil { 13 | return nil, err 14 | } 15 | defer file.Close() 16 | 17 | data, err := 
mmap.Map(file, mmap.RDONLY, 0) 18 | if err != nil { 19 | return nil, err 20 | } 21 | defer data.Unmap() 22 | 23 | var listlen uint64 24 | var spanlen uint64 25 | 26 | x := 0 27 | copy((*[8]byte)(unsafe.Pointer(&listlen))[:], data[x:x+8]) 28 | x += 8 29 | copy((*[8]byte)(unsafe.Pointer(&spanlen))[:], data[x:x+8]) 30 | x += 8 31 | 32 | graph := &Graph{} 33 | graph.List = make([]Node, listlen) 34 | graph.Span = make([]uint64, spanlen) 35 | 36 | listdata := ((*[1 << 40]Node)(unsafe.Pointer(&data[x]))) 37 | copy(graph.List, listdata[:]) 38 | x += 4 * int(listlen) 39 | 40 | spandata := ((*[1 << 40]uint64)(unsafe.Pointer(&data[x]))) 41 | copy(graph.Span, spandata[:]) 42 | 43 | return graph, nil 44 | } 45 | 46 | func WriteDat(filename string, g *Graph) error { 47 | return nil 48 | } 49 | -------------------------------------------------------------------------------- /graph/loadtext.go: -------------------------------------------------------------------------------- 1 | package graph 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "os" 7 | "strconv" 8 | ) 9 | 10 | func LoadText(filename string) (*Graph, error) { 11 | f, err := os.Open(filename) 12 | if err != nil { 13 | return nil, err 14 | } 15 | defer f.Close() 16 | return ParseText(f) 17 | } 18 | 19 | func ParseText(r io.Reader) (*Graph, error) { 20 | graph := &Graph{} 21 | graph.List = make([]Node, 0, 1<<20) 22 | graph.Span = make([]uint64, 0, 1<<20) 23 | 24 | scanner := bufio.NewScanner(r) 25 | 26 | for scanner.Scan() { 27 | line := scanner.Text() 28 | if line == "-----" { 29 | break 30 | } 31 | 32 | value, err := strconv.ParseUint(line, 10, 32) 33 | if err != nil { 34 | return nil, err 35 | } 36 | 37 | graph.Span = append(graph.Span, uint64(value-1)) 38 | } 39 | 40 | for scanner.Scan() { 41 | line := scanner.Text() 42 | 43 | value, err := strconv.ParseUint(line, 10, 32) 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | graph.List = append(graph.List, Node(value-1)) 49 | } 50 | 51 | return graph, nil 52 | } 53 | 
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "reflect" 9 | "regexp" 10 | "runtime" 11 | "runtime/debug" 12 | "sort" 13 | "strings" 14 | "time" 15 | 16 | "github.com/egonelbre/a-tale-of-bfs/graph" 17 | "github.com/egonelbre/exp/qpc" 18 | "github.com/gonum/stat" 19 | "gonum.org/v1/gonum/floats" 20 | 21 | s00_baseline "github.com/egonelbre/a-tale-of-bfs/00_baseline" 22 | s01_reuse_level "github.com/egonelbre/a-tale-of-bfs/01_reuse_level" 23 | s02_sort "github.com/egonelbre/a-tale-of-bfs/02_sort" 24 | s03_inline_sort "github.com/egonelbre/a-tale-of-bfs/03_inline_sort" 25 | s04_radix_sort "github.com/egonelbre/a-tale-of-bfs/04_radix_sort" 26 | s05_lift_level "github.com/egonelbre/a-tale-of-bfs/05_lift_level" 27 | 28 | s06_ordering "github.com/egonelbre/a-tale-of-bfs/06_ordering" 29 | s07_fused "github.com/egonelbre/a-tale-of-bfs/07_fused" 30 | s07_fused_if "github.com/egonelbre/a-tale-of-bfs/07_fused_if" 31 | s08_cuckoo "github.com/egonelbre/a-tale-of-bfs/08_cuckoo" 32 | 33 | s09_unroll_4 "github.com/egonelbre/a-tale-of-bfs/09_unroll_4" 34 | s09_unroll_8 "github.com/egonelbre/a-tale-of-bfs/09_unroll_8" 35 | s09_unroll_8_4 "github.com/egonelbre/a-tale-of-bfs/09_unroll_8_4" 36 | 37 | s10_parallel "github.com/egonelbre/a-tale-of-bfs/10_parallel" 38 | s10_parchan "github.com/egonelbre/a-tale-of-bfs/10_parchan" 39 | s11_frontier "github.com/egonelbre/a-tale-of-bfs/11_frontier" 40 | s12_almost "github.com/egonelbre/a-tale-of-bfs/12_almost" 41 | s13_marking "github.com/egonelbre/a-tale-of-bfs/13_marking" 42 | 43 | s14_early_2 "github.com/egonelbre/a-tale-of-bfs/14_early_2" 44 | s14_early_3 "github.com/egonelbre/a-tale-of-bfs/14_early_3" 45 | s14_early_4 "github.com/egonelbre/a-tale-of-bfs/14_early_4" 46 | s14_early_r "github.com/egonelbre/a-tale-of-bfs/14_early_r" 
47 | 48 | s15_worker "github.com/egonelbre/a-tale-of-bfs/15_worker" 49 | s16_busy "github.com/egonelbre/a-tale-of-bfs/16_busy" 50 | ) 51 | 52 | var ( 53 | cold = flag.Bool("cold", false, "also include cold run") 54 | run = flag.String("run", "", "filter approaches") 55 | N = flag.Int("N", 10, "benchmark iterations") 56 | ) 57 | 58 | type IterateFn func(g *graph.Graph, source graph.Node, levels []int) 59 | type IterateFnParallel func(g *graph.Graph, source graph.Node, levels []int, procs int) 60 | 61 | func IterateParallel(procs int, iterate IterateFnParallel) IterateFn { 62 | return func(g *graph.Graph, source graph.Node, levels []int) { 63 | iterate(g, source, levels, procs) 64 | } 65 | } 66 | 67 | func EmptyRun(g *graph.Graph, source graph.Node, iterate IterateFn) { 68 | levels := make([]int, g.Order()) 69 | debug.SetGCPercent(0) 70 | runtime.GC() 71 | { 72 | iterate(g, source, levels) 73 | } 74 | debug.SetGCPercent(100) 75 | runtime.GC() 76 | } 77 | 78 | func Benchmark(g *graph.Graph, source graph.Node, iterate IterateFn, N int) []float64 { 79 | timings := []float64{} 80 | for k := 0; k < N; k++ { 81 | var start, stop qpc.Count 82 | levels := make([]int, g.Order()) 83 | { 84 | debug.SetGCPercent(0) 85 | runtime.GC() 86 | { 87 | start = qpc.Now() 88 | iterate(g, source, levels) 89 | stop = qpc.Now() 90 | } 91 | debug.SetGCPercent(100) 92 | runtime.GC() 93 | } 94 | timings = append(timings, stop.Sub(start).Duration().Seconds()) 95 | } 96 | 97 | return timings 98 | } 99 | 100 | func Test(g *graph.Graph, name string, source graph.Node, iterate IterateFn, expected []int) { 101 | levels := make([]int, g.Order()) 102 | 103 | done := make(chan struct{}) 104 | go func() { 105 | iterate(g, source, levels) 106 | done <- struct{}{} 107 | }() 108 | 109 | select { 110 | case <-done: 111 | case <-time.After(time.Second): 112 | fmt.Fprintln(os.Stderr, "Locked ", name) 113 | return 114 | } 115 | 116 | levelCounts := []int{} 117 | for _, level := range levels { 118 | if level >= 
len(levelCounts) { 119 | levelCounts = append(levelCounts, make([]int, level-len(levelCounts)+1)...) 120 | } 121 | levelCounts[level]++ 122 | } 123 | 124 | if !reflect.DeepEqual(levelCounts, expected) { 125 | fmt.Fprintln(os.Stderr, "Invalid ", name, "got", levelCounts, "exp", expected) 126 | } 127 | } 128 | 129 | func Stats(timings []float64) string { 130 | sort.Float64s(timings) 131 | 132 | min := floats.Min(timings) 133 | max := floats.Max(timings) 134 | mean, variance := stat.MeanStdDev(timings, nil) 135 | q := stat.Quantile(0.5, stat.Empirical, timings, nil) 136 | 137 | ms := func(v float64) string { 138 | return fmt.Sprintf("%.2f", v*1000) 139 | } 140 | 141 | return fmt.Sprintf("%v\t%v\t%v\t%v\t%v", ms(q), ms(mean), ms(variance), ms(min), ms(max)) 142 | } 143 | 144 | func main() { 145 | runtime.LockOSThread() 146 | flag.Parse() 147 | 148 | type Dataset struct { 149 | Name string 150 | Graph *graph.Graph 151 | } 152 | 153 | var datasets []Dataset 154 | for _, filename := range flag.Args() { 155 | fmt.Fprintln(os.Stderr, "# Loading dataset ", filename) 156 | var g *graph.Graph 157 | var err error 158 | switch filepath.Ext(filename) { 159 | case ".dat": 160 | g, err = graph.LoadDAT(filename) 161 | case ".txt": 162 | g, err = graph.LoadText(filename) 163 | default: 164 | fmt.Fprintln(os.Stderr, "unknown file format: "+filename) 165 | os.Exit(1) 166 | } 167 | 168 | if err != nil { 169 | fmt.Fprintln(os.Stderr, err) 170 | os.Exit(1) 171 | } 172 | 173 | datasets = append(datasets, Dataset{ 174 | Name: removeExt(filepath.Base(filename)), 175 | Graph: g, 176 | }) 177 | } 178 | 179 | const SOURCE = graph.Node(2) 180 | 181 | g10k, err := graph.LoadText("data/sg-10k-250k.txt") 182 | if err != nil { 183 | fmt.Fprintln(os.Stderr, err) 184 | os.Exit(1) 185 | } 186 | 187 | max := runtime.GOMAXPROCS(-1) 188 | maxs := fmt.Sprintf("%dx", max) 189 | 190 | iterators := []struct { 191 | Name string 192 | Iterate IterateFn 193 | Skip bool 194 | }{ 195 | {"baseline", 
s00_baseline.BreadthFirst, false}, 196 | {"reuse level", s01_reuse_level.BreadthFirst, false}, 197 | {"sort", s02_sort.BreadthFirst, false}, 198 | {"inline sort", s03_inline_sort.BreadthFirst, false}, 199 | {"radix sort", s04_radix_sort.BreadthFirst, false}, 200 | {"lift level", s05_lift_level.BreadthFirst, false}, 201 | 202 | {"ordering", s06_ordering.BreadthFirst, false}, 203 | {"fused", s07_fused.BreadthFirst, false}, 204 | {"fused if", s07_fused_if.BreadthFirst, false}, 205 | {"cuckoo", s08_cuckoo.BreadthFirst, true}, 206 | 207 | {"unroll 4", s09_unroll_4.BreadthFirst, false}, 208 | {"unroll 8", s09_unroll_8.BreadthFirst, false}, 209 | {"unroll 8 4", s09_unroll_8_4.BreadthFirst, false}, 210 | 211 | {"parallel", s10_parallel.BreadthFirst, true}, 212 | {"parchan 4x", IterateParallel(4, s10_parchan.BreadthFirst), false}, 213 | {"parchan " + maxs, IterateParallel(max, s10_parchan.BreadthFirst), false}, 214 | 215 | {"frontier 4x", IterateParallel(4, s11_frontier.BreadthFirst), false}, 216 | {"frontier " + maxs, IterateParallel(max, s11_frontier.BreadthFirst), false}, 217 | 218 | {"almost 4x", IterateParallel(4, s12_almost.BreadthFirst), false}, 219 | {"almost " + maxs, IterateParallel(max, s12_almost.BreadthFirst), false}, 220 | 221 | {"marking 4x", IterateParallel(4, s13_marking.BreadthFirst), false}, 222 | {"marking " + maxs, IterateParallel(max, s13_marking.BreadthFirst), false}, 223 | 224 | {"early2 4x", IterateParallel(4, s14_early_2.BreadthFirst), false}, 225 | {"early2 " + maxs, IterateParallel(max, s14_early_2.BreadthFirst), false}, 226 | 227 | {"early3 4x", IterateParallel(4, s14_early_3.BreadthFirst), false}, 228 | {"early3 " + maxs, IterateParallel(max, s14_early_3.BreadthFirst), false}, 229 | 230 | {"early4 4x", IterateParallel(4, s14_early_4.BreadthFirst), false}, 231 | {"early4 " + maxs, IterateParallel(max, s14_early_4.BreadthFirst), false}, 232 | 233 | {"earlyR 4x", IterateParallel(4, s14_early_r.BreadthFirst), false}, 234 | {"earlyR " + maxs, 
IterateParallel(max, s14_early_r.BreadthFirst), false}, 235 | 236 | {"worker 4x", IterateParallel(4, s15_worker.BreadthFirst), false}, 237 | {"worker " + maxs, IterateParallel(max, s15_worker.BreadthFirst), false}, 238 | 239 | {"busy 4x", IterateParallel(4, s16_busy.BreadthFirst), false}, 240 | {"busy " + maxs, IterateParallel(max, s16_busy.BreadthFirst), false}, 241 | } 242 | 243 | for _, it := range iterators { 244 | Test(g10k, it.Name, SOURCE, it.Iterate, []int{490000, 1, 55, 2416, 7528}) 245 | } 246 | 247 | rx := regexp.MustCompile(*run) 248 | 249 | //w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0) 250 | // defer w.Flush() 251 | 252 | w := os.Stdout 253 | fmt.Fprintf(w, "dataset\tapproach\tmed\tavg\tvar\tmin\tmax\n") 254 | for _, dataset := range datasets { 255 | fmt.Fprintln(os.Stderr, "# Dataset", dataset.Name) 256 | for _, it := range iterators { 257 | if *run != "" && !rx.MatchString(it.Name) { 258 | continue 259 | } 260 | 261 | fmt.Fprint(os.Stderr, " > ", it.Name, "\t") 262 | 263 | if *cold { 264 | EmptyRun(dataset.Graph, SOURCE, it.Iterate) 265 | } 266 | 267 | n := *N 268 | if it.Skip { 269 | n = 1 270 | } 271 | 272 | timings := Benchmark(dataset.Graph, SOURCE, it.Iterate, n) 273 | stats := Stats(timings) 274 | fmt.Fprintln(os.Stderr, stats) 275 | fmt.Fprintf(w, "%v\t%v\t%v\n", dataset.Name, it.Name, stats) 276 | } 277 | } 278 | fmt.Fprint(os.Stderr, "\n") 279 | } 280 | 281 | func removeExt(name string) string { 282 | p := strings.Index(name, ".") 283 | if p < 0 { 284 | return name 285 | } 286 | return name[:p] 287 | } 288 | -------------------------------------------------------------------------------- /plot/.gitignore: -------------------------------------------------------------------------------- 1 | *.svg 2 | -------------------------------------------------------------------------------- /plot/convert.sh: -------------------------------------------------------------------------------- 1 | ls -1 *.svg | xargs -I "{}" "c:\Program 
Files\Inkscape\inkscape.exe" -e "png/{}.png" "{}" -------------------------------------------------------------------------------- /plot/plot.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "image/color" 6 | "io" 7 | "io/ioutil" 8 | "os" 9 | 10 | "github.com/loov/csvcolumn" 11 | "github.com/loov/diagram" 12 | ) 13 | 14 | func main() { 15 | xeon, xeonerr := ParseFile("../results/Linux-Xeon-E5-2670v3.txt") 16 | wii7, wii7err := ParseFile("../results/Win-i7-2820QM.txt") 17 | if xeonerr != nil || wii7err != nil { 18 | fmt.Fprintln(os.Stderr, xeonerr) 19 | fmt.Fprintln(os.Stderr, wii7err) 20 | os.Exit(1) 21 | } 22 | xeon = xeon.Dataset("friendster") 23 | wii7 = wii7.Dataset("sg-5m-100m") 24 | 25 | baseline := Line{"baseline", wii7.A("baseline"), xeon.A("baseline")} 26 | reuse := Line{"reuse level", wii7.A("reuse level"), xeon.A("reuse level")} 27 | sort := Line{"sort", wii7.A("sort"), xeon.A("sort")} 28 | sort_inline := Line{"inline sort", wii7.A("inline sort"), xeon.A("inline sort")} 29 | sort_radix := Line{"radix sort", wii7.A("radix sort"), xeon.A("radix sort")} 30 | lift_level := Line{"lift level", wii7.A("lift level"), xeon.A("lift level")} 31 | ordering := Line{"ordering", wii7.A("ordering"), xeon.A("ordering")} 32 | fused := Line{"fused", wii7.A("fused"), xeon.A("fused")} 33 | fusedif := Line{"fused if", wii7.A("fused if"), xeon.A("fused if")} 34 | cuckoo := Line{"cuckoo", wii7.A("cuckoo"), xeon.A("cuckoo")} 35 | unroll_4 := Line{"unroll 4", wii7.A("unroll 4"), xeon.A("unroll 4")} 36 | unroll_8 := Line{"unroll 8", wii7.A("unroll 8"), xeon.A("unroll 8")} 37 | unroll_8_4 := Line{"unroll 8 4", wii7.A("unroll 8 4"), xeon.A("unroll 8 4")} 38 | parallel := Line{"parallel", wii7.A("parallel"), xeon.A("parallel")} 39 | 40 | frontier_4x := Line{"frontier", wii7.A("frontier 4x"), xeon.A("frontier 4x")} 41 | almost_4x := Line{"almost", wii7.A("almost 4x"), xeon.A("almost 4x")} 42 | 
marking_4x := Line{"marking", wii7.A("marking 4x"), xeon.A("marking 4x")} 43 | early2_4x := Line{"early2", wii7.A("early2 4x"), xeon.A("early2 4x")} 44 | early3_4x := Line{"early3", wii7.A("early3 4x"), xeon.A("early3 4x")} 45 | early4_4x := Line{"early4", wii7.A("early4 4x"), xeon.A("early4 4x")} 46 | earlyR_4x := Line{"earlyR", wii7.A("earlyR 4x"), xeon.A("earlyR 4x")} 47 | worker_4x := Line{"worker", wii7.A("worker 4x"), xeon.A("worker 4x")} 48 | busy_4x := Line{"busy", wii7.A("busy 4x"), xeon.A("busy 4x")} 49 | 50 | frontier_48x := Line{"frontier", wii7.A("frontier 8x"), xeon.A("frontier 48x")} 51 | almost_48x := Line{"almost", wii7.A("almost 8x"), xeon.A("almost 48x")} 52 | marking_48x := Line{"marking", wii7.A("marking 8x"), xeon.A("marking 48x")} 53 | early2_48x := Line{"early2", wii7.A("early2 8x"), xeon.A("early2 48x")} 54 | early3_48x := Line{"early3", wii7.A("early3 8x"), xeon.A("early3 48x")} 55 | early4_48x := Line{"early4", wii7.A("early4 8x"), xeon.A("early4 48x")} 56 | earlyR_48x := Line{"earlyR", wii7.A("earlyR 8x"), xeon.A("earlyR 48x")} 57 | worker_48x := Line{"worker", wii7.A("worker 8x"), xeon.A("worker 48x")} 58 | busy_48x := Line{"busy", wii7.A("busy 8x"), xeon.A("busy 48x")} 59 | 60 | Plot("00-baseline.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 61 | baseline, 62 | ) 63 | 64 | Plot("01-reuse-levels.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 65 | baseline, 66 | reuse, 67 | ) 68 | 69 | Plot("02-sort.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 70 | baseline, 71 | reuse, 72 | sort, 73 | ) 74 | 75 | Plot("03-sort-inline.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 76 | baseline, 77 | reuse, 78 | sort, 79 | sort_inline, 80 | ) 81 | 82 | Plot("04-sort-radix.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 83 | baseline, 84 | reuse, 85 | sort, 86 | sort_inline, 87 | sort_radix, 88 | ) 89 | 90 | Plot("05-lift-level.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 91 | baseline, 92 | sort, 93 | 
sort_inline, 94 | sort_radix, 95 | lift_level, 96 | ) 97 | 98 | Plot("06.0-ordering.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 99 | baseline, 100 | sort_radix, 101 | lift_level, 102 | ordering, 103 | ) 104 | 105 | Plot("06.1-fusing.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 106 | baseline, 107 | sort_radix, 108 | lift_level, 109 | ordering, 110 | fused, 111 | fusedif, 112 | ) 113 | 114 | Plot("07-cuckoo.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 115 | baseline, 116 | ordering, 117 | cuckoo, 118 | ) 119 | 120 | Plot("08-unroll.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 121 | baseline, 122 | ordering, 123 | unroll_4, 124 | unroll_8, 125 | unroll_8_4, 126 | ) 127 | 128 | Plot("09-summary.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 129 | baseline, 130 | reuse, 131 | sort, 132 | sort_inline, 133 | sort_radix, 134 | lift_level, 135 | ordering, 136 | fused, 137 | fusedif, 138 | cuckoo, 139 | unroll_4, 140 | unroll_8, 141 | unroll_8_4, 142 | ) 143 | 144 | Plot("10-parallel.svg", "i7-2820QM | 5M nodes", "65M nodes | E5-2670v3", 145 | baseline, 146 | unroll_8, 147 | parallel, 148 | ) 149 | 150 | Plot("11-frontier-48x.svg", "8x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 48x", 151 | baseline, 152 | unroll_8, 153 | parallel, 154 | frontier_48x, 155 | ) 156 | 157 | Plot("12-almost-48x.svg", "8x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 48x", 158 | baseline, 159 | unroll_8, 160 | frontier_48x, 161 | almost_48x, 162 | marking_48x, 163 | ) 164 | 165 | Plot("13-early-48x.svg", "8x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 48x", 166 | baseline, 167 | unroll_8, 168 | frontier_48x, 169 | marking_48x, 170 | early2_48x, 171 | early3_48x, 172 | early4_48x, 173 | earlyR_48x, 174 | ) 175 | 176 | Plot("14-workers-48x.svg", "8x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 48x", 177 | baseline, 178 | unroll_8, 179 | frontier_48x, 180 | marking_48x, 181 | early4_48x, 182 | worker_48x, 183 | ) 184 | 185 | 
Plot("15-busy-48x.svg", "8x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 48x", 186 | baseline, 187 | unroll_8, 188 | frontier_48x, 189 | marking_48x, 190 | early4_48x, 191 | worker_48x, 192 | busy_48x, 193 | ) 194 | 195 | Plot("19-final_48x.svg", "8x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 48x", 196 | baseline, 197 | unroll_8, 198 | parallel, 199 | frontier_48x, 200 | almost_48x, 201 | marking_48x, 202 | early2_48x, 203 | early3_48x, 204 | early4_48x, 205 | earlyR_48x, 206 | worker_48x, 207 | busy_48x, 208 | ) 209 | 210 | Plot("19-final_4x.svg", "4x i7-2820QM | 5M nodes", "65M nodes | E5-2670v3 4x", 211 | baseline, 212 | unroll_8, 213 | parallel, 214 | frontier_4x, 215 | almost_4x, 216 | marking_4x, 217 | early2_4x, 218 | early3_4x, 219 | early4_4x, 220 | earlyR_4x, 221 | worker_4x, 222 | busy_4x, 223 | ) 224 | } 225 | 226 | func Plot(filename string, lefttitle, righttitle string, lines ...Line) { 227 | const ( 228 | head = 20 229 | sidewidth = 350 230 | height = 16 231 | textheight = 16 232 | textwidth = textheight * 12 233 | pad = 2 234 | 235 | sleft = 10000 236 | sright = 50000 237 | ) 238 | 239 | const margin = 30 240 | canvaswidth := float64(sidewidth*2 + textwidth + margin*2) 241 | canvasheight := float64(head + (height+2*pad)*len(lines) + margin*2) 242 | 243 | canvas := diagram.NewSVG(canvaswidth, canvasheight) 244 | r := canvas.Bounds().Shrink(diagram.Point{margin, margin}) 245 | 246 | inner := canvas.Context(r) 247 | base := inner.Layer(0) 248 | grid := inner.Layer(1) 249 | text := inner.Layer(2) 250 | 251 | var xl float64 = sidewidth 252 | var xr float64 = sidewidth + textwidth 253 | 254 | var left diagram.Rect 255 | left.Max.X = xl 256 | left.Max.Y = height 257 | 258 | var center diagram.Rect 259 | center.Min.X = xl 260 | center.Max.X = xr 261 | center.Max.Y = height 262 | 263 | var right diagram.Rect 264 | right.Min.X = xr 265 | right.Max.Y = height 266 | 267 | black := color.Gray16{0} 268 | //white := color.Gray16{0xffff} 269 | 270 | pleft := 
func(v float64) float64 { 271 | return xl - v*sidewidth/sleft 272 | } 273 | pright := func(v float64) float64 { 274 | return xr + v*sidewidth/sright 275 | } 276 | 277 | text.Text(lefttitle, diagram.Point{xl, -pad}, 278 | &diagram.Style{ 279 | Fill: black, 280 | Size: 14, 281 | Origin: diagram.Point{0, -1}, 282 | }) 283 | text.Text(righttitle, diagram.Point{xr, -pad}, 284 | &diagram.Style{ 285 | Fill: black, 286 | Size: 14, 287 | Origin: diagram.Point{0, -1}, 288 | }) 289 | 290 | for k := 0; k <= 10; k += 1 { 291 | x := pleft(float64(k) * 1000) 292 | grid.Poly(diagram.Ps( 293 | x, head, 294 | x, grid.Bounds().Size().Y, 295 | ), &diagram.Style{ 296 | Stroke: color.Gray16{0x4444}, 297 | Size: 1, 298 | }) 299 | 300 | grid.Text(fmt.Sprintf("%ds", k), 301 | diagram.Point{x, head}, 302 | &diagram.Style{ 303 | Fill: black, 304 | Size: 14, 305 | Origin: diagram.Point{0, 1}, 306 | }) 307 | } 308 | for k := 0; k <= 50; k += 5 { 309 | x := pright(float64(k) * 1000) 310 | grid.Poly(diagram.Ps( 311 | x, head, 312 | x, grid.Bounds().Size().Y, 313 | ), &diagram.Style{ 314 | Stroke: color.Gray16{0x4444}, 315 | Size: 1, 316 | }) 317 | 318 | grid.Text(fmt.Sprintf("%ds", k), 319 | diagram.Point{x, head}, 320 | &diagram.Style{ 321 | Fill: black, 322 | Size: 14, 323 | Origin: diagram.Point{0, 1}, 324 | }) 325 | } 326 | 327 | left = left.Offset(diagram.Point{Y: head}) 328 | center = center.Offset(diagram.Point{Y: head}) 329 | right = right.Offset(diagram.Point{Y: head}) 330 | 331 | for _, line := range lines { 332 | left = left.Offset(diagram.Point{Y: pad}) 333 | center = center.Offset(diagram.Point{Y: pad}) 334 | right = right.Offset(diagram.Point{Y: pad}) 335 | 336 | left.Min.X = pleft(line.Left.Median) 337 | right.Max.X = pright(line.Right.Median) 338 | 339 | y := center.Max.Y - pad 340 | 341 | base.Rect(left, &diagram.Style{Fill: black}) 342 | text.Text( 343 | fmt.Sprintf("%.2fs", line.Left.Median/1000), 344 | diagram.Point{ 345 | X: left.Max.X + pad, 346 | Y: y, 347 | }, 
// Lines is a list of chart rows.
type Lines []Line

// Line is a single chart row: a label plus the measurement shown on the left
// side and the measurement shown on the right side.
type Line struct {
	Name  string
	Left  Measurement
	Right Measurement
}

// Entry identifies a measurement by the dataset it was taken on and the
// approach that was measured.
type Entry struct {
	Dataset  string
	Approach string
}

// Measurements is a list of measurements, searched linearly by its methods.
type Measurements []Measurement

// Dataset returns the measurements taken on dataset, in their original
// order. The result is nil when nothing matches.
func (xs Measurements) Dataset(dataset string) Measurements {
	var matched Measurements
	for i := range xs {
		if xs[i].Dataset != dataset {
			continue
		}
		matched = append(matched, xs[i])
	}
	return matched
}

// E is shorthand for Entry with the dataset and approach given separately.
func (xs Measurements) E(dataset, approach string) Measurement {
	return xs.Entry(Entry{Dataset: dataset, Approach: approach})
}

// A returns the first measurement for approach regardless of dataset, or the
// zero Measurement when there is none.
func (xs Measurements) A(approach string) Measurement {
	for i := range xs {
		if xs[i].Approach == approach {
			return xs[i]
		}
	}
	return Measurement{}
}

// Entry returns the first measurement whose dataset and approach both match
// e, or the zero Measurement when there is none.
func (xs Measurements) Entry(e Entry) Measurement {
	for i := range xs {
		if xs[i].Dataset == e.Dataset && xs[i].Approach == e.Approach {
			return xs[i]
		}
	}
	return Measurement{}
}

// Measurement holds the summary statistics recorded for one Entry.
type Measurement struct {
	Entry
	Median  float64
	Average float64
	Stdev   float64
	Min     float64
	Max     float64
}
float64 434 | Stdev float64 435 | Min float64 436 | Max float64 437 | } 438 | 439 | func ParseFile(name string) (Measurements, error) { 440 | f, err := os.Open(name) 441 | if err != nil { 442 | return nil, err 443 | } 444 | defer f.Close() 445 | 446 | return ParseMeasurements(f) 447 | } 448 | 449 | func ParseMeasurements(in io.Reader) (Measurements, error) { 450 | data := csvcolumn.NewReader(in) 451 | data.LazyQuotes = true 452 | data.Comma = '\t' 453 | 454 | dataset, approach := data.String("dataset"), data.String("approach") 455 | med, avg, stdev := data.Float64("med"), data.Float64("avg"), data.Float64("stdev") 456 | min, max := data.Float64("min"), data.Float64("max") 457 | 458 | var xs Measurements 459 | for data.Next() && data.Err() == nil { 460 | var x Measurement 461 | x.Dataset = *dataset 462 | x.Approach = *approach 463 | x.Median = *med 464 | x.Average = *avg 465 | x.Stdev = *stdev 466 | x.Min = *min 467 | x.Max = *max 468 | xs = append(xs, x) 469 | } 470 | if err := data.Err(); err != nil { 471 | return nil, fmt.Errorf("failed to parse: %w", err) 472 | } 473 | return xs, nil 474 | } 475 | -------------------------------------------------------------------------------- /results/Linux-Xeon-E5-2670v3-B.txt: -------------------------------------------------------------------------------- 1 | dataset approach med avg stdev min max 2 | friendster baseline 66056.44 72684.60 16391.31 54459.00 92891.77 3 | friendster reuse level 118742.89 118773.26 667.13 117953.46 119586.15 4 | friendster sort 54892.10 53577.01 2907.00 49220.80 55201.89 5 | friendster inline sort 39738.94 41939.05 4113.76 38247.90 47618.78 6 | friendster radix sort 30852.09 36352.01 7403.04 29090.21 42755.96 7 | friendster lift level 28408.64 32566.40 5441.91 27416.99 38149.54 8 | friendster ordering 26649.19 32011.35 6584.18 25986.19 37990.87 9 | friendster fused 33350.19 36731.43 5937.43 30097.78 42032.09 10 | friendster fused if 31864.88 32945.22 1676.01 31298.16 34952.22 11 | friendster 
cuckoo 623705.70 623705.70 NaN 623705.70 623705.70 12 | friendster unroll 4 36492.91 34576.35 6355.53 25152.22 38834.61 13 | friendster unroll 8 28148.58 30747.41 3851.79 27272.57 35686.30 14 | friendster unroll 8 4 30066.80 31219.65 2445.16 28478.57 34036.23 15 | friendster parallel 383668.43 383668.43 NaN 383668.43 383668.43 16 | friendster frontier 4x 13759.30 17940.02 5091.50 13412.63 23263.36 17 | friendster frontier 48x 5330.17 8018.78 3931.12 5244.15 13623.59 18 | friendster almost 4x 11294.02 13924.79 3534.17 10868.55 18355.51 19 | friendster almost 48x 4980.14 7047.31 3915.05 4969.68 12914.66 20 | friendster marking 4x 10938.06 15639.17 5718.47 10581.83 21704.10 21 | friendster marking 48x 7835.65 13059.36 10099.10 5368.70 27767.35 22 | friendster early2 4x 13811.64 15993.55 4764.34 11748.36 22718.55 23 | friendster early2 48x 3630.76 3967.10 571.81 3612.02 4814.14 24 | friendster early3 4x 12380.37 15311.89 4042.85 11301.55 19145.16 25 | friendster early3 48x 3628.39 4880.60 2484.70 3614.34 8607.46 26 | friendster early4 4x 11927.32 14203.25 4609.51 11587.60 21107.60 27 | friendster early4 48x 4058.13 5903.28 3665.41 4045.59 11401.24 28 | friendster earlyR 4x 11201.27 14154.06 5629.92 11017.29 22583.47 29 | friendster earlyR 48x 5132.86 5848.91 1717.90 4324.60 8293.41 30 | friendster worker 4x 13968.84 14492.01 2754.84 10810.19 16814.71 31 | friendster worker 48x 6349.94 6430.52 2215.94 3632.97 9043.58 32 | friendster busy 4x 14281.88 16907.97 5133.08 11144.47 22470.01 33 | friendster busy 48x 7564.58 7592.57 2958.59 3631.64 10730.31 -------------------------------------------------------------------------------- /results/Linux-Xeon-E5-2670v3.txt: -------------------------------------------------------------------------------- 1 | dataset approach med avg stdev min max 2 | friendster baseline 48438.12 50974.16 5141.85 48135.96 58680.63 3 | friendster reuse level 111104.31 112062.02 2747.54 108760.23 115128.85 4 | friendster sort 50134.13 51561.39 4298.30 
46128.68 55406.69 5 | friendster inline sort 44762.82 42564.77 6071.12 33500.97 46103.25 6 | friendster radix sort 32959.94 34476.40 2165.81 32283.81 36538.90 7 | friendster lift level 30838.93 31457.74 3238.32 27659.33 35522.88 8 | friendster ordering 27762.31 31311.70 5245.93 25946.44 36617.45 9 | friendster fused 28084.12 34262.61 7468.82 27530.32 41222.19 10 | friendster fused if 26352.97 31554.39 6818.55 25060.40 38366.43 11 | friendster cuckoo 597774.09 597774.09 NaN 597774.09 597774.09 12 | friendster unroll 4 29564.24 31405.14 5039.33 25901.10 37863.93 13 | friendster unroll 8 23971.01 29993.20 7254.38 23590.02 37512.79 14 | friendster unroll 8 4 26516.34 31044.48 6464.37 24529.69 37415.86 15 | friendster parallel 390401.24 390401.24 NaN 390401.24 390401.24 16 | friendster frontier 4x 15847.08 18416.70 4221.73 14062.60 23281.93 17 | friendster frontier 48x 5348.08 7824.89 4511.51 5285.74 14569.57 18 | friendster almost 4x 14344.07 18008.26 5756.84 11857.95 22982.05 19 | friendster almost 48x 5220.54 9256.83 5337.51 4339.75 15301.37 20 | friendster marking 4x 14264.15 15016.56 2431.74 11915.68 17202.66 21 | friendster marking 48x 4093.50 10264.42 8263.47 3764.12 21303.26 22 | friendster early2 4x 12544.99 14714.37 4168.67 11597.62 20802.83 23 | friendster early2 48x 4313.70 6391.07 4049.30 4305.95 12463.81 24 | friendster early3 4x 12477.08 14833.97 4070.13 11905.49 20762.94 25 | friendster early3 48x 4503.99 13027.83 16960.92 3873.29 38454.58 26 | friendster early4 4x 13819.66 15843.93 3337.68 12279.00 19343.62 27 | friendster early4 48x 3785.68 5004.56 2052.14 3778.76 8051.37 28 | friendster earlyR 4x 13070.19 15094.39 4316.65 11395.31 21253.15 29 | friendster earlyR 48x 6657.79 9676.80 6143.26 4309.19 18349.26 30 | friendster worker 4x 16936.37 17207.57 4316.08 11759.95 22278.65 31 | friendster worker 48x 7879.95 10213.65 7291.77 4189.89 20828.78 32 | friendster busy 4x 14524.43 15526.01 2856.74 11931.74 17825.14 33 | friendster busy 48x 8454.80 9230.34 
3914.97 4001.90 12329.89 -------------------------------------------------------------------------------- /results/Mac-i5-5257U-B.txt: -------------------------------------------------------------------------------- 1 | dataset approach med avg stdev min max 2 | sg-10k-250k baseline 2.27 2.50 0.55 2.11 4.60 3 | sg-10k-250k reuse level 1.94 2.23 0.61 1.75 4.29 4 | sg-10k-250k sort 3.21 3.31 0.39 3.02 4.85 5 | sg-10k-250k inline sort 2.42 2.58 0.47 2.29 4.40 6 | sg-10k-250k radix sort 2.06 2.22 0.45 1.94 4.48 7 | sg-10k-250k lift level 2.12 2.28 0.45 1.99 3.96 8 | sg-10k-250k ordering 2.07 2.19 0.41 1.89 4.17 9 | sg-10k-250k fused 2.31 2.51 0.37 2.17 3.54 10 | sg-10k-250k fused if 2.30 2.48 0.48 2.07 4.31 11 | sg-10k-250k cuckoo 57.05 57.05 NaN 57.05 57.05 12 | sg-10k-250k unroll 4 2.16 2.39 0.47 1.96 3.88 13 | sg-10k-250k unroll 8 2.07 2.31 0.58 1.89 4.42 14 | sg-10k-250k unroll 8 4 1.95 2.15 0.52 1.83 4.11 15 | sg-10k-250k parallel 81.91 81.91 NaN 81.91 81.91 16 | sg-10k-250k frontier 4x 2.37 2.47 0.32 2.08 3.53 17 | sg-10k-250k frontier 4x 2.42 2.58 0.46 2.22 4.42 18 | sg-10k-250k almost 4x 3.04 3.22 0.51 2.67 5.14 19 | sg-10k-250k almost 4x 3.18 3.44 0.72 2.73 6.10 20 | sg-10k-250k marking 4x 3.22 3.48 0.67 2.81 5.94 21 | sg-10k-250k marking 4x 3.30 3.51 0.64 2.86 5.73 22 | sg-10k-250k early2 4x 3.12 3.30 0.57 2.69 5.16 23 | sg-10k-250k early2 4x 3.14 3.34 0.64 2.73 5.65 24 | sg-10k-250k early3 4x 3.10 3.37 0.64 2.82 5.84 25 | sg-10k-250k early3 4x 3.18 3.38 0.68 2.74 6.30 26 | sg-10k-250k early4 4x 3.30 3.60 0.78 2.80 6.19 27 | sg-10k-250k early4 4x 3.18 3.60 1.32 2.69 10.46 28 | sg-10k-250k earlyR 4x 3.26 3.53 0.83 2.64 6.40 29 | sg-10k-250k earlyR 4x 3.25 3.55 0.95 2.74 8.01 30 | sg-10k-250k worker 4x 3.35 3.69 0.93 2.98 7.43 31 | sg-10k-250k worker 4x 4.18 4.66 1.43 3.27 10.30 32 | sg-10k-250k busy 4x 4.13 7.14 11.69 3.23 78.24 33 | sg-10k-250k busy 4x 3.77 4.43 1.87 3.21 11.53 34 | sg-5m-100m baseline 1966.48 2012.61 183.37 1930.31 3139.00 35 | sg-5m-100m 
reuse level 3594.83 3627.56 77.74 3510.09 3860.04 36 | sg-5m-100m sort 1822.99 1839.46 81.24 1795.03 2368.41 37 | sg-5m-100m inline sort 1325.87 1328.26 17.73 1297.68 1376.18 38 | sg-5m-100m radix sort 964.44 966.77 14.57 946.20 1029.35 39 | sg-5m-100m lift level 934.59 936.92 14.36 910.43 986.49 40 | sg-5m-100m ordering 942.45 944.32 23.13 917.72 1026.27 41 | sg-5m-100m fused 1044.57 1049.23 16.00 1018.00 1095.65 42 | sg-5m-100m fused if 981.67 983.48 19.24 953.67 1065.77 43 | sg-5m-100m cuckoo 43654.09 43654.09 NaN 43654.09 43654.09 44 | sg-5m-100m unroll 4 855.97 858.89 13.68 838.69 894.49 45 | sg-5m-100m unroll 8 845.40 851.45 17.63 828.57 903.98 46 | sg-5m-100m unroll 8 4 856.72 886.46 139.23 840.09 1675.60 47 | sg-5m-100m parallel 38331.87 38331.87 NaN 38331.87 38331.87 48 | sg-5m-100m frontier 4x 557.50 581.94 65.93 536.93 777.55 49 | sg-5m-100m frontier 4x 556.46 566.70 43.81 530.48 784.88 50 | sg-5m-100m almost 4x 653.28 700.02 100.00 621.96 941.30 51 | sg-5m-100m almost 4x 652.93 679.28 73.62 624.37 931.68 52 | sg-5m-100m marking 4x 647.34 674.06 70.24 612.51 894.57 53 | sg-5m-100m marking 4x 649.10 664.83 66.89 610.25 948.04 54 | sg-5m-100m early2 4x 660.41 686.26 82.95 614.35 937.01 55 | sg-5m-100m early2 4x 650.56 678.35 73.10 626.00 967.78 56 | sg-5m-100m early3 4x 652.10 676.37 67.17 621.22 954.75 57 | sg-5m-100m early3 4x 664.82 676.11 51.47 622.41 933.84 58 | sg-5m-100m early4 4x 646.63 660.98 57.58 605.93 955.51 59 | sg-5m-100m early4 4x 645.25 658.95 53.98 613.73 924.16 60 | sg-5m-100m earlyR 4x 648.80 674.16 79.05 601.88 936.59 61 | sg-5m-100m earlyR 4x 643.22 659.43 50.54 622.43 915.88 62 | sg-5m-100m worker 4x 649.91 709.82 228.96 620.06 2153.89 63 | sg-5m-100m worker 4x 644.30 725.28 320.76 606.40 2610.59 64 | sg-5m-100m busy 4x 645.79 647.67 18.76 603.83 693.02 65 | sg-5m-100m busy 4x 647.65 665.70 64.41 599.43 928.16 66 | -------------------------------------------------------------------------------- /results/Mac-i5-5257U.txt: 
-------------------------------------------------------------------------------- 1 | dataset approach med avg stdev min max 2 | sg-10k-250k baseline 2.29 2.54 0.57 2.13 4.44 3 | sg-10k-250k reuse level 2.06 2.28 0.57 1.85 4.35 4 | sg-10k-250k sort 3.41 3.53 0.44 3.20 5.55 5 | sg-10k-250k inline sort 2.55 2.71 0.34 2.45 4.14 6 | sg-10k-250k radix sort 2.12 2.27 0.42 2.00 3.90 7 | sg-10k-250k lift level 2.08 2.21 0.38 1.97 3.79 8 | sg-10k-250k ordering 2.06 2.16 0.38 1.94 3.83 9 | sg-10k-250k fused 2.24 2.35 0.36 2.09 3.98 10 | sg-10k-250k fused if 2.25 2.37 0.38 2.12 4.03 11 | sg-10k-250k cuckoo 53.64 53.64 NaN 53.64 53.64 12 | sg-10k-250k unroll 4 2.11 2.24 0.41 1.97 4.06 13 | sg-10k-250k unroll 8 2.10 2.22 0.32 2.02 3.57 14 | sg-10k-250k unroll 8 4 2.13 2.25 0.36 2.03 3.87 15 | sg-10k-250k parallel 84.05 84.05 NaN 84.05 84.05 16 | sg-10k-250k frontier 4x 2.53 2.64 0.43 2.28 4.61 17 | sg-10k-250k frontier 4x 2.58 2.73 0.45 2.30 4.75 18 | sg-10k-250k almost 4x 3.35 3.54 0.52 3.00 5.30 19 | sg-10k-250k almost 4x 3.44 3.59 0.58 3.04 6.10 20 | sg-10k-250k marking 4x 3.35 3.66 0.71 2.96 5.60 21 | sg-10k-250k marking 4x 3.43 3.69 0.72 3.12 6.98 22 | sg-10k-250k early2 4x 3.45 3.74 0.77 3.13 6.47 23 | sg-10k-250k early2 4x 3.53 3.86 1.02 3.00 9.20 24 | sg-10k-250k early3 4x 3.35 3.74 0.81 3.02 7.01 25 | sg-10k-250k early3 4x 3.62 3.84 0.73 3.11 6.27 26 | sg-10k-250k early4 4x 3.50 3.81 0.80 2.87 6.21 27 | sg-10k-250k early4 4x 3.37 3.62 0.77 2.92 6.28 28 | sg-10k-250k earlyR 4x 3.32 3.72 1.08 2.91 8.16 29 | sg-10k-250k earlyR 4x 3.52 3.80 0.89 2.83 7.58 30 | sg-10k-250k worker 4x 3.72 3.97 0.93 3.03 7.56 31 | sg-10k-250k worker 4x 3.41 3.61 0.70 2.91 5.77 32 | sg-10k-250k busy 4x 3.35 3.64 0.80 2.85 6.82 33 | sg-10k-250k busy 4x 3.36 3.58 0.71 2.88 6.13 34 | sg-5m-100m baseline 1942.58 2107.67 570.57 1923.05 5033.87 35 | sg-5m-100m reuse level 3826.75 3876.82 90.81 3787.75 4167.88 36 | sg-5m-100m sort 1973.22 2000.54 75.11 1952.04 2301.81 37 | sg-5m-100m inline sort 
1373.87 1374.72 13.13 1340.72 1412.07 38 | sg-5m-100m radix sort 977.88 978.03 15.27 954.01 1025.61 39 | sg-5m-100m lift level 940.96 945.33 16.41 922.40 1017.12 40 | sg-5m-100m ordering 957.60 958.79 18.43 928.02 1034.52 41 | sg-5m-100m fused 1037.03 1039.75 18.00 1015.85 1124.79 42 | sg-5m-100m fused if 981.70 985.00 14.24 964.32 1018.21 43 | sg-5m-100m cuckoo 42922.37 42922.37 NaN 42922.37 42922.37 44 | sg-5m-100m unroll 4 919.71 924.90 15.45 899.98 973.39 45 | sg-5m-100m unroll 8 910.97 912.88 13.13 892.17 957.60 46 | sg-5m-100m unroll 8 4 920.66 925.81 19.49 903.16 996.37 47 | sg-5m-100m parallel 37960.34 37960.34 NaN 37960.34 37960.34 48 | sg-5m-100m frontier 4x 604.32 613.50 38.08 577.85 829.07 49 | sg-5m-100m frontier 4x 606.91 640.49 75.65 584.13 854.87 50 | sg-5m-100m almost 4x 718.58 743.78 92.43 666.47 1061.34 51 | sg-5m-100m almost 4x 705.13 728.30 71.78 639.00 1016.37 52 | sg-5m-100m marking 4x 689.42 722.02 98.15 651.36 1117.22 53 | sg-5m-100m marking 4x 691.19 695.93 20.38 639.11 750.92 54 | sg-5m-100m early2 4x 690.43 722.76 92.30 653.33 1150.09 55 | sg-5m-100m early2 4x 692.50 714.50 89.02 661.01 1281.71 56 | sg-5m-100m early3 4x 695.23 716.28 74.65 661.64 1021.08 57 | sg-5m-100m early3 4x 703.60 734.62 84.60 672.21 1023.76 58 | sg-5m-100m early4 4x 690.95 704.77 49.97 657.17 956.26 59 | sg-5m-100m early4 4x 689.35 706.36 63.39 646.41 1001.34 60 | sg-5m-100m earlyR 4x 678.86 706.60 91.47 639.81 1106.42 61 | sg-5m-100m earlyR 4x 700.85 718.66 67.59 649.10 1016.22 62 | sg-5m-100m worker 4x 690.06 722.15 85.17 644.87 1009.40 63 | sg-5m-100m worker 4x 686.19 705.76 66.74 662.98 1032.80 64 | sg-5m-100m busy 4x 705.72 724.75 73.02 655.67 999.47 65 | sg-5m-100m busy 4x 687.81 750.30 203.49 655.42 1946.54 66 | -------------------------------------------------------------------------------- /results/Win-i7-2820QM-B.txt: -------------------------------------------------------------------------------- 1 | dataset approach med avg stdev min max 2 | 
sg-10k-250k baseline 2.66 2.66 0.08 2.51 2.98 3 | sg-10k-250k reuse level 2.10 2.12 0.07 2.03 2.41 4 | sg-10k-250k sort 3.65 3.72 0.25 3.38 4.70 5 | sg-10k-250k inline sort 2.86 2.87 0.09 2.62 3.05 6 | sg-10k-250k radix sort 2.51 2.51 0.10 2.34 2.73 7 | sg-10k-250k lift level 2.51 2.51 0.09 2.22 2.70 8 | sg-10k-250k ordering 2.55 2.55 0.08 2.35 2.75 9 | sg-10k-250k fused 2.83 2.84 0.11 2.62 3.32 10 | sg-10k-250k fused if 2.87 2.88 0.10 2.66 3.14 11 | sg-10k-250k cuckoo 58.35 58.35 NaN 58.35 58.35 12 | sg-10k-250k unroll 4 2.47 2.45 0.11 2.24 2.66 13 | sg-10k-250k unroll 8 2.38 2.40 0.12 2.19 2.82 14 | sg-10k-250k unroll 8 4 2.47 2.49 0.14 2.24 3.01 15 | sg-10k-250k parallel 83.54 83.54 NaN 83.54 83.54 16 | sg-10k-250k frontier 4x 3.11 3.17 0.27 2.48 3.96 17 | sg-10k-250k frontier 8x 4.37 4.36 0.31 3.45 5.01 18 | sg-10k-250k almost 4x 4.74 4.87 0.52 4.21 7.30 19 | sg-10k-250k almost 8x 7.02 7.07 0.49 6.08 8.06 20 | sg-10k-250k marking 4x 4.63 4.62 0.31 4.00 5.63 21 | sg-10k-250k marking 8x 6.62 6.74 0.46 6.06 8.71 22 | sg-10k-250k early2 4x 4.38 4.44 0.43 3.61 5.70 23 | sg-10k-250k early2 8x 6.44 6.44 0.37 5.67 7.28 24 | sg-10k-250k early3 4x 4.25 4.35 0.37 3.53 5.63 25 | sg-10k-250k early3 8x 6.31 6.45 0.55 5.67 7.90 26 | sg-10k-250k early4 4x 4.25 4.36 0.36 3.54 5.48 27 | sg-10k-250k early4 8x 6.38 6.46 0.44 5.66 7.37 28 | sg-10k-250k earlyR 4x 4.24 4.24 0.30 3.73 5.31 29 | sg-10k-250k earlyR 8x 6.30 6.36 0.42 5.57 7.35 30 | sg-10k-250k worker 4x 4.51 4.57 0.36 3.80 5.39 31 | sg-10k-250k worker 8x 6.89 6.99 0.39 6.25 7.92 32 | sg-10k-250k busy 4x 4.44 4.45 0.32 3.65 5.25 33 | sg-10k-250k busy 8x 6.96 7.14 0.56 6.38 9.74 34 | sg-5m-100m baseline 2326.92 2336.96 68.86 2236.46 2663.45 35 | sg-5m-100m reuse level 5992.09 5720.10 410.22 5187.24 6234.28 36 | sg-5m-100m sort 2173.41 2153.84 45.16 2062.96 2209.40 37 | sg-5m-100m inline sort 1609.74 1610.39 12.94 1583.70 1645.99 38 | sg-5m-100m radix sort 1283.12 1284.18 14.59 1259.18 1341.81 39 | sg-5m-100m lift level 
1227.90 1233.71 25.54 1215.77 1363.26 40 | sg-5m-100m ordering 1228.75 1244.91 103.10 1219.74 1956.99 41 | sg-5m-100m fused 1401.10 1405.52 17.38 1383.37 1504.68 42 | sg-5m-100m fused if 1310.73 1311.40 8.73 1293.34 1334.43 43 | sg-5m-100m cuckoo 48735.95 48735.95 NaN 48735.95 48735.95 44 | sg-5m-100m unroll 4 1136.43 1137.57 7.29 1121.88 1156.27 45 | sg-5m-100m unroll 8 1191.13 1191.18 7.53 1178.71 1210.84 46 | sg-5m-100m unroll 8 4 1211.51 1212.08 8.34 1198.24 1228.07 47 | sg-5m-100m parallel 38329.41 38329.41 NaN 38329.41 38329.41 48 | sg-5m-100m frontier 4x 704.30 705.48 13.11 683.10 741.57 49 | sg-5m-100m frontier 8x 624.53 640.57 67.05 597.11 981.99 50 | sg-5m-100m almost 4x 777.50 773.54 67.10 667.20 1152.95 51 | sg-5m-100m almost 8x 609.03 622.48 40.47 596.67 847.27 52 | sg-5m-100m marking 4x 661.77 677.74 61.08 633.74 1029.16 53 | sg-5m-100m marking 8x 589.40 619.42 108.95 571.76 1263.18 54 | sg-5m-100m early2 4x 647.17 665.87 75.47 606.34 1123.71 55 | sg-5m-100m early2 8x 584.02 618.19 101.97 566.98 1211.43 56 | sg-5m-100m early3 4x 646.06 687.36 131.84 621.84 1345.62 57 | sg-5m-100m early3 8x 587.84 600.58 40.63 568.51 814.79 58 | sg-5m-100m early4 4x 643.86 659.89 65.02 601.59 1016.00 59 | sg-5m-100m early4 8x 579.56 601.00 92.14 561.09 1213.98 60 | sg-5m-100m earlyR 4x 646.08 657.21 78.05 606.22 1174.64 61 | sg-5m-100m earlyR 8x 584.80 614.72 103.95 562.87 1215.81 62 | sg-5m-100m worker 4x 645.22 663.34 77.20 611.33 1129.86 63 | sg-5m-100m worker 8x 587.34 613.15 55.09 567.05 804.60 64 | sg-5m-100m busy 4x 638.86 662.11 94.53 605.98 1144.39 65 | sg-5m-100m busy 8x 578.03 589.32 34.84 561.15 716.05 66 | -------------------------------------------------------------------------------- /results/Win-i7-2820QM.txt: -------------------------------------------------------------------------------- 1 | dataset approach med avg stdev min max 2 | sg-10k-250k baseline 2.65 2.66 0.09 2.53 3.04 3 | sg-10k-250k reuse level 2.20 2.21 0.08 2.11 2.54 4 | sg-10k-250k sort 
3.92 3.94 0.25 3.54 4.66 5 | sg-10k-250k inline sort 2.96 2.96 0.10 2.73 3.19 6 | sg-10k-250k radix sort 2.55 2.55 0.11 2.30 2.83 7 | sg-10k-250k lift level 2.51 2.53 0.06 2.39 2.72 8 | sg-10k-250k ordering 2.50 2.50 0.09 2.31 2.69 9 | sg-10k-250k fused 2.83 2.83 0.09 2.59 3.08 10 | sg-10k-250k fused if 2.78 2.79 0.08 2.61 3.02 11 | sg-10k-250k cuckoo 58.55 58.55 NaN 58.55 58.55 12 | sg-10k-250k unroll 4 2.52 2.53 0.08 2.36 2.70 13 | sg-10k-250k unroll 8 2.52 2.51 0.06 2.32 2.61 14 | sg-10k-250k unroll 8 4 2.61 2.61 0.11 2.41 2.85 15 | sg-10k-250k parallel 76.47 76.47 NaN 76.47 76.47 16 | sg-10k-250k frontier 4x 3.24 3.30 0.29 2.72 4.15 17 | sg-10k-250k frontier 8x 4.27 4.32 0.52 3.54 6.02 18 | sg-10k-250k almost 4x 4.29 4.27 0.25 3.81 4.78 19 | sg-10k-250k almost 8x 6.42 6.51 0.60 5.65 8.46 20 | sg-10k-250k marking 4x 4.43 4.49 0.37 3.73 5.26 21 | sg-10k-250k marking 8x 6.27 6.37 0.42 5.65 7.47 22 | sg-10k-250k early2 4x 4.21 4.26 0.31 3.75 5.21 23 | sg-10k-250k early2 8x 6.34 6.44 0.42 5.77 7.72 24 | sg-10k-250k early3 4x 4.12 4.16 0.27 3.58 4.71 25 | sg-10k-250k early3 8x 6.06 6.12 0.37 5.48 7.06 26 | sg-10k-250k early4 4x 4.20 4.18 0.27 3.53 4.73 27 | sg-10k-250k early4 8x 6.12 6.09 0.42 5.23 6.81 28 | sg-10k-250k earlyR 4x 4.04 4.12 0.35 3.51 5.03 29 | sg-10k-250k earlyR 8x 5.96 6.05 0.38 5.34 7.50 30 | sg-10k-250k worker 4x 4.29 4.33 0.31 3.67 5.08 31 | sg-10k-250k worker 8x 6.47 6.56 0.41 5.82 7.87 32 | sg-10k-250k busy 4x 4.30 4.32 0.34 3.63 5.47 33 | sg-10k-250k busy 8x 6.51 6.59 0.39 5.88 7.94 34 | sg-5m-100m baseline 2292.08 2298.21 66.42 2172.67 2733.01 35 | sg-5m-100m reuse level 5790.63 5781.76 52.85 5674.25 5912.12 36 | sg-5m-100m sort 2397.26 2382.99 146.90 2194.03 2917.73 37 | sg-5m-100m inline sort 1760.52 1767.19 33.93 1707.87 1833.79 38 | sg-5m-100m radix sort 1375.41 1377.78 46.64 1305.86 1601.01 39 | sg-5m-100m lift level 1318.27 1318.62 33.83 1259.43 1401.71 40 | sg-5m-100m ordering 1313.54 1313.06 37.24 1259.54 1415.92 41 | sg-5m-100m fused 
1520.57 1522.11 49.73 1415.77 1692.73 42 | sg-5m-100m fused if 1419.34 1423.09 42.67 1342.20 1553.48 43 | sg-5m-100m cuckoo 49337.60 49337.60 NaN 49337.60 49337.60 44 | sg-5m-100m unroll 4 1201.32 1191.87 23.35 1158.71 1257.70 45 | sg-5m-100m unroll 8 1211.04 1211.79 2.68 1207.43 1218.94 46 | sg-5m-100m unroll 8 4 1223.86 1223.96 3.33 1218.85 1232.55 47 | sg-5m-100m parallel 35598.58 35598.58 NaN 35598.58 35598.58 48 | sg-5m-100m frontier 4x 663.02 670.77 33.36 634.22 787.92 49 | sg-5m-100m frontier 8x 583.23 585.94 9.59 576.72 629.63 50 | sg-5m-100m almost 4x 692.37 693.19 15.62 660.49 744.84 51 | sg-5m-100m almost 8x 617.66 625.27 21.38 603.86 705.12 52 | sg-5m-100m marking 4x 680.43 709.96 78.56 649.22 1100.90 53 | sg-5m-100m marking 8x 604.50 638.66 116.90 587.57 1362.27 54 | sg-5m-100m early2 4x 675.38 715.86 132.40 620.73 1328.68 55 | sg-5m-100m early2 8x 600.41 607.38 25.61 589.97 749.52 56 | sg-5m-100m early3 4x 677.22 697.80 96.65 646.31 1213.31 57 | sg-5m-100m early3 8x 624.44 649.59 61.98 592.56 872.67 58 | sg-5m-100m early4 4x 674.57 688.37 46.41 635.43 863.84 59 | sg-5m-100m early4 8x 609.89 629.95 51.90 594.42 791.25 60 | sg-5m-100m earlyR 4x 668.55 695.82 84.53 637.18 1178.71 61 | sg-5m-100m earlyR 8x 604.18 641.39 114.46 588.25 1245.04 62 | sg-5m-100m worker 4x 668.81 697.51 101.58 641.45 1213.93 63 | sg-5m-100m worker 8x 608.95 628.67 56.06 587.22 881.53 64 | sg-5m-100m busy 4x 679.17 802.85 331.88 641.79 2373.94 65 | sg-5m-100m busy 8x 608.87 673.40 175.88 585.64 1299.28 66 | --------------------------------------------------------------------------------