├── .gitignore ├── go.mod ├── LICENSE ├── README.md ├── intintmap_test.go └── intintmap.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/brentp/intintmap 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Brent Pedersen - Bioinformatics 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Fast int64 -> int64 hash map in Go. 2 | 3 | [![GoDoc](https://godoc.org/github.com/brentp/intintmap?status.svg)](https://godoc.org/github.com/brentp/intintmap) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/brentp/intintmap)](https://goreportcard.com/report/github.com/brentp/intintmap) 5 | 6 | # intintmap 7 | 8 | import "github.com/brentp/intintmap" 9 | 10 | Package intintmap is a fast int64 key -> int64 value map. 11 | 12 | It is copied nearly verbatim from 13 | http://java-performance.info/implementing-world-fastest-java-int-to-int-hash-map/ . 14 | 15 | It interleaves keys and values in the same underlying array to improve locality. 16 | 17 | It is 2-5X faster than the builtin map: 18 | ``` 19 | BenchmarkIntIntMapFill 10 158436598 ns/op 20 | BenchmarkStdMapFill 5 312135474 ns/op 21 | BenchmarkIntIntMapGet10PercentHitRate 5000 243108 ns/op 22 | BenchmarkStdMapGet10PercentHitRate 5000 268927 ns/op 23 | BenchmarkIntIntMapGet100PercentHitRate 500 2249349 ns/op 24 | BenchmarkStdMapGet100PercentHitRate 100 10258929 ns/op 25 | ``` 26 | 27 | ## Usage 28 | 29 | ```go 30 | m := intintmap.New(32768, 0.6) 31 | m.Put(int64(1234), int64(-222)) 32 | m.Put(int64(123), int64(33)) 33 | 34 | v, ok := m.Get(int64(222)) 35 | v, ok = m.Get(int64(333)) 36 | 37 | m.Del(int64(222)) 38 | m.Del(int64(333)) 39 | 40 | fmt.Println(m.Size()) 41 | 42 | for k := range m.Keys() { 43 | fmt.Printf("key: %d\n", k) 44 | } 45 | 46 | for kv := range m.Items() { 47 | fmt.Printf("key: %d, value: %d\n", kv[0], kv[1]) 48 | } 49 | ``` 50 | 51 | #### type Map 52 | 53 | ```go 54 | type Map struct { 55 | } 56 | ``` 57 | 58 | Map is a map-like data structure for int64 keys and values. 59 | 60 | #### func New 61 | 62 | ```go 63 | func New(size int, fillFactor float64) *Map 64 | ``` 65 | New returns a map initialized with n spaces 
and uses the stated fillFactor. The 66 | map will grow as needed. 67 | 68 | #### func (*Map) Get 69 | 70 | ```go 71 | func (m *Map) Get(key int64) (int64, bool) 72 | ``` 73 | Get returns the value if the key is found. 74 | 75 | #### func (*Map) Put 76 | 77 | ```go 78 | func (m *Map) Put(key int64, val int64) 79 | ``` 80 | Put adds or updates key with value val. 81 | 82 | #### func (*Map) Del 83 | 84 | ```go 85 | func (m *Map) Del(key int64) 86 | ``` 87 | Del deletes a key and its value. 88 | 89 | #### func (*Map) Keys 90 | 91 | ```go 92 | func (m *Map) Keys() chan int64 93 | ``` 94 | Keys returns a channel for iterating all keys. 95 | 96 | #### func (*Map) Items 97 | 98 | ```go 99 | func (m *Map) Items() chan [2]int64 100 | ``` 101 | Items returns a channel for iterating all key-value pairs. 102 | 103 | 104 | #### func (*Map) Size 105 | 106 | ```go 107 | func (m *Map) Size() int 108 | ``` 109 | Size returns size of the map. 110 | -------------------------------------------------------------------------------- /intintmap_test.go: -------------------------------------------------------------------------------- 1 | package intintmap 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestMapSimple(t *testing.T) { 8 | m := New(10, 0.99) 9 | var i int64 10 | var v int64 11 | var ok bool 12 | 13 | // -------------------------------------------------------------------- 14 | // Put() and Get() 15 | 16 | for i = 0; i < 20000; i += 2 { 17 | m.Put(i, i) 18 | } 19 | for i = 0; i < 20000; i += 2 { 20 | if v, ok = m.Get(i); !ok || v != i { 21 | t.Errorf("didn't get expected value") 22 | } 23 | if _, ok = m.Get(i + 1); ok { 24 | t.Errorf("didn't get expected 'not found' flag") 25 | } 26 | } 27 | 28 | if m.Size() != int(20000/2) { 29 | t.Errorf("size (%d) is not right, should be %d", m.Size(), int(20000/2)) 30 | } 31 | 32 | // -------------------------------------------------------------------- 33 | // Keys() 34 | 35 | m0 := make(map[int64]int64, 1000) 36 | for i = 0; i < 20000; i 
+= 2 { 37 | m0[i] = i 38 | } 39 | n := len(m0) 40 | 41 | for k := range m.Keys() { 42 | m0[k] = -k 43 | } 44 | if n != len(m0) { 45 | t.Errorf("get unexpected more keys") 46 | } 47 | 48 | for k, v := range m0 { 49 | if k != -v { 50 | t.Errorf("didn't get expected changed value") 51 | } 52 | } 53 | 54 | // -------------------------------------------------------------------- 55 | // Items() 56 | 57 | m0 = make(map[int64]int64, 1000) 58 | for i = 0; i < 20000; i += 2 { 59 | m0[i] = i 60 | } 61 | n = len(m0) 62 | 63 | for kv := range m.Items() { 64 | m0[kv[0]] = -kv[1] 65 | if kv[0] != kv[1] { 66 | t.Errorf("didn't get expected key-value pair") 67 | } 68 | } 69 | if n != len(m0) { 70 | t.Errorf("get unexpected more keys") 71 | } 72 | 73 | for k, v := range m0 { 74 | if k != -v { 75 | t.Errorf("didn't get expected changed value") 76 | } 77 | } 78 | 79 | // -------------------------------------------------------------------- 80 | // Del() 81 | 82 | for i = 0; i < 20000; i += 2 { 83 | m.Del(i) 84 | } 85 | for i = 0; i < 20000; i += 2 { 86 | if _, ok = m.Get(i); ok { 87 | t.Errorf("didn't get expected 'not found' flag") 88 | } 89 | if _, ok = m.Get(i + 1); ok { 90 | t.Errorf("didn't get expected 'not found' flag") 91 | } 92 | } 93 | 94 | // -------------------------------------------------------------------- 95 | // Put() and Get() 96 | 97 | for i = 0; i < 20000; i += 2 { 98 | m.Put(i, i*2) 99 | } 100 | for i = 0; i < 20000; i += 2 { 101 | if v, ok = m.Get(i); !ok || v != i*2 { 102 | t.Errorf("didn't get expected value") 103 | } 104 | if _, ok = m.Get(i + 1); ok { 105 | t.Errorf("didn't get expected 'not found' flag") 106 | } 107 | } 108 | 109 | } 110 | 111 | func TestMap(t *testing.T) { 112 | m := New(10, 0.6) 113 | var ok bool 114 | var v int64 115 | 116 | step := int64(61) 117 | 118 | var i int64 119 | m.Put(0, 12345) 120 | for i = 1; i < 100000000; i += step { 121 | m.Put(i, i+7) 122 | m.Put(-i, i-7) 123 | 124 | if v, ok = m.Get(i); !ok || v != i+7 { 125 | 
t.Errorf("expected %d as value for key %d, got %d", i+7, i, v) 126 | } 127 | if v, ok = m.Get(-i); !ok || v != i-7 { 128 | t.Errorf("expected %d as value for key %d, got %d", i-7, -i, v) 129 | } 130 | } 131 | for i = 1; i < 100000000; i += step { 132 | if v, ok = m.Get(i); !ok || v != i+7 { 133 | t.Errorf("expected %d as value for key %d, got %d", i+7, i, v) 134 | } 135 | if v, ok = m.Get(-i); !ok || v != i-7 { 136 | t.Errorf("expected %d as value for key %d, got %d", i-7, -i, v) 137 | } 138 | 139 | for j := i + 1; j < i+step; j++ { 140 | if v, ok = m.Get(j); ok { 141 | t.Errorf("expected 'not found' flag for %d, found %d", j, v) 142 | } 143 | } 144 | } 145 | 146 | if v, ok = m.Get(0); !ok || v != 12345 { 147 | t.Errorf("expected 12345 for key 0") 148 | } 149 | } 150 | 151 | const MAX = 999999999 152 | const STEP = 9534 153 | 154 | func fillIntIntMap(m *Map) { 155 | var j int64 156 | for j = 0; j < MAX; j += STEP { 157 | m.Put(j, -j) 158 | for k := j; k < j+16; k++ { 159 | m.Put(k, -k) 160 | } 161 | 162 | } 163 | } 164 | 165 | func fillStdMap(m map[int64]int64) { 166 | var j int64 167 | for j = 0; j < MAX; j += STEP { 168 | m[j] = -j 169 | for k := j; k < j+16; k++ { 170 | m[k] = -k 171 | } 172 | } 173 | } 174 | 175 | func BenchmarkIntIntMapFill(b *testing.B) { 176 | for i := 0; i < b.N; i++ { 177 | m := New(2048, 0.60) 178 | fillIntIntMap(m) 179 | } 180 | } 181 | 182 | func BenchmarkStdMapFill(b *testing.B) { 183 | for i := 0; i < b.N; i++ { 184 | m := make(map[int64]int64, 2048) 185 | fillStdMap(m) 186 | } 187 | } 188 | 189 | func BenchmarkIntIntMapGet10PercentHitRate(b *testing.B) { 190 | var j, k, v, sum int64 191 | var ok bool 192 | m := New(2048, 0.60) 193 | fillIntIntMap(m) 194 | for i := 0; i < b.N; i++ { 195 | sum = int64(0) 196 | for j = 0; j < MAX; j += STEP { 197 | for k = j; k < 10; k++ { 198 | if v, ok = m.Get(k); ok { 199 | sum += v 200 | } 201 | } 202 | } 203 | //log.Println("int int sum:", sum) 204 | } 205 | } 206 | 207 | func 
BenchmarkStdMapGet10PercentHitRate(b *testing.B) { 208 | var j, k, v, sum int64 209 | var ok bool 210 | m := make(map[int64]int64, 2048) 211 | fillStdMap(m) 212 | for i := 0; i < b.N; i++ { 213 | sum = int64(0) 214 | for j = 0; j < MAX; j += STEP { 215 | for k = j; k < 10; k++ { 216 | if v, ok = m[k]; ok { 217 | sum += v 218 | } 219 | } 220 | } 221 | //log.Println("map sum:", sum) 222 | } 223 | } 224 | 225 | func BenchmarkIntIntMapGet100PercentHitRate(b *testing.B) { 226 | var j, v, sum int64 227 | var ok bool 228 | m := New(2048, 0.60) 229 | fillIntIntMap(m) 230 | for i := 0; i < b.N; i++ { 231 | sum = int64(0) 232 | for j = 0; j < MAX; j += STEP { 233 | if v, ok = m.Get(j); ok { 234 | sum += v 235 | } 236 | } 237 | //log.Println("int int sum:", sum) 238 | } 239 | } 240 | 241 | func BenchmarkStdMapGet100PercentHitRate(b *testing.B) { 242 | var j, v, sum int64 243 | var ok bool 244 | m := make(map[int64]int64, 2048) 245 | fillStdMap(m) 246 | for i := 0; i < b.N; i++ { 247 | sum = int64(0) 248 | for j = 0; j < MAX; j += STEP { 249 | if v, ok = m[j]; ok { 250 | sum += v 251 | } 252 | } 253 | //log.Println("map sum:", sum) 254 | } 255 | } 256 | 257 | func BenchmarkStdMapRange(b *testing.B) { 258 | var j, v, sum int64 259 | m := make(map[int64]int64, 2048) 260 | fillStdMap(m) 261 | for i := 0; i < b.N; i++ { 262 | sum = int64(0) 263 | for j, v = range m { 264 | sum += j 265 | sum += v 266 | } 267 | //log.Println("map sum:", sum) 268 | } 269 | } 270 | 271 | func BenchmarkIntIntMapItems(b *testing.B) { 272 | var j, v, sum int64 273 | var it [2]int64 274 | m := New(2048, 0.60) 275 | fillIntIntMap(m) 276 | for i := 0; i < b.N; i++ { 277 | sum = int64(0) 278 | for it = range m.Items() { 279 | j, v = it[0], it[1] 280 | sum += j 281 | sum += v 282 | } 283 | //log.Println("int int sum:", sum) 284 | } 285 | } 286 | 287 | func BenchmarkIntIntMapEach(b *testing.B) { 288 | var sum int64 289 | m := New(2048, 0.60) 290 | fillIntIntMap(m) 291 | for i := 0; i < b.N; i++ { 292 | //sum 
= int64(0) 293 | m.Each(func(k, v int64) { 294 | sum += k 295 | sum += v 296 | }) 297 | 298 | } 299 | //log.Println("int int sum:", sum) 300 | } 301 | -------------------------------------------------------------------------------- /intintmap.go: -------------------------------------------------------------------------------- 1 | // Package intintmap is a fast int64 key -> int64 value map. 2 | // 3 | // It is copied nearly verbatim from http://java-performance.info/implementing-world-fastest-java-int-to-int-hash-map/ 4 | package intintmap 5 | 6 | import ( 7 | "math" 8 | ) 9 | 10 | // INT_PHI is for scrambling the keys 11 | const INT_PHI = 0x9E3779B9 12 | 13 | // FREE_KEY is the 'free' key 14 | const FREE_KEY = 0 15 | 16 | func phiMix(x int64) int64 { 17 | h := x * INT_PHI 18 | return h ^ (h >> 16) 19 | } 20 | 21 | // Map is a map-like data-structure for int64s 22 | type Map struct { 23 | data []int64 // interleaved keys and values 24 | fillFactor float64 25 | threshold int // we will resize a map once it reaches this size 26 | size int 27 | 28 | mask int64 // mask to calculate the original position 29 | mask2 int64 30 | 31 | hasFreeKey bool // do we have 'free' key in the map? 32 | freeVal int64 // value of 'free' key 33 | } 34 | 35 | func nextPowerOf2(x uint32) uint32 { 36 | if x == math.MaxUint32 { 37 | return x 38 | } 39 | 40 | if x == 0 { 41 | return 1 42 | } 43 | 44 | x-- 45 | x |= x >> 1 46 | x |= x >> 2 47 | x |= x >> 4 48 | x |= x >> 8 49 | x |= x >> 16 50 | 51 | return x + 1 52 | } 53 | 54 | func arraySize(exp int, fill float64) int { 55 | s := nextPowerOf2(uint32(math.Ceil(float64(exp) / fill))) 56 | if s < 2 { 57 | s = 2 58 | } 59 | return int(s) 60 | } 61 | 62 | // New returns a map initialized with n spaces and uses the stated fillFactor. 63 | // The map will grow as needed. 
64 | func New(size int, fillFactor float64) *Map { 65 | if fillFactor <= 0 || fillFactor >= 1 { 66 | panic("FillFactor must be in (0, 1)") 67 | } 68 | if size <= 0 { 69 | panic("Size must be positive") 70 | } 71 | 72 | capacity := arraySize(size, fillFactor) 73 | return &Map{ 74 | data: make([]int64, 2*capacity), 75 | fillFactor: fillFactor, 76 | threshold: int(math.Floor(float64(capacity) * fillFactor)), 77 | mask: int64(capacity - 1), 78 | mask2: int64(2*capacity - 1), 79 | } 80 | } 81 | 82 | // Get returns the value if the key is found. 83 | func (m *Map) Get(key int64) (int64, bool) { 84 | if key == FREE_KEY { 85 | if m.hasFreeKey { 86 | return m.freeVal, true 87 | } 88 | return 0, false 89 | } 90 | 91 | ptr := (phiMix(key) & m.mask) << 1 92 | if ptr < 0 || ptr >= int64(len(m.data)) { // Check to help to compiler to eliminate a bounds check below. 93 | return 0, false 94 | } 95 | k := m.data[ptr] 96 | 97 | if k == FREE_KEY { // end of chain already 98 | return 0, false 99 | } 100 | if k == key { // we check FREE prior to this call 101 | return m.data[ptr+1], true 102 | } 103 | 104 | for { 105 | ptr = (ptr + 2) & m.mask2 106 | k = m.data[ptr] 107 | if k == FREE_KEY { 108 | return 0, false 109 | } 110 | if k == key { 111 | return m.data[ptr+1], true 112 | } 113 | } 114 | } 115 | 116 | // Put adds or updates key with value val. 
117 | func (m *Map) Put(key int64, val int64) { 118 | if key == FREE_KEY { 119 | if !m.hasFreeKey { 120 | m.size++ 121 | } 122 | m.hasFreeKey = true 123 | m.freeVal = val 124 | return 125 | } 126 | 127 | ptr := (phiMix(key) & m.mask) << 1 128 | k := m.data[ptr] 129 | 130 | if k == FREE_KEY { // end of chain already 131 | m.data[ptr] = key 132 | m.data[ptr+1] = val 133 | if m.size >= m.threshold { 134 | m.rehash() 135 | } else { 136 | m.size++ 137 | } 138 | return 139 | } else if k == key { // overwrite existed value 140 | m.data[ptr+1] = val 141 | return 142 | } 143 | 144 | for { 145 | ptr = (ptr + 2) & m.mask2 146 | k = m.data[ptr] 147 | 148 | if k == FREE_KEY { 149 | m.data[ptr] = key 150 | m.data[ptr+1] = val 151 | if m.size >= m.threshold { 152 | m.rehash() 153 | } else { 154 | m.size++ 155 | } 156 | return 157 | } else if k == key { 158 | m.data[ptr+1] = val 159 | return 160 | } 161 | } 162 | 163 | } 164 | 165 | // Del deletes a key and its value. 166 | func (m *Map) Del(key int64) { 167 | if key == FREE_KEY { 168 | m.hasFreeKey = false 169 | m.size-- 170 | return 171 | } 172 | 173 | ptr := (phiMix(key) & m.mask) << 1 174 | k := m.data[ptr] 175 | 176 | if k == key { 177 | m.shiftKeys(ptr) 178 | m.size-- 179 | return 180 | } else if k == FREE_KEY { // end of chain already 181 | return 182 | } 183 | 184 | for { 185 | ptr = (ptr + 2) & m.mask2 186 | k = m.data[ptr] 187 | 188 | if k == key { 189 | m.shiftKeys(ptr) 190 | m.size-- 191 | return 192 | } else if k == FREE_KEY { 193 | return 194 | } 195 | 196 | } 197 | } 198 | 199 | func (m *Map) shiftKeys(pos int64) int64 { 200 | // Shift entries with the same hash. 
201 | var last, slot int64 202 | var k int64 203 | var data = m.data 204 | for { 205 | last = pos 206 | pos = (last + 2) & m.mask2 207 | for { 208 | k = data[pos] 209 | if k == FREE_KEY { 210 | data[last] = FREE_KEY 211 | return last 212 | } 213 | 214 | slot = (phiMix(k) & m.mask) << 1 215 | if last <= pos { 216 | if last >= slot || slot > pos { 217 | break 218 | } 219 | } else { 220 | if last >= slot && slot > pos { 221 | break 222 | } 223 | } 224 | pos = (pos + 2) & m.mask2 225 | } 226 | data[last] = k 227 | data[last+1] = data[pos+1] 228 | } 229 | } 230 | 231 | func (m *Map) rehash() { 232 | newCapacity := len(m.data) * 2 233 | m.threshold = int(math.Floor(float64(newCapacity/2) * m.fillFactor)) 234 | m.mask = int64(newCapacity/2 - 1) 235 | m.mask2 = int64(newCapacity - 1) 236 | 237 | data := make([]int64, len(m.data)) // copy of original data 238 | copy(data, m.data) 239 | 240 | m.data = make([]int64, newCapacity) 241 | if m.hasFreeKey { // reset size 242 | m.size = 1 243 | } else { 244 | m.size = 0 245 | } 246 | 247 | var o int64 248 | for i := 0; i < len(data); i += 2 { 249 | o = data[i] 250 | if o != FREE_KEY { 251 | m.Put(o, data[i+1]) 252 | } 253 | } 254 | } 255 | 256 | // Size returns size of the map. 257 | func (m *Map) Size() int { 258 | return m.size 259 | } 260 | 261 | // Keys returns a channel for iterating all keys. 262 | func (m *Map) Keys() chan int64 { 263 | c := make(chan int64, 10) 264 | go func() { 265 | data := m.data 266 | var k int64 267 | 268 | if m.hasFreeKey { 269 | c <- FREE_KEY // value is m.freeVal 270 | } 271 | 272 | for i := 0; i < len(data); i += 2 { 273 | k = data[i] 274 | if k == FREE_KEY { 275 | continue 276 | } 277 | c <- k // value is data[i+1] 278 | } 279 | close(c) 280 | }() 281 | return c 282 | } 283 | 284 | // Items returns a channel for iterating all key-value pairs. 
285 | func (m *Map) Items() chan [2]int64 { 286 | c := make(chan [2]int64, 10) 287 | go func() { 288 | data := m.data 289 | var k int64 290 | 291 | if m.hasFreeKey { 292 | c <- [2]int64{FREE_KEY, m.freeVal} 293 | } 294 | 295 | for i := 0; i < len(data); i += 2 { 296 | k = data[i] 297 | if k == FREE_KEY { 298 | continue 299 | } 300 | c <- [2]int64{k, data[i+1]} 301 | } 302 | close(c) 303 | }() 304 | return c 305 | } 306 | 307 | func (m *Map) Each(f func(k, v int64)) { 308 | data := m.data 309 | var k int64 310 | 311 | if m.hasFreeKey { 312 | f(FREE_KEY, m.freeVal) 313 | } 314 | 315 | for i := 0; i < len(data); i += 2 { 316 | k = data[i] 317 | if k == FREE_KEY { 318 | continue 319 | } 320 | f(k, data[i+1]) 321 | } 322 | } 323 | --------------------------------------------------------------------------------