├── .gitignore ├── go.mod ├── simd ├── match_amd64.go ├── match.s └── asm.go ├── fastrand_1.22.go ├── fastrand.go ├── bits_amd64.go ├── bits.go ├── go.sum ├── .github └── workflows │ └── ci-go-tests.yaml ├── README.md ├── map_fuzz_test.go ├── bits_test.go ├── map_bench_test.go ├── map.go ├── LICENSE └── map_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | **/.idea/ 2 | .vscode 3 | .run 4 | venv 5 | .DS_Store -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dolthub/swiss 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/dolthub/maphash v0.1.0 7 | github.com/stretchr/testify v1.8.1 8 | ) 9 | 10 | require ( 11 | github.com/davecgh/go-spew v1.1.1 // indirect 12 | github.com/pmezard/go-difflib v1.0.0 // indirect 13 | gopkg.in/yaml.v3 v3.0.1 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /simd/match_amd64.go: -------------------------------------------------------------------------------- 1 | // Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT. 2 | 3 | //go:build amd64 4 | 5 | package simd 6 | 7 | // MatchMetadata performs a 16-way probe of |metadata| using SSE instructions 8 | // nb: |metadata| must be an aligned pointer 9 | func MatchMetadata(metadata *[16]int8, hash int8) uint16 10 | -------------------------------------------------------------------------------- /simd/match.s: -------------------------------------------------------------------------------- 1 | // Code generated by command: go run asm.go -out match.s -stubs match_amd64.go. DO NOT EDIT. 
2 | 3 | //go:build amd64 4 | 5 | #include "textflag.h" 6 | 7 | // func MatchMetadata(metadata *[16]int8, hash int8) uint16 8 | // Requires: SSE2, SSSE3 9 | TEXT ·MatchMetadata(SB), NOSPLIT, $0-18 10 | MOVQ metadata+0(FP), AX 11 | MOVBLSX hash+8(FP), CX 12 | MOVD CX, X0 13 | PXOR X1, X1 14 | PSHUFB X1, X0 15 | MOVOU (AX), X1 16 | PCMPEQB X1, X0 17 | PMOVMSKB X0, AX 18 | MOVW AX, ret+16(FP) 19 | RET 20 | -------------------------------------------------------------------------------- /fastrand_1.22.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build go1.22 16 | 17 | package swiss 18 | 19 | import ( 20 | "math/rand/v2" 21 | ) 22 | 23 | // randIntN returns a random number in the interval [0, n). 24 | func randIntN(n int) uint32 { 25 | return rand.Uint32N(uint32(n)) 26 | } 27 | -------------------------------------------------------------------------------- /fastrand.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build !go1.22 16 | 17 | package swiss 18 | 19 | import ( 20 | _ "unsafe" 21 | ) 22 | 23 | //go:linkname fastrand runtime.fastrand 24 | func fastrand() uint32 25 | 26 | // randIntN returns a random number in the interval [0, n). 27 | func randIntN(n int) uint32 { 28 | return fastModN(fastrand(), uint32(n)) 29 | } 30 | -------------------------------------------------------------------------------- /bits_amd64.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build amd64 && !nosimd 16 | 17 | package swiss 18 | 19 | import ( 20 | "math/bits" 21 | _ "unsafe" 22 | 23 | "github.com/dolthub/swiss/simd" 24 | ) 25 | 26 | const ( 27 | groupSize = 16 28 | maxAvgGroupLoad = 14 29 | ) 30 | 31 | type bitset uint16 32 | 33 | func metaMatchH2(m *metadata, h h2) bitset { 34 | b := simd.MatchMetadata((*[16]int8)(m), int8(h)) 35 | return bitset(b) 36 | } 37 | 38 | func metaMatchEmpty(m *metadata) bitset { 39 | b := simd.MatchMetadata((*[16]int8)(m), empty) 40 | return bitset(b) 41 | } 42 | 43 | func nextMatch(b *bitset) (s uint32) { 44 | s = uint32(bits.TrailingZeros16(uint16(*b))) 45 | *b &= ^(1 << s) // clear bit |s| 46 | return 47 | } 48 | -------------------------------------------------------------------------------- /simd/asm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build ignore 16 | // +build ignore 17 | 18 | package main 19 | 20 | import ( 21 | . "github.com/mmcloughlin/avo/build" 22 | . 
"github.com/mmcloughlin/avo/operand" 23 | ) 24 | 25 | func main() { 26 | ConstraintExpr("amd64") 27 | 28 | TEXT("MatchMetadata", NOSPLIT, "func(metadata *[16]int8, hash int8) uint16") 29 | Doc("MatchMetadata performs a 16-way probe of |metadata| using SSE instructions", 30 | "nb: |metadata| must be an aligned pointer") 31 | m := Mem{Base: Load(Param("metadata"), GP64())} 32 | h := Load(Param("hash"), GP32()) 33 | mask := GP32() 34 | 35 | x0, x1, x2 := XMM(), XMM(), XMM() 36 | MOVD(h, x0) 37 | PXOR(x1, x1) 38 | PSHUFB(x1, x0) 39 | MOVOU(m, x2) 40 | PCMPEQB(x2, x0) 41 | PMOVMSKB(x0, mask) 42 | 43 | Store(mask.As16(), ReturnIndex(0)) 44 | RET() 45 | Generate() 46 | } 47 | -------------------------------------------------------------------------------- /bits.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build !amd64 || nosimd 16 | 17 | package swiss 18 | 19 | import ( 20 | "math/bits" 21 | "unsafe" 22 | ) 23 | 24 | const ( 25 | groupSize = 8 26 | maxAvgGroupLoad = 7 27 | 28 | loBits uint64 = 0x0101010101010101 29 | hiBits uint64 = 0x8080808080808080 30 | ) 31 | 32 | type bitset uint64 33 | 34 | func metaMatchH2(m *metadata, h h2) bitset { 35 | // https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord 36 | return hasZeroByte(castUint64(m) ^ (loBits * uint64(h))) 37 | } 38 | 39 | func metaMatchEmpty(m *metadata) bitset { 40 | return hasZeroByte(castUint64(m) ^ hiBits) 41 | } 42 | 43 | func nextMatch(b *bitset) uint32 { 44 | s := uint32(bits.TrailingZeros64(uint64(*b))) 45 | *b &= ^(1 << s) // clear bit |s| 46 | return s >> 3 // div by 8 47 | } 48 | 49 | func hasZeroByte(x uint64) bitset { 50 | return bitset(((x - loBits) & ^(x)) & hiBits) 51 | } 52 | 53 | func castUint64(m *metadata) uint64 { 54 | return *(*uint64)((unsafe.Pointer)(m)) 55 | } 56 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/dolthub/maphash v0.1.0 h1:bsQ7JsF4FkkWyrP3oCnFJgrCUAFbFf3kOl4L/QxPDyQ= 5 | github.com/dolthub/maphash v0.1.0/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4= 6 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 7 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 8 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 9 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 10 | 
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 11 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 12 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 13 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 14 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 17 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 19 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 20 | -------------------------------------------------------------------------------- /.github/workflows/ci-go-tests.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | pull_request: 4 | branches: [ main ] 5 | workflow_dispatch: 6 | 7 | concurrency: 8 | group: test-${{ github.event.pull_request.number || github.ref }} 9 | cancel-in-progress: false 10 | 11 | jobs: 12 | test: 13 | strategy: 14 | matrix: 15 | go-version: [1.18.x, 1.19.x, 1.20.x] 16 | platform: [ubuntu-latest, macos-latest, windows-latest] 17 | runs-on: ${{ matrix.platform }} 18 | steps: 19 | - name: Install Go 20 | uses: actions/setup-go@v3 21 | with: 22 | go-version: ${{ matrix.go-version }} 23 | - name: Checkout code 24 | uses: actions/checkout@v3 25 | - name: Go Unittest (SIMD) 26 | run: go test ./... 27 | - name: Go Unittest (non-SIMD) 28 | run: go test -tags="nosimd" ./... 
29 | fuzz: 30 | strategy: 31 | matrix: 32 | go-version: [1.18.x, 1.19.x, 1.20.x] 33 | platform: [ubuntu-latest] 34 | tags: [ "", "nosimd"] 35 | runs-on: ${{ matrix.platform }} 36 | steps: 37 | - name: Install Go 38 | uses: actions/setup-go@v3 39 | with: 40 | go-version: ${{ matrix.go-version }} 41 | - name: Checkout code 42 | uses: actions/checkout@v3 43 | - name: Fuzz Test All 44 | working-directory: . 45 | run: | 46 | files=$(grep -r --include='**_test.go' --files-with-matches 'func Fuzz' .) 47 | for file in ${files} 48 | do 49 | funcs=$(grep -oP 'func \K(Fuzz\w*)' $file) 50 | tag=${{ matrix.tags }} 51 | for func in ${funcs} 52 | do 53 | echo "Fuzzing $func in $file" 54 | parentDir=$(dirname $file) 55 | go test $parentDir -tags=$tag -run=$func -fuzz=$func -fuzztime=5s 56 | done 57 | done 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This Repository Is Archived 2 | 3 | This repository has been archived. Go 1.24+ uses swiss tables for its native maps. Please see [our announcement blog about our decision to archive `dolthub/swiss`](https://www.dolthub.com/blog/2025-03-07-archiving-the-dolthub-swiss-github-repository/). 4 | 5 | # SwissMap 6 | 7 | SwissMap is a hash table adapted from the "SwissTable" family of hash tables from [Abseil](https://abseil.io/blog/20180927-swisstables). It uses [AES](https://github.com/dolthub/maphash) instructions for fast-hashing and performs key lookups in parallel using [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) instructions. Because of these optimizations, SwissMap is faster and more memory efficient than Golang's built-in `map`. If you'd like to learn more about its design and implementation, check out this [blog post](https://www.dolthub.com/blog/2023-03-28-swiss-map/) announcing its release. 
8 | 9 | 10 | ## Example 11 | 12 | SwissMap exposes the same interface as the built-in `map`. Give it a try using this [Go playground](https://go.dev/play/p/JPDC5WhYN7g). 13 | 14 | ```go 15 | package main 16 | 17 | import "github.com/dolthub/swiss" 18 | 19 | func main() { 20 | m := swiss.NewMap[string, int](42) 21 | 22 | m.Put("foo", 1) 23 | m.Put("bar", 2) 24 | 25 | m.Iter(func(k string, v int) (stop bool) { 26 | println("iter", k, v) 27 | return false // continue 28 | }) 29 | 30 | if x, ok := m.Get("foo"); ok { 31 | println(x) 32 | } 33 | if m.Has("bar") { 34 | x, _ := m.Get("bar") 35 | println(x) 36 | } 37 | 38 | m.Put("foo", -1) 39 | m.Delete("bar") 40 | 41 | if x, ok := m.Get("foo"); ok { 42 | println(x) 43 | } 44 | if m.Has("bar") { 45 | x, _ := m.Get("bar") 46 | println(x) 47 | } 48 | 49 | m.Clear() 50 | 51 | // Output: 52 | // iter foo 1 53 | // iter bar 2 54 | // 1 55 | // 2 56 | // -1 57 | } 58 | ``` 59 | -------------------------------------------------------------------------------- /map_fuzz_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package swiss 16 | 17 | import ( 18 | "testing" 19 | "unsafe" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | func FuzzStringMap(f *testing.F) { 25 | f.Add(uint8(1), 14, 50) 26 | f.Add(uint8(2), 1, 1) 27 | f.Add(uint8(2), 14, 14) 28 | f.Add(uint8(2), 14, 15) 29 | f.Add(uint8(2), 25, 100) 30 | f.Add(uint8(2), 25, 1000) 31 | f.Add(uint8(8), 0, 1) 32 | f.Add(uint8(8), 1, 1) 33 | f.Add(uint8(8), 14, 14) 34 | f.Add(uint8(8), 14, 15) 35 | f.Add(uint8(8), 25, 100) 36 | f.Add(uint8(8), 25, 1000) 37 | f.Fuzz(func(t *testing.T, keySz uint8, init, count int) { 38 | // smaller key sizes generate more overwrites 39 | fuzzTestStringMap(t, uint32(keySz), uint32(init), uint32(count)) 40 | }) 41 | } 42 | 43 | func fuzzTestStringMap(t *testing.T, keySz, init, count uint32) { 44 | const limit = 1024 * 1024 45 | if count > limit || init > limit { 46 | t.Skip() 47 | } 48 | m := NewMap[string, int](init) 49 | if count == 0 { 50 | return 51 | } 52 | // make tests deterministic 53 | setConstSeed(m, 1) 54 | 55 | keys := genStringData(int(keySz), int(count)) 56 | golden := make(map[string]int, init) 57 | for i, k := range keys { 58 | m.Put(k, i) 59 | golden[k] = i 60 | } 61 | assert.Equal(t, len(golden), m.Count()) 62 | 63 | for k, exp := range golden { 64 | act, ok := m.Get(k) 65 | assert.True(t, ok) 66 | assert.Equal(t, exp, act) 67 | } 68 | for _, k := range keys { 69 | _, ok := golden[k] 70 | assert.True(t, ok) 71 | assert.True(t, m.Has(k)) 72 | } 73 | 74 | deletes := keys[:count/2] 75 | for _, k := range deletes { 76 | delete(golden, k) 77 | m.Delete(k) 78 | } 79 | assert.Equal(t, len(golden), m.Count()) 80 | 81 | for _, k := range deletes { 82 | assert.False(t, m.Has(k)) 83 | } 84 | for k, exp := range golden { 85 | act, ok := m.Get(k) 86 | assert.True(t, ok) 87 | assert.Equal(t, exp, act) 88 | } 89 | } 90 | 91 | type hasher struct { 92 | hash func() 93 | seed uintptr 94 | } 95 | 96 | func setConstSeed[K comparable, V any](m *Map[K, V], seed uintptr) { 97 | h 
:= (*hasher)((unsafe.Pointer)(&m.hash)) 98 | h.seed = seed 99 | } 100 | -------------------------------------------------------------------------------- /bits_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package swiss 16 | 17 | import ( 18 | "math/bits" 19 | "math/rand" 20 | "testing" 21 | "unsafe" 22 | 23 | "github.com/stretchr/testify/assert" 24 | ) 25 | 26 | func TestMatchMetadata(t *testing.T) { 27 | var meta metadata 28 | for i := range meta { 29 | meta[i] = int8(i) 30 | } 31 | t.Run("metaMatchH2", func(t *testing.T) { 32 | for _, x := range meta { 33 | mask := metaMatchH2(&meta, h2(x)) 34 | assert.NotZero(t, mask) 35 | assert.Equal(t, uint32(x), nextMatch(&mask)) 36 | } 37 | }) 38 | t.Run("metaMatchEmpty", func(t *testing.T) { 39 | mask := metaMatchEmpty(&meta) 40 | assert.Equal(t, mask, bitset(0)) 41 | for i := range meta { 42 | meta[i] = empty 43 | mask = metaMatchEmpty(&meta) 44 | assert.NotZero(t, mask) 45 | assert.Equal(t, uint32(i), nextMatch(&mask)) 46 | meta[i] = int8(i) 47 | } 48 | }) 49 | t.Run("nextMatch", func(t *testing.T) { 50 | // test iterating multiple matches 51 | meta = newEmptyMetadata() 52 | mask := metaMatchEmpty(&meta) 53 | for i := range meta { 54 | assert.Equal(t, uint32(i), nextMatch(&mask)) 55 | } 56 | for i := 0; i < len(meta); i += 2 { 57 | 
meta[i] = int8(42) 58 | } 59 | mask = metaMatchH2(&meta, h2(42)) 60 | for i := 0; i < len(meta); i += 2 { 61 | assert.Equal(t, uint32(i), nextMatch(&mask)) 62 | } 63 | }) 64 | } 65 | 66 | func BenchmarkMatchMetadata(b *testing.B) { 67 | var meta metadata 68 | for i := range meta { 69 | meta[i] = int8(i) 70 | } 71 | var mask bitset 72 | for i := 0; i < b.N; i++ { 73 | mask = metaMatchH2(&meta, h2(i)) 74 | } 75 | b.Log(mask) 76 | } 77 | 78 | func TestNextPow2(t *testing.T) { 79 | assert.Equal(t, 0, int(nextPow2(0))) 80 | assert.Equal(t, 1, int(nextPow2(1))) 81 | assert.Equal(t, 2, int(nextPow2(2))) 82 | assert.Equal(t, 4, int(nextPow2(3))) 83 | assert.Equal(t, 8, int(nextPow2(7))) 84 | assert.Equal(t, 8, int(nextPow2(8))) 85 | assert.Equal(t, 16, int(nextPow2(9))) 86 | } 87 | 88 | func nextPow2(x uint32) uint32 { 89 | return 1 << (32 - bits.LeadingZeros32(x-1)) 90 | } 91 | 92 | func TestConstants(t *testing.T) { 93 | c1, c2 := empty, tombstone 94 | assert.Equal(t, byte(0b1000_0000), byte(c1)) 95 | assert.Equal(t, byte(0b1000_0000), reinterpretCast(c1)) 96 | assert.Equal(t, byte(0b1111_1110), byte(c2)) 97 | assert.Equal(t, byte(0b1111_1110), reinterpretCast(c2)) 98 | } 99 | 100 | func reinterpretCast(i int8) byte { 101 | return *(*byte)(unsafe.Pointer(&i)) 102 | } 103 | 104 | func TestFastMod(t *testing.T) { 105 | t.Run("n=10", func(t *testing.T) { 106 | testFastMod(t, 10) 107 | }) 108 | t.Run("n=100", func(t *testing.T) { 109 | testFastMod(t, 100) 110 | }) 111 | t.Run("n=1000", func(t *testing.T) { 112 | testFastMod(t, 1000) 113 | }) 114 | } 115 | 116 | func testFastMod(t *testing.T, n uint32) { 117 | const trials = 32 * 1024 118 | for i := 0; i < trials; i++ { 119 | x := rand.Uint32() 120 | y := fastModN(x, n) 121 | assert.Less(t, y, n) 122 | t.Logf("fastMod(%d, %d): %d", x, n, y) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /map_bench_test.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package swiss 16 | 17 | import ( 18 | "math/bits" 19 | "math/rand" 20 | "strconv" 21 | "testing" 22 | 23 | "github.com/stretchr/testify/require" 24 | 25 | "github.com/stretchr/testify/assert" 26 | ) 27 | 28 | func BenchmarkStringMaps(b *testing.B) { 29 | const keySz = 8 30 | sizes := []int{16, 128, 1024, 8192, 131072} 31 | for _, n := range sizes { 32 | b.Run("n="+strconv.Itoa(n), func(b *testing.B) { 33 | b.Run("runtime map", func(b *testing.B) { 34 | benchmarkRuntimeMap(b, genStringData(keySz, n)) 35 | }) 36 | b.Run("swiss.Map", func(b *testing.B) { 37 | benchmarkSwissMap(b, genStringData(keySz, n)) 38 | }) 39 | }) 40 | } 41 | } 42 | 43 | func BenchmarkInt64Maps(b *testing.B) { 44 | sizes := []int{16, 128, 1024, 8192, 131072} 45 | for _, n := range sizes { 46 | b.Run("n="+strconv.Itoa(n), func(b *testing.B) { 47 | b.Run("runtime map", func(b *testing.B) { 48 | benchmarkRuntimeMap(b, generateInt64Data(n)) 49 | }) 50 | b.Run("swiss.Map", func(b *testing.B) { 51 | benchmarkSwissMap(b, generateInt64Data(n)) 52 | }) 53 | }) 54 | } 55 | } 56 | 57 | func TestMemoryFootprint(t *testing.T) { 58 | t.Skip("unskip for memory footprint stats") 59 | var samples []float64 60 | for n := 10; n <= 10_000; n += 10 { 61 | b1 := testing.Benchmark(func(b *testing.B) 
{ 62 | // max load factor 7/8 63 | m := NewMap[int, int](uint32(n)) 64 | require.NotNil(b, m) 65 | }) 66 | b2 := testing.Benchmark(func(b *testing.B) { 67 | // max load factor 6.5/8 68 | m := make(map[int]int, n) 69 | require.NotNil(b, m) 70 | }) 71 | x := float64(b1.MemBytes) / float64(b2.MemBytes) 72 | samples = append(samples, x) 73 | } 74 | t.Logf("mean size ratio: %.3f", mean(samples)) 75 | } 76 | 77 | func benchmarkRuntimeMap[K comparable](b *testing.B, keys []K) { 78 | n := uint32(len(keys)) 79 | mod := n - 1 // power of 2 fast modulus 80 | require.Equal(b, 1, bits.OnesCount32(n)) 81 | m := make(map[K]K, n) 82 | for _, k := range keys { 83 | m[k] = k 84 | } 85 | b.ResetTimer() 86 | var ok bool 87 | for i := 0; i < b.N; i++ { 88 | _, ok = m[keys[uint32(i)&mod]] 89 | } 90 | assert.True(b, ok) 91 | b.ReportAllocs() 92 | } 93 | 94 | func benchmarkSwissMap[K comparable](b *testing.B, keys []K) { 95 | n := uint32(len(keys)) 96 | mod := n - 1 // power of 2 fast modulus 97 | require.Equal(b, 1, bits.OnesCount32(n)) 98 | m := NewMap[K, K](n) 99 | for _, k := range keys { 100 | m.Put(k, k) 101 | } 102 | b.ResetTimer() 103 | var ok bool 104 | for i := 0; i < b.N; i++ { 105 | _, ok = m.Get(keys[uint32(i)&mod]) 106 | } 107 | assert.True(b, ok) 108 | b.ReportAllocs() 109 | } 110 | 111 | func generateInt64Data(n int) (data []int64) { 112 | data = make([]int64, n) 113 | var x int64 114 | for i := range data { 115 | x += rand.Int63n(128) + 1 116 | data[i] = x 117 | } 118 | return 119 | } 120 | 121 | func mean(samples []float64) (m float64) { 122 | for _, s := range samples { 123 | m += s 124 | } 125 | return m / float64(len(samples)) 126 | } 127 | -------------------------------------------------------------------------------- /map.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 
const (
	// maxLoadFactor is the fraction of a group's slots that may be
	// occupied on average; both operands depend on the bits_*.go
	// variant selected at build time.
	maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize)
)

// Map is an open-addressing hash map
// based on Abseil's flat_hash_map.
type Map[K comparable, V any] struct {
	// ctrl[g] holds the control bytes for the slots of groups[g].
	ctrl   []metadata
	groups []group[K, V]
	hash   maphash.Hasher[K]
	// resident counts occupied slots including tombstones: Put increments
	// it on insert and Delete decrements it only when it can mark the
	// slot empty again.
	resident uint32
	// dead counts tombstoned slots; Count reports resident - dead.
	dead uint32
	// limit is the value of resident at which Put rehashes.
	limit uint32
}

// metadata is the h2 metadata array for a group.
// find operations first probe the control bytes
// to filter candidates before matching keys
type metadata [groupSize]int8

// group is a group of groupSize key-value pairs
type group[K comparable, V any] struct {
	keys   [groupSize]K
	values [groupSize]V
}

const (
	// h1Mask and h2Mask select the upper 57 and lower 7 bits of a hash.
	h1Mask uint64 = 0xffff_ffff_ffff_ff80
	h2Mask uint64 = 0x0000_0000_0000_007f
	// empty and tombstone are the control bytes of never-used and deleted
	// slots; both are negative, so they cannot equal a 7 bit h2 value.
	empty     int8 = -128 // 0b1000_0000
	tombstone int8 = -2   // 0b1111_1110
)

// h1 is a 57 bit hash prefix
type h1 uint64

// h2 is a 7 bit hash suffix
type h2 int8
61 | func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) { 62 | groups := numGroups(sz) 63 | m = &Map[K, V]{ 64 | ctrl: make([]metadata, groups), 65 | groups: make([]group[K, V], groups), 66 | hash: maphash.NewHasher[K](), 67 | limit: groups * maxAvgGroupLoad, 68 | } 69 | for i := range m.ctrl { 70 | m.ctrl[i] = newEmptyMetadata() 71 | } 72 | return 73 | } 74 | 75 | // Has returns true if |key| is present in |m|. 76 | func (m *Map[K, V]) Has(key K) (ok bool) { 77 | hi, lo := splitHash(m.hash.Hash(key)) 78 | g := probeStart(hi, len(m.groups)) 79 | for { // inlined find loop 80 | matches := metaMatchH2(&m.ctrl[g], lo) 81 | for matches != 0 { 82 | s := nextMatch(&matches) 83 | if key == m.groups[g].keys[s] { 84 | ok = true 85 | return 86 | } 87 | } 88 | // |key| is not in group |g|, 89 | // stop probing if we see an empty slot 90 | matches = metaMatchEmpty(&m.ctrl[g]) 91 | if matches != 0 { 92 | ok = false 93 | return 94 | } 95 | g += 1 // linear probing 96 | if g >= uint32(len(m.groups)) { 97 | g = 0 98 | } 99 | } 100 | } 101 | 102 | // Get returns the |value| mapped by |key| if one exists. 
103 | func (m *Map[K, V]) Get(key K) (value V, ok bool) { 104 | hi, lo := splitHash(m.hash.Hash(key)) 105 | g := probeStart(hi, len(m.groups)) 106 | for { // inlined find loop 107 | matches := metaMatchH2(&m.ctrl[g], lo) 108 | for matches != 0 { 109 | s := nextMatch(&matches) 110 | if key == m.groups[g].keys[s] { 111 | value, ok = m.groups[g].values[s], true 112 | return 113 | } 114 | } 115 | // |key| is not in group |g|, 116 | // stop probing if we see an empty slot 117 | matches = metaMatchEmpty(&m.ctrl[g]) 118 | if matches != 0 { 119 | ok = false 120 | return 121 | } 122 | g += 1 // linear probing 123 | if g >= uint32(len(m.groups)) { 124 | g = 0 125 | } 126 | } 127 | } 128 | 129 | // Put attempts to insert |key| and |value| 130 | func (m *Map[K, V]) Put(key K, value V) { 131 | if m.resident >= m.limit { 132 | m.rehash(m.nextSize()) 133 | } 134 | hi, lo := splitHash(m.hash.Hash(key)) 135 | g := probeStart(hi, len(m.groups)) 136 | for { // inlined find loop 137 | matches := metaMatchH2(&m.ctrl[g], lo) 138 | for matches != 0 { 139 | s := nextMatch(&matches) 140 | if key == m.groups[g].keys[s] { // update 141 | m.groups[g].keys[s] = key 142 | m.groups[g].values[s] = value 143 | return 144 | } 145 | } 146 | // |key| is not in group |g|, 147 | // stop probing if we see an empty slot 148 | matches = metaMatchEmpty(&m.ctrl[g]) 149 | if matches != 0 { // insert 150 | s := nextMatch(&matches) 151 | m.groups[g].keys[s] = key 152 | m.groups[g].values[s] = value 153 | m.ctrl[g][s] = int8(lo) 154 | m.resident++ 155 | return 156 | } 157 | g += 1 // linear probing 158 | if g >= uint32(len(m.groups)) { 159 | g = 0 160 | } 161 | } 162 | } 163 | 164 | // Delete attempts to remove |key|, returns true successful. 
165 | func (m *Map[K, V]) Delete(key K) (ok bool) { 166 | hi, lo := splitHash(m.hash.Hash(key)) 167 | g := probeStart(hi, len(m.groups)) 168 | for { 169 | matches := metaMatchH2(&m.ctrl[g], lo) 170 | for matches != 0 { 171 | s := nextMatch(&matches) 172 | if key == m.groups[g].keys[s] { 173 | ok = true 174 | // optimization: if |m.ctrl[g]| contains any empty 175 | // metadata bytes, we can physically delete |key| 176 | // rather than placing a tombstone. 177 | // The observation is that any probes into group |g| 178 | // would already be terminated by the existing empty 179 | // slot, and therefore reclaiming slot |s| will not 180 | // cause premature termination of probes into |g|. 181 | if metaMatchEmpty(&m.ctrl[g]) != 0 { 182 | m.ctrl[g][s] = empty 183 | m.resident-- 184 | } else { 185 | m.ctrl[g][s] = tombstone 186 | m.dead++ 187 | } 188 | var k K 189 | var v V 190 | m.groups[g].keys[s] = k 191 | m.groups[g].values[s] = v 192 | return 193 | } 194 | } 195 | // |key| is not in group |g|, 196 | // stop probing if we see an empty slot 197 | matches = metaMatchEmpty(&m.ctrl[g]) 198 | if matches != 0 { // |key| absent 199 | ok = false 200 | return 201 | } 202 | g += 1 // linear probing 203 | if g >= uint32(len(m.groups)) { 204 | g = 0 205 | } 206 | } 207 | } 208 | 209 | // Iter iterates the elements of the Map, passing them to the callback. 210 | // It guarantees that any key in the Map will be visited only once, and 211 | // for un-mutated Maps, every key will be visited once. If the Map is 212 | // Mutated during iteration, mutations will be reflected on return from 213 | // Iter, but the set of keys visited by Iter is non-deterministic. 
214 | func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) { 215 | // take a consistent view of the table in case 216 | // we rehash during iteration 217 | ctrl, groups := m.ctrl, m.groups 218 | // pick a random starting group 219 | g := randIntN(len(groups)) 220 | for n := 0; n < len(groups); n++ { 221 | for s, c := range ctrl[g] { 222 | if c == empty || c == tombstone { 223 | continue 224 | } 225 | k, v := groups[g].keys[s], groups[g].values[s] 226 | if stop := cb(k, v); stop { 227 | return 228 | } 229 | } 230 | g++ 231 | if g >= uint32(len(groups)) { 232 | g = 0 233 | } 234 | } 235 | } 236 | 237 | // Clear removes all elements from the Map. 238 | func (m *Map[K, V]) Clear() { 239 | for i, c := range m.ctrl { 240 | for j := range c { 241 | m.ctrl[i][j] = empty 242 | } 243 | } 244 | var k K 245 | var v V 246 | for i := range m.groups { 247 | g := &m.groups[i] 248 | for i := range g.keys { 249 | g.keys[i] = k 250 | g.values[i] = v 251 | } 252 | } 253 | m.resident, m.dead = 0, 0 254 | } 255 | 256 | // Count returns the number of elements in the Map. 257 | func (m *Map[K, V]) Count() int { 258 | return int(m.resident - m.dead) 259 | } 260 | 261 | // Capacity returns the number of additional elements 262 | // the can be added to the Map before resizing. 263 | func (m *Map[K, V]) Capacity() int { 264 | return int(m.limit - m.resident) 265 | } 266 | 267 | // find returns the location of |key| if present, or its insertion location if absent. 268 | // for performance, find is manually inlined into public methods. 
269 | func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) { 270 | g = probeStart(hi, len(m.groups)) 271 | for { 272 | matches := metaMatchH2(&m.ctrl[g], lo) 273 | for matches != 0 { 274 | s = nextMatch(&matches) 275 | if key == m.groups[g].keys[s] { 276 | return g, s, true 277 | } 278 | } 279 | // |key| is not in group |g|, 280 | // stop probing if we see an empty slot 281 | matches = metaMatchEmpty(&m.ctrl[g]) 282 | if matches != 0 { 283 | s = nextMatch(&matches) 284 | return g, s, false 285 | } 286 | g += 1 // linear probing 287 | if g >= uint32(len(m.groups)) { 288 | g = 0 289 | } 290 | } 291 | } 292 | 293 | func (m *Map[K, V]) nextSize() (n uint32) { 294 | n = uint32(len(m.groups)) * 2 295 | if m.dead >= (m.resident / 2) { 296 | n = uint32(len(m.groups)) 297 | } 298 | return 299 | } 300 | 301 | func (m *Map[K, V]) rehash(n uint32) { 302 | groups, ctrl := m.groups, m.ctrl 303 | m.groups = make([]group[K, V], n) 304 | m.ctrl = make([]metadata, n) 305 | for i := range m.ctrl { 306 | m.ctrl[i] = newEmptyMetadata() 307 | } 308 | m.hash = maphash.NewSeed(m.hash) 309 | m.limit = n * maxAvgGroupLoad 310 | m.resident, m.dead = 0, 0 311 | for g := range ctrl { 312 | for s := range ctrl[g] { 313 | c := ctrl[g][s] 314 | if c == empty || c == tombstone { 315 | continue 316 | } 317 | m.Put(groups[g].keys[s], groups[g].values[s]) 318 | } 319 | } 320 | } 321 | 322 | func (m *Map[K, V]) loadFactor() float32 { 323 | slots := float32(len(m.groups) * groupSize) 324 | return float32(m.resident-m.dead) / slots 325 | } 326 | 327 | // numGroups returns the minimum number of groups needed to store |n| elems. 
func numGroups(n uint32) (groups uint32) {
	// ceiling division written as quotient plus remainder check:
	// the |n + maxAvgGroupLoad - 1| form wraps around in uint32
	// arithmetic when |n| is close to the maximum uint32 value.
	groups = n / maxAvgGroupLoad
	if n%maxAvgGroupLoad != 0 {
		groups++
	}
	// always allocate at least one group
	if groups == 0 {
		groups = 1
	}
	return
}

// newEmptyMetadata returns a metadata value with every byte set to |empty|.
func newEmptyMetadata() (meta metadata) {
	for i := range meta {
		meta[i] = empty
	}
	return
}

// splitHash splits |h| into its h1 part (the bits selected by
// |h1Mask|, shifted right by 7) and its h2 part (the bits
// selected by |h2Mask|).
func splitHash(h uint64) (h1, h2) {
	return h1((h & h1Mask) >> 7), h2(h & h2Mask)
}

// probeStart returns the index of the group where probing
// for |hi| begins, given the number of |groups| in the table.
func probeStart(hi h1, groups int) uint32 {
	return fastModN(uint32(hi), uint32(groups))
}

// fastModN maps |x| into the range [0, n) by taking the
// upper 32 bits of the 64-bit product of |x| and |n|.
// lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
func fastModN(x, n uint32) uint32 {
	return uint32((uint64(x) * uint64(n)) >> 32)
}
355 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License.
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /map_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Dolthub, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package swiss 16 | 17 | import ( 18 | "fmt" 19 | "math" 20 | "math/rand" 21 | "testing" 22 | 23 | "github.com/stretchr/testify/require" 24 | 25 | "github.com/stretchr/testify/assert" 26 | ) 27 | 28 | func TestSwissMap(t *testing.T) { 29 | t.Run("strings=0", func(t *testing.T) { 30 | testSwissMap(t, genStringData(16, 0)) 31 | }) 32 | t.Run("strings=100", func(t *testing.T) { 33 | testSwissMap(t, genStringData(16, 100)) 34 | }) 35 | t.Run("strings=1000", func(t *testing.T) { 36 | testSwissMap(t, genStringData(16, 1000)) 37 | }) 38 | t.Run("strings=10_000", func(t *testing.T) { 39 | testSwissMap(t, genStringData(16, 10_000)) 40 | }) 41 | t.Run("strings=100_000", func(t *testing.T) { 42 | testSwissMap(t, genStringData(16, 100_000)) 43 | }) 44 | t.Run("uint32=0", func(t *testing.T) { 45 | testSwissMap(t, genUint32Data(0)) 46 | }) 47 | t.Run("uint32=100", func(t *testing.T) { 48 | testSwissMap(t, genUint32Data(100)) 49 | }) 50 | t.Run("uint32=1000", func(t *testing.T) { 51 | testSwissMap(t, genUint32Data(1000)) 52 | }) 53 | t.Run("uint32=10_000", func(t *testing.T) { 54 | testSwissMap(t, genUint32Data(10_000)) 55 | }) 56 | t.Run("uint32=100_000", func(t *testing.T) { 57 | testSwissMap(t, genUint32Data(100_000)) 58 | }) 59 | t.Run("string capacity", func(t *testing.T) { 60 | testSwissMapCapacity(t, func(n int) []string { 61 | return genStringData(16, n) 62 | }) 63 | }) 64 | t.Run("uint32 capacity", func(t *testing.T) { 65 | testSwissMapCapacity(t, genUint32Data) 66 | }) 67 | } 68 | 69 | func testSwissMap[K comparable](t *testing.T, keys []K) { 70 | // sanity check 71 | require.Equal(t, len(keys), len(uniq(keys)), keys) 72 | t.Run("put", func(t *testing.T) { 73 | testMapPut(t, keys) 74 | }) 75 | t.Run("has", func(t *testing.T) { 76 | testMapHas(t, keys) 77 | }) 78 | t.Run("get", func(t *testing.T) { 79 | testMapGet(t, keys) 80 | }) 81 | t.Run("delete", func(t *testing.T) { 82 | testMapDelete(t, keys) 83 | }) 84 | t.Run("clear", func(t *testing.T) { 85 | 
testMapClear(t, keys) 86 | }) 87 | t.Run("iter", func(t *testing.T) { 88 | testMapIter(t, keys) 89 | }) 90 | t.Run("grow", func(t *testing.T) { 91 | testMapGrow(t, keys) 92 | }) 93 | t.Run("probe stats", func(t *testing.T) { 94 | testProbeStats(t, keys) 95 | }) 96 | } 97 | 98 | func uniq[K comparable](keys []K) []K { 99 | s := make(map[K]struct{}, len(keys)) 100 | for _, k := range keys { 101 | s[k] = struct{}{} 102 | } 103 | u := make([]K, 0, len(keys)) 104 | for k := range s { 105 | u = append(u, k) 106 | } 107 | return u 108 | } 109 | 110 | func genStringData(size, count int) (keys []string) { 111 | src := rand.New(rand.NewSource(int64(size * count))) 112 | letters := []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 113 | r := make([]rune, size*count) 114 | for i := range r { 115 | r[i] = letters[src.Intn(len(letters))] 116 | } 117 | keys = make([]string, count) 118 | for i := range keys { 119 | keys[i] = string(r[:size]) 120 | r = r[size:] 121 | } 122 | return 123 | } 124 | 125 | func genUint32Data(count int) (keys []uint32) { 126 | keys = make([]uint32, count) 127 | var x uint32 128 | for i := range keys { 129 | x += (rand.Uint32() % 128) + 1 130 | keys[i] = x 131 | } 132 | return 133 | } 134 | 135 | func testMapPut[K comparable](t *testing.T, keys []K) { 136 | m := NewMap[K, int](uint32(len(keys))) 137 | assert.Equal(t, 0, m.Count()) 138 | for i, key := range keys { 139 | m.Put(key, i) 140 | } 141 | assert.Equal(t, len(keys), m.Count()) 142 | // overwrite 143 | for i, key := range keys { 144 | m.Put(key, -i) 145 | } 146 | assert.Equal(t, len(keys), m.Count()) 147 | for i, key := range keys { 148 | act, ok := m.Get(key) 149 | assert.True(t, ok) 150 | assert.Equal(t, -i, act) 151 | } 152 | assert.Equal(t, len(keys), int(m.resident)) 153 | } 154 | 155 | func testMapHas[K comparable](t *testing.T, keys []K) { 156 | m := NewMap[K, int](uint32(len(keys))) 157 | for i, key := range keys { 158 | m.Put(key, i) 159 | } 160 | for _, key := range keys { 161 
| ok := m.Has(key) 162 | assert.True(t, ok) 163 | } 164 | } 165 | 166 | func testMapGet[K comparable](t *testing.T, keys []K) { 167 | m := NewMap[K, int](uint32(len(keys))) 168 | for i, key := range keys { 169 | m.Put(key, i) 170 | } 171 | for i, key := range keys { 172 | act, ok := m.Get(key) 173 | assert.True(t, ok) 174 | assert.Equal(t, i, act) 175 | } 176 | } 177 | 178 | func testMapDelete[K comparable](t *testing.T, keys []K) { 179 | m := NewMap[K, int](uint32(len(keys))) 180 | assert.Equal(t, 0, m.Count()) 181 | for i, key := range keys { 182 | m.Put(key, i) 183 | } 184 | assert.Equal(t, len(keys), m.Count()) 185 | for _, key := range keys { 186 | m.Delete(key) 187 | ok := m.Has(key) 188 | assert.False(t, ok) 189 | } 190 | assert.Equal(t, 0, m.Count()) 191 | // put keys back after deleting them 192 | for i, key := range keys { 193 | m.Put(key, i) 194 | } 195 | assert.Equal(t, len(keys), m.Count()) 196 | } 197 | 198 | func testMapClear[K comparable](t *testing.T, keys []K) { 199 | m := NewMap[K, int](0) 200 | assert.Equal(t, 0, m.Count()) 201 | for i, key := range keys { 202 | m.Put(key, i) 203 | } 204 | assert.Equal(t, len(keys), m.Count()) 205 | m.Clear() 206 | assert.Equal(t, 0, m.Count()) 207 | for _, key := range keys { 208 | ok := m.Has(key) 209 | assert.False(t, ok) 210 | _, ok = m.Get(key) 211 | assert.False(t, ok) 212 | } 213 | var calls int 214 | m.Iter(func(k K, v int) (stop bool) { 215 | calls++ 216 | return 217 | }) 218 | assert.Equal(t, 0, calls) 219 | 220 | // Assert that the map was actually cleared... 
221 | var k K 222 | for _, g := range m.groups { 223 | for i := range g.keys { 224 | assert.Equal(t, k, g.keys[i]) 225 | assert.Equal(t, 0, g.values[i]) 226 | } 227 | } 228 | } 229 | 230 | func testMapIter[K comparable](t *testing.T, keys []K) { 231 | m := NewMap[K, int](uint32(len(keys))) 232 | for i, key := range keys { 233 | m.Put(key, i) 234 | } 235 | visited := make(map[K]uint, len(keys)) 236 | m.Iter(func(k K, v int) (stop bool) { 237 | visited[k] = 0 238 | stop = true 239 | return 240 | }) 241 | if len(keys) == 0 { 242 | assert.Equal(t, len(visited), 0) 243 | } else { 244 | assert.Equal(t, len(visited), 1) 245 | } 246 | for _, k := range keys { 247 | visited[k] = 0 248 | } 249 | m.Iter(func(k K, v int) (stop bool) { 250 | visited[k]++ 251 | return 252 | }) 253 | for _, c := range visited { 254 | assert.Equal(t, c, uint(1)) 255 | } 256 | // mutate on iter 257 | m.Iter(func(k K, v int) (stop bool) { 258 | m.Put(k, -v) 259 | return 260 | }) 261 | for i, key := range keys { 262 | act, ok := m.Get(key) 263 | assert.True(t, ok) 264 | assert.Equal(t, -i, act) 265 | } 266 | } 267 | 268 | func testMapGrow[K comparable](t *testing.T, keys []K) { 269 | n := uint32(len(keys)) 270 | m := NewMap[K, int](n / 10) 271 | for i, key := range keys { 272 | m.Put(key, i) 273 | } 274 | for i, key := range keys { 275 | act, ok := m.Get(key) 276 | assert.True(t, ok) 277 | assert.Equal(t, i, act) 278 | } 279 | } 280 | 281 | func testSwissMapCapacity[K comparable](t *testing.T, gen func(n int) []K) { 282 | // Capacity() behavior depends on |groupSize| 283 | // which varies by processor architecture. 
284 | caps := []uint32{ 285 | 1 * maxAvgGroupLoad, 286 | 2 * maxAvgGroupLoad, 287 | 3 * maxAvgGroupLoad, 288 | 4 * maxAvgGroupLoad, 289 | 5 * maxAvgGroupLoad, 290 | 10 * maxAvgGroupLoad, 291 | 25 * maxAvgGroupLoad, 292 | 50 * maxAvgGroupLoad, 293 | 100 * maxAvgGroupLoad, 294 | } 295 | for _, c := range caps { 296 | m := NewMap[K, K](c) 297 | assert.Equal(t, int(c), m.Capacity()) 298 | keys := gen(rand.Intn(int(c))) 299 | for _, k := range keys { 300 | m.Put(k, k) 301 | } 302 | assert.Equal(t, int(c)-len(keys), m.Capacity()) 303 | assert.Equal(t, int(c), m.Count()+m.Capacity()) 304 | } 305 | } 306 | 307 | func testProbeStats[K comparable](t *testing.T, keys []K) { 308 | runTest := func(load float32) { 309 | n := uint32(len(keys)) 310 | sz, k := loadFactorSample(n, load) 311 | m := NewMap[K, int](sz) 312 | for i, key := range keys[:k] { 313 | m.Put(key, i) 314 | } 315 | // todo: assert stat invariants? 316 | stats := getProbeStats(t, m, keys) 317 | t.Log(fmtProbeStats(stats)) 318 | } 319 | t.Run("load_factor=0.5", func(t *testing.T) { 320 | runTest(0.5) 321 | }) 322 | t.Run("load_factor=0.75", func(t *testing.T) { 323 | runTest(0.75) 324 | }) 325 | t.Run("load_factor=max", func(t *testing.T) { 326 | runTest(maxLoadFactor) 327 | }) 328 | } 329 | 330 | // calculates the sample size and map size necessary to 331 | // create a load factor of |load| given |n| data points 332 | func loadFactorSample(n uint32, targetLoad float32) (mapSz, sampleSz uint32) { 333 | if targetLoad > maxLoadFactor { 334 | targetLoad = maxLoadFactor 335 | } 336 | // tables are assumed to be power of two 337 | sampleSz = uint32(float32(n) * targetLoad) 338 | mapSz = uint32(float32(n) * maxLoadFactor) 339 | return 340 | } 341 | 342 | type probeStats struct { 343 | groups uint32 344 | loadFactor float32 345 | presentCnt uint32 346 | presentMin uint32 347 | presentMax uint32 348 | presentAvg float32 349 | absentCnt uint32 350 | absentMin uint32 351 | absentMax uint32 352 | absentAvg float32 353 | } 
// fmtProbeStats renders |s| as three lines: table shape,
// present-key statistics, and absent-key statistics.
func fmtProbeStats(s probeStats) string {
	const layout = "groups=%d load=%f\n" +
		"present(n=%d): min=%d max=%d avg=%f\n" +
		"absent(n=%d): min=%d max=%d avg=%f\n"
	return fmt.Sprintf(layout,
		s.groups, s.loadFactor,
		s.presentCnt, s.presentMin, s.presentMax, s.presentAvg,
		s.absentCnt, s.absentMin, s.absentMax, s.absentAvg)
}

// getProbeLength returns the number of groups probed to locate |key|
// in |m|, counting from the key's starting group through the group
// reported by find, and whether |key| was found.
func getProbeLength[K comparable, V any](t *testing.T, m *Map[K, V], key K) (length uint32, ok bool) {
	hi, lo := splitHash(m.hash.Hash(key))
	first := probeStart(hi, len(m.groups))
	last, _, found := m.find(key, hi, lo)
	if last < first {
		// the probe wrapped past the final group
		last += uint32(len(m.groups))
	}
	n := last - first + 1
	require.True(t, n > 0)
	return n, found
}

// getProbeStats measures the probe length of every key in |keys|
// against |m| and aggregates count, min, max and average separately
// for present and absent keys.
func getProbeStats[K comparable, V any](t *testing.T, m *Map[K, V], keys []K) (stats probeStats) {
	stats = probeStats{
		groups:     uint32(len(m.groups)),
		loadFactor: m.loadFactor(),
		// sentinels, lowered by the first observed probe length
		presentMin: math.MaxInt32,
		absentMin:  math.MaxInt32,
	}
	var presentSum, absentSum float32
	for _, key := range keys {
		l, ok := getProbeLength(t, m, key)
		// select the bucket of counters this key contributes to
		cnt, lowest, highest, sum := &stats.absentCnt, &stats.absentMin, &stats.absentMax, &absentSum
		if ok {
			cnt, lowest, highest, sum = &stats.presentCnt, &stats.presentMin, &stats.presentMax, &presentSum
		}
		*cnt++
		*sum += float32(l)
		if l < *lowest {
			*lowest = l
		}
		if l > *highest {
			*highest = l
		}
	}
	// with no samples, report a minimum of zero rather than the sentinel
	if stats.presentCnt > 0 {
		stats.presentAvg = presentSum / float32(stats.presentCnt)
	} else {
		stats.presentMin = 0
	}
	if stats.absentCnt > 0 {
		stats.absentAvg = absentSum / float32(stats.absentCnt)
	} else {
		stats.absentMin = 0
	}
	return
}

// TestNumGroups compares numGroups against a floating-point
// reference for element counts around multiples of 14.
func TestNumGroups(t *testing.T) {
	// max load factor 0.875
	for _, n := range []int{0, 1, 14, 15, 28, 29, 56, 57} {
		assert.Equal(t, expected(n), numGroups(uint32(n)))
	}
}

// expected computes the reference group count for |x| elements:
// |x| divided by |maxAvgGroupLoad|, rounded up, and at least one.
func expected(x int) (groups uint32) {
	ratio := float64(x) / float64(maxAvgGroupLoad)
	if g := uint32(math.Ceil(ratio)); g != 0 {
		return g
	}
	return 1
}
437 | --------------------------------------------------------------------------------