├── assets └── logo.JPG ├── go.mod ├── internal ├── mathutil │ └── mathutil.go └── httpcache │ ├── index.go │ └── index_test.go ├── .gitignore ├── benchmarks ├── cluster │ ├── runner │ │ └── Dockerfile │ ├── direct │ │ ├── Dockerfile │ │ └── main.go │ ├── node │ │ ├── Dockerfile │ │ └── main.go │ ├── README.md │ └── docker-compose.yml ├── go.mod ├── go.sum ├── benchmark_runner.go └── README.md ├── go.sum ├── cluster ├── limiter_test.go ├── codec.go ├── rendezvous_test.go ├── weights.go ├── bufpool_test.go ├── hlc_test.go ├── bf_msg.go ├── membership_test.go ├── weights_test.go ├── limiter.go ├── errors.go ├── lease_test.go ├── codec_test.go ├── bufpool.go ├── keycodec.go ├── migrate.go ├── keycodec_test.go ├── lease.go ├── hlc.go ├── membership.go ├── wire.go ├── adapter.go ├── heat.go ├── bf_rpc.go ├── rendezvous.go ├── config.go ├── replication.go ├── bf_join.go └── transport.go ├── fnv.go ├── errors.go ├── .github └── workflows │ └── test.yml ├── Makefile ├── _examples ├── basic │ └── main.go └── advanced │ └── main.go ├── snapshot.go ├── shard.go ├── hash_test.go ├── lfu.go ├── manager.go ├── hash.go ├── CHANGELOG.md ├── README.md ├── eviction.go ├── fnv_test.go └── CLUSTER.md /assets/logo.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unkn0wn-root/kioshun/HEAD/assets/logo.JPG -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/unkn0wn-root/kioshun 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/cespare/xxhash/v2 v2.3.0 7 | github.com/fxamacker/cbor/v2 v2.6.0 8 | ) 9 | 10 | require github.com/x448/float16 v0.8.4 // indirect 11 | -------------------------------------------------------------------------------- /internal/mathutil/mathutil.go: -------------------------------------------------------------------------------- 1 | package mathutil 2 | 3 | import "math/bits" 4 | 5 | // NextPowerOf2 returns the next power of 2 greater than or equal to n. 6 | func NextPowerOf2(n int) int { 7 | if n <= 1 { 8 | return 1 9 | } 10 | return 1 << bits.Len(uint(n-1)) 11 | } 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | *.dylib 3 | *.test 4 | *.out 5 | 6 | go.work 7 | 8 | *.tmp 9 | *.temp 10 | *.log 11 | 12 | /dist/ 13 | /build/ 14 | /bin/ 15 | 16 | coverage.out 17 | coverage.html 18 | 19 | *.prof 20 | 21 | .env 22 | .env.local 23 | .env.*.local 24 | 25 | debug 26 | debug.test 27 | __debug_bin* 28 | -------------------------------------------------------------------------------- /benchmarks/cluster/runner/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.22-alpine AS build 2 | WORKDIR /src 3 | COPY . . 
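# NOTE: the cache mounts on the RUN below are BuildKit features; build with
# BuildKit enabled (e.g. DOCKER_BUILDKIT=1 or docker buildx), otherwise the
# classic builder rejects the --mount flags.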
4 | RUN --mount=type=cache,target=/go/pkg/mod --mount=type=cache,target=/root/.cache/go-build \ 5 | set -eux; \ 6 | cd _benchmarks; \ 7 | go mod tidy; \ 8 | cd cluster/runner && go build -o /out/runner ./ 9 | 10 | FROM alpine:3.19 11 | RUN apk add --no-cache ca-certificates && adduser -D app 12 | USER app 13 | WORKDIR /app 14 | COPY --from=build /out/runner /app/runner 15 | ENTRYPOINT ["/app/runner"] 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 2 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 3 | github.com/fxamacker/cbor/v2 v2.6.0 h1:sU6J2usfADwWlYDAFhZBQ6TnLFBHxgesMrQfQgk1tWA= 4 | github.com/fxamacker/cbor/v2 v2.6.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= 5 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= 6 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= 7 | -------------------------------------------------------------------------------- /benchmarks/cluster/direct/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.22-alpine AS build 2 | WORKDIR /src 3 | COPY . . 4 | RUN --mount=type=cache,target=/go/pkg/mod --mount=type=cache,target=/root/.cache/go-build \ 5 | set -eux; \ 6 | cd _benchmarks; \ 7 | go mod tidy; \ 8 | cd cluster/direct && go build -o /out/direct-runner ./ 9 | 10 | FROM alpine:3.19 11 | RUN apk add --no-cache ca-certificates && adduser -D app 12 | USER app 13 | WORKDIR /app 14 | COPY --from=build /out/direct-runner /app/runner 15 | ENTRYPOINT ["/app/runner"] 16 | 17 | -------------------------------------------------------------------------------- /cluster/limiter_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestRateLimiterTokensAndRefill(t *testing.T) { 9 | rl := newRateLimiter(2, 50*time.Millisecond) 10 | defer rl.Stop() 11 | 12 | if !rl.Allow() || !rl.Allow() { 13 | t.Fatalf("expected first two allows to pass") 14 | } 15 | 16 | if rl.Allow() { 17 | t.Fatalf("expected third allow to be rate-limited") 18 | } 19 | 20 | time.Sleep(60 * time.Millisecond) 21 | if !rl.Allow() { 22 | t.Fatalf("expected allow after refill") 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /benchmarks/cluster/node/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.22-alpine AS build 2 | WORKDIR /src 3 | COPY . . 
4 | RUN --mount=type=cache,target=/go/pkg/mod --mount=type=cache,target=/root/.cache/go-build \ 5 | set -eux; \ 6 | cd _benchmarks; \ 7 | go mod tidy; \ 8 | cd cluster/node && go build -o /out/meshnode ./ 9 | 10 | FROM alpine:3.19 11 | RUN apk add --no-cache wget && adduser -D app 12 | USER app 13 | WORKDIR /app 14 | COPY --from=build /out/meshnode /app/meshnode 15 | EXPOSE 8081 8082 8083 5011 5012 5013 16 | ENTRYPOINT ["/app/meshnode"] 17 | -------------------------------------------------------------------------------- /benchmarks/go.mod: -------------------------------------------------------------------------------- 1 | module benchmark 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/allegro/bigcache/v3 v3.1.0 7 | github.com/coocood/freecache v1.2.4 8 | github.com/dgraph-io/ristretto v0.1.1 9 | github.com/patrickmn/go-cache v2.1.0+incompatible 10 | github.com/unkn0wn-root/kioshun v0.0.3 11 | github.com/redis/go-redis/v9 v9.5.2 12 | ) 13 | 14 | require ( 15 | github.com/cespare/xxhash/v2 v2.1.2 // indirect 16 | github.com/dustin/go-humanize v1.0.0 // indirect 17 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b // indirect 18 | github.com/pkg/errors v0.9.1 // indirect 19 | golang.org/x/sys v0.0.0-20221010170243-090e33056c14 // indirect 20 | ) 21 | 22 | replace github.com/unkn0wn-root/kioshun => ../ 23 | -------------------------------------------------------------------------------- /fnv.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | const ( 4 | // FNV-1a 5 | fnvOffset64 = 14695981039346656037 6 | fnvPrime64 = 1099511628211 7 | ) 8 | 9 | // fnvHash64 implements FNV-1a hash algorithm with XOR-folding. 10 | // 11 | // Standard FNV-1a algorithm: 12 | // 1. Initialize hash with FNV offset basis 13 | // 2. For each byte: XOR byte with current hash, then multiply by FNV prime 14 | // 3. XOR-before-multiply order distinguishes FNV-1a from FNV-1 15 | // 16 | // Kioshun XOR-folding: 17 | // - Combines upper and lower 32 bits via h ^ (h >> 32) 18 | // - Better hash distribution for shard selection 19 | // - Reduces clustering when using power-of-2 table sizes 20 | func fnvHash64(s string) uint64 { 21 | h := uint64(fnvOffset64) 22 | for i := 0; i < len(s); i++ { 23 | h ^= uint64(s[i]) 24 | h *= fnvPrime64 25 | } 26 | return h ^ (h >> 32) 27 | } 28 | -------------------------------------------------------------------------------- /cluster/codec.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | cbor "github.com/fxamacker/cbor/v2" 5 | ) 6 | 7 | // Codec abstracts value encoding for the wire. Must be 8 | // deterministic and stable across nodes to allow backfill/replication. 9 | type Codec[V any] interface { 10 | Encode(V) ([]byte, error) 11 | Decode([]byte) (V, error) 12 | } 13 | 14 | // BytesCodec: pass-through []byte (no copy on Encode; Decode returns a copy). 
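//
// A minimal usage sketch (illustrative; the Node wiring that supplies a codec
// lives elsewhere):
//
//	var c Codec[[]byte] = BytesCodec{}
//	enc, _ := c.Encode([]byte("v")) // pass-through: enc aliases the input
//	dec, _ := c.Decode(enc)         // fresh copy, safe to retain
//	_ = dec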
15 | type BytesCodec struct{} 16 | 17 | func (BytesCodec) Encode(v []byte) ([]byte, error) { return v, nil } 18 | func (BytesCodec) Decode(b []byte) ([]byte, error) { out := append([]byte(nil), b...); return out, nil } 19 | 20 | type CBORCodec[V any] struct{} 21 | 22 | func (CBORCodec[V]) Encode(v V) ([]byte, error) { return cbor.Marshal(v) } 23 | func (CBORCodec[V]) Decode(b []byte) (V, error) { 24 | var v V 25 | err := cbor.Unmarshal(b, &v) 26 | return v, err 27 | } 28 | -------------------------------------------------------------------------------- /cluster/rendezvous_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "sync/atomic" 5 | "testing" 6 | ) 7 | 8 | func TestRingOwnersWeightedFirstIsHeaviest(t *testing.T) { 9 | r := newRing(2) 10 | a := newMeta(NodeID("A"), "a") 11 | b := newMeta(NodeID("B"), "b") 12 | c := newMeta(NodeID("C"), "c") 13 | 14 | // make A overwhelmingly heavy so it always wins top rank. 15 | atomic.StoreUint64(&a.weight, 1_000_000) 16 | atomic.StoreUint64(&b.weight, 1) 17 | atomic.StoreUint64(&c.weight, 1) 18 | r.nodes = []*nodeMeta{a, b, c} 19 | 20 | owners := r.ownersFromKeyHash(12345) 21 | if len(owners) == 0 || owners[0] != a { 22 | t.Fatalf("expected A as first owner, got %#v", owners) 23 | } 24 | 25 | top := r.ownersTopNFromKeyHash(12345, 3) 26 | if len(top) != 3 { 27 | t.Fatalf("expected 3 candidates, got %d", len(top)) 28 | } 29 | if top[0] != a { 30 | t.Fatalf("expected A as first candidate, got %#v", top) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cluster/weights.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import "math" 4 | 5 | const ( 6 | weightMax = 1_000_000 7 | memRef = 8 << 30 // 8 GiB reference for normalization 8 | ) 9 | 10 | // computeWeight converts node load into a rendezvous weight (1..1_000_000). 11 | // Higher free memory and lower CPU/evictions/size produce larger weights. 
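//
// Worked example (illustrative; exact result subject to float rounding):
//
//	FreeMemBytes = 8 GiB     -> fm  = 1.0
//	CPUu16 = 5000 (50% busy) -> cpu = 0.5
//	Evictions = 0 -> ev = 1.0; Size = 0 -> sz = 1.0
//	s = 0.35*1.0 + 0.35*0.5 + 0.2*1.0 + 0.1*1.0 = 0.825 -> w ≈ 825_000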
12 | func computeWeight(load NodeLoad) uint64 { 13 | var fm float64 = 0.5 14 | if load.FreeMemBytes > 0 { 15 | fm = clamp(float64(load.FreeMemBytes) / float64(memRef)) 16 | } 17 | 18 | cpu := 0.5 19 | if load.CPUu16 > 0 { 20 | cpu = 1.0 - clamp(float64(load.CPUu16)/10000.0) 21 | } 22 | 23 | ev := 1.0 / (1.0 + float64(load.Evictions)) 24 | sz := 1.0 / (1.0 + clamp(float64(load.Size)/1_000_000.0)) 25 | s := fm*0.35 + cpu*0.35 + ev*0.2 + sz*0.1 26 | w := uint64(s * weightMax) 27 | if w < 1 { 28 | w = 1 29 | } 30 | return w 31 | } 32 | func clamp(v float64) float64 { 33 | return math.Max(0, math.Min(1, v)) 34 | } 35 | -------------------------------------------------------------------------------- /cluster/bufpool_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import "testing" 4 | 5 | func TestBufPoolGetPut(t *testing.T) { 6 | bp := newBufPool([]int{64, 128}) 7 | 8 | // small buffer uses 64 bucket 9 | b := bp.get(50) 10 | if len(b) != 50 || cap(b) != 64 { 11 | t.Fatalf("unexpected buf: len=%d cap=%d", len(b), cap(b)) 12 | } 13 | bp.put(b) 14 | 15 | // large buffer gets exact allocation (> largest bucket) 16 | big := bp.get(256) 17 | if len(big) != 256 || cap(big) != 256 { 18 | t.Fatalf("unexpected big buf: len=%d cap=%d", len(big), cap(big)) 19 | } 20 | } 21 | 22 | func TestBufPoolClass(t *testing.T) { 23 | bp := newBufPool([]int{64, 128}) 24 | if got := bp.class(1); got != 0 { 25 | t.Fatalf("class(1)=%d", got) 26 | } 27 | if got := bp.class(64); got != 0 { 28 | t.Fatalf("class(64)=%d", got) 29 | } 30 | if got := bp.class(65); got != 1 { 31 | t.Fatalf("class(65)=%d", got) 32 | } 33 | if got := bp.class(129); got != -1 { 34 | t.Fatalf("class(129)=%d", got) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cluster/hlc_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestHLCNextMonotonic(t *testing.T) { 9 | h := newHLC(1) 10 | a := h.Next() 11 | b := h.Next() 12 | if b <= a { 13 | t.Fatalf("Next not monotonic: a=%d b=%d", a, b) 14 | } 15 | } 16 | 17 | func TestHLCObserveRemoteAhead(t *testing.T) { 18 | h := newHLC(1) 19 | _ = h.Next() // initialize 20 | rp := time.Now().UnixMilli() + 5 21 | remote := packHLC(rp, 3, 0) 22 | h.Observe(remote) 23 | after := h.Next() 24 | ap, _ := unpackHLC(after) 25 | if ap < rp { // should catch up to remote physical time 26 | t.Fatalf("did not catch up: ap=%d rp=%d", ap, rp) 27 | } 28 | } 29 | 30 | func TestHLCObserveRemoteBehindNoRegression(t *testing.T) { 31 | h := newHLC(1) 32 | a := h.Next() 33 | ap, _ := unpackHLC(a) 34 | // remote behind current physical time 35 | remote := packHLC(ap-10, 0, 0) 36 | h.Observe(remote) 37 | b := h.Next() 38 | if b <= a { 39 | t.Fatalf("regressed: a=%d b=%d", a, b) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | ) 7 | 8 | var ( 9 | ErrCacheExists = errors.New("cache already exists") 10 | ErrCacheNotFound = errors.New("cache not found") 11 | ErrTypeMismatch = errors.New("cache type mismatch") 12 | ErrInvalidConfig = errors.New("invalid cache configuration") 13 | ErrCacheClosed = errors.New("cache is closed") 14 | ) 15 | 16 | type CacheError struct { 17 | Op string 18 | Name string 19 | 
Cause error 20 | } 21 | 22 | func (e *CacheError) Error() string { 23 | if e.Name != "" { 24 | return fmt.Sprintf("cache %s %s: %v", e.Op, e.Name, e.Cause) 25 | } 26 | return fmt.Sprintf("cache %s: %v", e.Op, e.Cause) 27 | } 28 | 29 | func (e *CacheError) Unwrap() error { 30 | return e.Cause 31 | } 32 | 33 | func newCacheError(op, name string, cause error) *CacheError { 34 | return &CacheError{ 35 | Op: op, 36 | Name: name, 37 | Cause: cause, 38 | } 39 | } 40 | 41 | func wrapError(op string, err error) *CacheError { 42 | return &CacheError{ 43 | Op: op, 44 | Cause: err, 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /cluster/bf_msg.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | // Digest for a key-hash prefix bucket; used to detect divergent buckets 4 | // without transferring all keys. 5 | type BucketDigest struct { 6 | Prefix []byte `cbor:"p"` // first Depth bytes of key-hash (big-endian) 7 | Count uint32 `cbor:"c"` 8 | Hash64 uint64 `cbor:"h"` 9 | } 10 | 11 | type MsgBackfillDigestReq struct { 12 | Base 13 | TargetID string `cbor:"tid"` // joiner ID 14 | Depth uint8 `cbor:"d"` // bytes of prefix (1..8) 15 | } 16 | 17 | type MsgBackfillDigestResp struct { 18 | Base 19 | Depth uint8 `cbor:"d"` 20 | Buckets []BucketDigest `cbor:"b"` 21 | NotInRing bool `cbor:"nr,omitempty"` 22 | } 23 | 24 | type MsgBackfillKeysReq struct { 25 | Base 26 | TargetID string `cbor:"tid"` // joiner ID 27 | Prefix []byte `cbor:"p"` // len == Depth 28 | Limit int `cbor:"l"` // page size 29 | Cursor []byte `cbor:"u"` // last 8B key-hash (big-endian) for pagination 30 | } 31 | 32 | type MsgBackfillKeysResp struct { 33 | Base 34 | Items []KV `cbor:"i"` 35 | NextCursor []byte `cbor:"u"` // nil when done 36 | Done bool `cbor:"o"` 37 | NotInRing bool `cbor:"nr,omitempty"` 38 | } 39 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, macos-latest] 15 | go-version: ['1.21', '1.22'] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Go 21 | uses: actions/setup-go@v4 22 | with: 23 | go-version: ${{ matrix.go-version }} 24 | 25 | - name: Cache Go modules 26 | uses: actions/cache@v4 27 | with: 28 | path: | 29 | ~/.cache/go-build 30 | ~/go/pkg/mod 31 | key: ${{ runner.os }}-go-${{ matrix.go-version }}-${{ hashFiles('**/go.sum') }} 32 | restore-keys: | 33 | ${{ runner.os }}-go-${{ matrix.go-version }}- 34 | 35 | - name: Format check 36 | run: | 37 | if [ "$(gofmt -s -l . | wc -l)" -gt 0 ]; then 38 | echo "Code is not formatted:" 39 | gofmt -s -l . 40 | exit 1 41 | fi 42 | 43 | - name: Lint 44 | run: go vet ./... 45 | 46 | - name: Run tests 47 | run: go test -v -race ./... 48 | -------------------------------------------------------------------------------- /cluster/membership_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestMembershipIntegrateAlivePrune(t *testing.T) { 9 | m := newMembership() 10 | now := time.Now().UnixNano() 11 | 12 | // integrate gossip from A, referencing B as known peer. 
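// (args: from, addr, relayed peers, remote seen map keyed by peer ID, epoch, now)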
13 | m.integrate(NodeID("A"), "A", []PeerInfo{{ID: "B", Addr: "B"}}, map[string]int64{"B": now}, 10, now) 14 | 15 | if m.epoch != 10 { 16 | t.Fatalf("epoch not updated: %d", m.epoch) 17 | } 18 | 19 | al := m.alive(now, 1*time.Second) 20 | got := make(map[NodeID]bool) 21 | for _, nm := range al { 22 | got[nm.ID] = true 23 | } 24 | 25 | if !got[NodeID("A")] || !got[NodeID("B")] { 26 | t.Fatalf("alive missing A or B: %+v", got) 27 | } 28 | 29 | // lower epoch should not decrease stored epoch. 30 | m.integrate(NodeID("A"), "A", nil, nil, 5, now) 31 | if m.epoch != 10 { 32 | t.Fatalf("epoch regressed: %d", m.epoch) 33 | } 34 | 35 | // make B stale and prune tombstones. 36 | m.mu.Lock() 37 | m.seen[NodeID("B")] = now - int64(10*time.Second) 38 | m.mu.Unlock() 39 | m.pruneTombstones(now, 5*time.Second) 40 | 41 | m.mu.RLock() 42 | _, okB := m.peers[NodeID("B")] 43 | m.mu.RUnlock() 44 | if okB { 45 | t.Fatalf("expected B to be pruned") 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /cluster/weights_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import "testing" 4 | 5 | func TestComputeWeightBounds(t *testing.T) { 6 | w := computeWeight(NodeLoad{}) 7 | if w < 1 || w > weightMax { 8 | t.Fatalf("weight out of bounds: %d", w) 9 | } 10 | } 11 | 12 | func TestComputeWeightSensitivity(t *testing.T) { 13 | base := NodeLoad{Size: 1_000_000, Evictions: 0, FreeMemBytes: 4 << 30, CPUu16: 5000} 14 | w1 := computeWeight(base) 15 | 16 | // more free memory => higher weight 17 | w2 := computeWeight(NodeLoad{Size: base.Size, Evictions: base.Evictions, FreeMemBytes: 8 << 30, CPUu16: base.CPUu16}) 18 | if w2 <= w1 { 19 | t.Fatalf("expected weight to increase with free memory: %d -> %d", w1, w2) 20 | } 21 | 22 | // higher CPU usage => lower weight 23 | w3 := computeWeight(NodeLoad{Size: base.Size, Evictions: base.Evictions, FreeMemBytes: base.FreeMemBytes, CPUu16: 9000}) 24 | if w3 >= w1 { 25 | t.Fatalf("expected weight to decrease with CPU load: %d -> %d", w1, w3) 26 | } 27 | 28 | // more evictions => lower weight 29 | w4 := computeWeight(NodeLoad{Size: base.Size, Evictions: 100, FreeMemBytes: base.FreeMemBytes, CPUu16: base.CPUu16}) 30 | if w4 >= w1 { 31 | t.Fatalf("expected weight to decrease with evictions: %d -> %d", w1, w4) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /cluster/limiter.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | type rateLimiter struct { 9 | mu sync.Mutex 10 | max int 11 | tokens int 12 | interval time.Duration 13 | stopCh chan struct{} 14 | } 15 | 16 | // newRateLimiter implements a simple token bucket with fixed window refill. 17 | func newRateLimiter(max int, interval time.Duration) *rateLimiter { 18 | rl := &rateLimiter{ 19 | max: max, 20 | tokens: max, 21 | interval: interval, 22 | stopCh: make(chan struct{}), 23 | } 24 | go rl.refill() 25 | return rl 26 | } 27 | 28 | // refill resets the available tokens to max at fixed intervals. 29 | func (r *rateLimiter) refill() { 30 | t := time.NewTicker(r.interval) 31 | defer t.Stop() 32 | for { 33 | select { 34 | case <-t.C: 35 | r.mu.Lock() 36 | r.tokens = r.max 37 | r.mu.Unlock() 38 | case <-r.stopCh: 39 | return 40 | } 41 | } 42 | } 43 | 44 | // Allow consumes a token if available; returns false when rate-limited. 
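//
// Usage sketch (mirrors limiter_test.go):
//
//	rl := newRateLimiter(2, 50*time.Millisecond)
//	defer rl.Stop()
//	rl.Allow() // true
//	rl.Allow() // true
//	rl.Allow() // false until the next 50ms refill resets the bucket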
45 | func (r *rateLimiter) Allow() bool { 46 | r.mu.Lock() 47 | defer r.mu.Unlock() 48 | if r.tokens <= 0 { 49 | return false 50 | } 51 | r.tokens-- 52 | return true 53 | } 54 | 55 | // Stop terminates the refill goroutine. 56 | func (r *rateLimiter) Stop() { close(r.stopCh) } 57 | -------------------------------------------------------------------------------- /cluster/errors.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "net" 7 | "syscall" 8 | ) 9 | 10 | const ErrNotFound = "notfound" 11 | 12 | var ( 13 | ErrNoOwner = errors.New("no owner for key") 14 | ErrTimeout = errors.New("timeout") 15 | ErrClosed = errors.New("cluster closed") 16 | ErrBadPeer = errors.New("bad peer response") 17 | ErrNoLoader = errors.New("no loader configured on primary") 18 | ErrLeaseTimeout = errors.New("lease timeout") 19 | ErrPeerClosed = errors.New("peer closed") 20 | ) 21 | 22 | // isFatalTransport reports whether an error indicates a broken or unusable 23 | // transport that should trigger a peer reset/redial. 24 | // Timeouts and application errors are considered non-fatal. 25 | func isFatalTransport(err error) bool { 26 | if err == nil { 27 | return false 28 | } 29 | 30 | if errors.Is(err, ErrTimeout) { 31 | return false 32 | } 33 | 34 | if errors.Is(err, ErrPeerClosed) || errors.Is(err, net.ErrClosed) || errors.Is(err, io.EOF) { 35 | return true 36 | } 37 | 38 | var nerr net.Error 39 | if errors.As(err, &nerr) { 40 | return !nerr.Timeout() 41 | } 42 | 43 | if errors.Is(err, syscall.ECONNRESET) || errors.Is(err, syscall.EPIPE) || errors.Is(err, syscall.ECONNABORTED) { 44 | return true 45 | } 46 | return false 47 | } 48 | -------------------------------------------------------------------------------- /cluster/lease_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestLeaseTableAcquireReleaseWait(t *testing.T) { 10 | lt := newLeaseTable(200 * time.Millisecond) 11 | defer lt.Stop() 12 | 13 | f1, acq1 := lt.acquire("k") 14 | if !acq1 || f1 == nil { 15 | t.Fatalf("expected first acquire to create inflight") 16 | } 17 | 18 | f2, acq2 := lt.acquire("k") 19 | if acq2 || f2 == nil || f2 != f1 { 20 | t.Fatalf("expected second acquire to wait on same inflight") 21 | } 22 | 23 | done := make(chan error, 1) 24 | go func() { 25 | done <- lt.wait(context.Background(), "k") 26 | }() 27 | 28 | time.Sleep(20 * time.Millisecond) 29 | lt.release("k", nil) 30 | select { 31 | case err := <-done: 32 | if err != nil { 33 | t.Fatalf("unexpected wait error: %v", err) 34 | } 35 | case <-time.After(1 * time.Second): 36 | t.Fatalf("wait timed out") 37 | } 38 | } 39 | 40 | func TestLeaseTableTimeout(t *testing.T) { 41 | lt := newLeaseTable(50 * time.Millisecond) 42 | defer lt.Stop() 43 | 44 | _, acq := lt.acquire("x") 45 | if !acq { 46 | t.Fatalf("expected acquire") 47 | } 48 | 49 | ctx, cancel := context.WithTimeout(context.Background(), time.Second) 50 | defer cancel() 51 | err := lt.wait(ctx, "x") 52 | if err == nil { 53 | t.Fatalf("expected timeout error") 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /cluster/codec_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestBytesCodecPassAndCopyOnDecode(t *testing.T) { 9 | var 
bc BytesCodec 10 | 11 | // encode should be pass-through 12 | v := []byte{1, 2, 3} 13 | enc, err := bc.Encode(v) 14 | if err != nil { 15 | t.Fatalf("encode error: %v", err) 16 | } 17 | v[0] = 9 18 | if enc[0] != 9 { 19 | t.Fatalf("encode not pass-through: got %v", enc) 20 | } 21 | 22 | // decode should return a copy detached from input. 23 | in := []byte{4, 5, 6} 24 | out, err := bc.Decode(in) 25 | if err != nil { 26 | t.Fatalf("decode error: %v", err) 27 | } 28 | if !reflect.DeepEqual(out, in) { 29 | t.Fatalf("decode mismatch: got %v want %v", out, in) 30 | } 31 | in[0] = 7 32 | if out[0] == in[0] { 33 | t.Fatalf("decode did not copy. Out mutated: %v vs %v", out, in) 34 | } 35 | } 36 | 37 | func TestCBORCodecRoundTrip(t *testing.T) { 38 | type S struct { 39 | A int 40 | B string 41 | } 42 | var c CBORCodec[S] 43 | orig := S{A: 42, B: "x"} 44 | b, err := c.Encode(orig) 45 | if err != nil { 46 | t.Fatalf("encode error: %v", err) 47 | } 48 | got, err := c.Decode(b) 49 | if err != nil { 50 | t.Fatalf("decode error: %v", err) 51 | } 52 | if !reflect.DeepEqual(got, orig) { 53 | t.Fatalf("round-trip mismatch: got %+v want %+v", got, orig) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /cluster/bufpool.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import "sync" 4 | 5 | type bufPool struct { 6 | sizes []int 7 | pools []sync.Pool 8 | indexBySize map[int]int 9 | } 10 | 11 | // newBufPool creates fixed-size byte slice pools for a small set of common 12 | // buffer sizes to reduce allocations on hot paths (framed I/O). 13 | func newBufPool(sizes []int) *bufPool { 14 | bp := &bufPool{ 15 | sizes: sizes, 16 | pools: make([]sync.Pool, len(sizes)), 17 | indexBySize: make(map[int]int, len(sizes)), 18 | } 19 | for i, sz := range sizes { 20 | size := sz 21 | bp.pools[i].New = func() any { 22 | b := make([]byte, size) 23 | return b 24 | } 25 | bp.indexBySize[sz] = i 26 | } 27 | return bp 28 | } 29 | 30 | // class returns the index of the first bucket that can hold n bytes. 31 | func (bp *bufPool) class(n int) int { 32 | for i, sz := range bp.sizes { 33 | if n <= sz { 34 | return i 35 | } 36 | } 37 | return -1 38 | } 39 | 40 | // get returns a slice of length n from an appropriate bucket (or an exact 41 | // allocation if n exceeds the largest bucket size). 42 | func (bp *bufPool) get(n int) []byte { 43 | if i := bp.class(n); i >= 0 { 44 | b := bp.pools[i].Get().([]byte) 45 | // return a slice of length n, capacity bucket size. 46 | return b[:n] 47 | } 48 | // big frame (> largest bucket): allocate exact. 49 | return make([]byte, n) 50 | } 51 | 52 | // put returns a buffer to the matching bucket by capacity. non-pooled sizes 53 | // are dropped on the floor to avoid unbounded pool growth. 54 | func (bp *bufPool) put(b []byte) { 55 | if i, ok := bp.indexBySize[cap(b)]; ok { 56 | // restore to full capacity before putting back. 
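// (callers may put() a shortened slice; normalizing keeps every pooled
// buffer's length equal to its bucket size before reuse)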
57 | b = b[:bp.sizes[i]] 58 | bp.pools[i].Put(b) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GOCMD=go 2 | GOBUILD=$(GOCMD) build 3 | GOCLEAN=$(GOCMD) clean 4 | GOTEST=$(GOCMD) test 5 | GOGET=$(GOCMD) get 6 | GOMOD=$(GOCMD) mod 7 | GOFMT=$(GOCMD) fmt 8 | GOVET=$(GOCMD) vet 9 | GOLINT=golint 10 | 11 | BINARY_NAME=kioshun 12 | PACKAGE_NAME=github.com/unkn0wn-root/kioshun 13 | 14 | .PHONY: all 15 | all: test build 16 | 17 | .PHONY: build 18 | build: 19 | $(GOBUILD) -v ./... 20 | 21 | .PHONY: test 22 | test: 23 | $(GOTEST) -v -race -coverprofile=coverage.out ./... 24 | 25 | .PHONY: bench-deps 26 | bench-deps: 27 | cd _benchmarks && $(GOMOD) tidy && $(GOMOD) download 28 | 29 | .PHONY: bench-runner 30 | bench-runner: bench-deps 31 | cd _benchmarks && timeout 600 go run benchmark_runner.go 32 | 33 | .PHONY: bench 34 | bench: bench-deps 35 | cd _benchmarks && $(GOTEST) -bench=. -benchmem -run=^$$ ./... 36 | 37 | .PHONY: bench-full 38 | bench-full: bench-deps 39 | cd _benchmarks && $(GOTEST) -bench=. -benchmem -benchtime=10s -run=^$$ ./... 40 | 41 | .PHONY: bench-compare 42 | bench-compare: bench-deps 43 | @echo "Running performance comparison..." 44 | cd _benchmarks && $(GOTEST) -bench=BenchmarkCacheShardComparison -benchmem -run=^$$ ./... 45 | cd _benchmarks && $(GOTEST) -bench=BenchmarkCacheEvictionPolicyComparison -benchmem -run=^$$ ./... 46 | 47 | .PHONY: lint 48 | lint: 49 | $(GOVET) ./... 50 | $(GOLINT) ./... 51 | 52 | .PHONY: fmt 53 | fmt: 54 | $(GOFMT) ./... 55 | 56 | .PHONY: clean 57 | clean: 58 | $(GOCLEAN) 59 | rm -f $(BINARY_NAME) 60 | 61 | .PHONY: tidy 62 | tidy: 63 | $(GOMOD) tidy 64 | 65 | .PHONY: deps 66 | deps: 67 | $(GOMOD) download 68 | 69 | .PHONY: check 70 | check: fmt lint test 71 | 72 | .PHONY: stress-test 73 | stress-test: bench-deps 74 | @echo "Running stress test..." 75 | cd _benchmarks && $(GOTEST) -bench=BenchmarkCacheScalability -benchmem -benchtime=30s -run=^$$ ./... 76 | 77 | .PHONY: mem-analysis 78 | mem-analysis: bench-deps 79 | @echo "Running memory usage analysis..." 80 | cd _benchmarks && $(GOTEST) -bench=BenchmarkCacheMemoryUsage -benchmem -run=^$$ ./... 81 | 82 | .PHONY: install-tools 83 | install-tools: 84 | $(GOGET) -u golang.org/x/lint/golint 85 | $(GOGET) -u golang.org/x/tools/cmd/goimports 86 | $(GOGET) -u github.com/kisielk/errcheck 87 | -------------------------------------------------------------------------------- /_examples/basic/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/unkn0wn-root/kioshun" 8 | ) 9 | 10 | func main() { 11 | fmt.Println("=== Basic Cache Usage ===") 12 | 13 | // Create a cache with default configuration 14 | cache := cache.NewWithDefaults[string, string]() 15 | defer cache.Close() 16 | 17 | // Basic Set and Get operations 18 | fmt.Println("\n1. Basic Set and Get ops:") 19 | cache.Set("user:123", "David Kier", 5*time.Minute) 20 | cache.Set("user:456", "Michael Ballack", 5*time.Minute) 21 | cache.Set("user:789", "Cristiano Bombaldo", 5*time.Minute) 22 | 23 | if value, found := cache.Get("user:123"); found { 24 | fmt.Printf("Found user: %s\n", value) 25 | } 26 | 27 | // Get with TTL information 28 | fmt.Println("\n2. 
Get with TTL:") 29 | if value, ttl, found := cache.GetWithTTL("user:123"); found { 30 | fmt.Printf("User: %s, TTL remaining: %s\n", value, ttl) 31 | } 32 | 33 | // Check existence without updating access time 34 | fmt.Println("\n3. Check existence:") 35 | if cache.Exists("user:123") { 36 | fmt.Println("User 123 exists in cache") 37 | } 38 | 39 | // Delete operation 40 | fmt.Println("\n4. Delete ops:") 41 | if cache.Delete("user:456") { 42 | fmt.Println("User 456 deleted from cache") 43 | } 44 | 45 | // Check size 46 | fmt.Println("\n5. Cache size:") 47 | fmt.Printf("Current cache size: %d\n", cache.Size()) 48 | 49 | // Get all keys 50 | fmt.Println("\n6. All keys:") 51 | keys := cache.Keys() 52 | for _, key := range keys { 53 | fmt.Printf("Key: %s\n", key) 54 | } 55 | 56 | // Cache statistics 57 | fmt.Println("\n7. Cache statistics:") 58 | stats := cache.Stats() 59 | fmt.Printf("Hits: %d, Misses: %d, Size: %d, Hit Ratio: %.2f%%\n", 60 | stats.Hits, stats.Misses, stats.Size, stats.HitRatio*100) 61 | 62 | // Set with callback on expiration 63 | fmt.Println("\n8. Set with expiration callback:") 64 | cache.SetWithCallback("temp:data", "temporary value", 2*time.Second, func(key string, value string) { 65 | fmt.Printf("Key %s expired with value: %s\n", key, value) 66 | }) 67 | 68 | // Wait for expiration 69 | time.Sleep(3 * time.Second) 70 | 71 | // Clear all 72 | fmt.Println("\n9. Clear all:") 73 | cache.Clear() 74 | fmt.Printf("Cache size after clear: %d\n", cache.Size()) 75 | 76 | fmt.Println("\n=== Example completed ===") 77 | } 78 | -------------------------------------------------------------------------------- /cluster/keycodec.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | 7 | xxhash "github.com/cespare/xxhash/v2" 8 | ) 9 | 10 | // KeyCodec maps K <-> []byte for wire/hashing. Should be 11 | // stable across nodes. Optional KeyHasher allows zero-copy hash fast-paths. 12 | type KeyCodec[K any] interface { 13 | EncodeKey(K) []byte 14 | DecodeKey([]byte) (K, error) 15 | } 16 | 17 | // KeyHasher optional fast-path (zero-copy hash of K). 18 | type KeyHasher[K any] interface { 19 | Hash64(K) uint64 20 | } 21 | 22 | // String keys: encode to raw bytes; xxhash for hashing. 23 | type StringKeyCodec[K ~string] struct{} 24 | 25 | func (StringKeyCodec[K]) EncodeKey(k K) []byte { return []byte(string(k)) } 26 | func (StringKeyCodec[K]) DecodeKey(b []byte) (K, error) { return K(string(b)), nil } 27 | func (StringKeyCodec[K]) Hash64(k K) uint64 { return xxhash.Sum64String(string(k)) } 28 | 29 | // Bytes keys: returns underlying slice. Decode copies to detach from caller. 
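//
// Round-trip sketch (illustrative):
//
//	var kc BytesKeyCodec[[]byte]
//	b := kc.EncodeKey([]byte("k1")) // aliases the caller's slice
//	k, _ := kc.DecodeKey(b)         // detached copy of b
//	_ = kc.Hash64(k)                // xxhash over the raw bytes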
30 | type BytesKeyCodec[K ~[]byte] struct{} 31 | 32 | func (BytesKeyCodec[K]) EncodeKey(k K) []byte { return []byte(k) } 33 | func (BytesKeyCodec[K]) DecodeKey(b []byte) (K, error) { return K(append([]byte(nil), b...)), nil } 34 | func (BytesKeyCodec[K]) Hash64(k K) uint64 { return xxhash.Sum64([]byte(k)) } 35 | 36 | type Int64KeyCodec[K ~int64] struct{} 37 | 38 | func (Int64KeyCodec[K]) EncodeKey(k K) []byte { 39 | var buf [8]byte 40 | binary.BigEndian.PutUint64(buf[:], uint64(k)) 41 | return buf[:] 42 | } 43 | 44 | func (Int64KeyCodec[K]) DecodeKey(b []byte) (K, error) { 45 | if len(b) != 8 { 46 | return *new(K), errors.New("invalid int64 key length") 47 | } 48 | return K(int64(binary.BigEndian.Uint64(b))), nil 49 | } 50 | 51 | func (Int64KeyCodec[K]) Hash64(k K) uint64 { 52 | return mix64(uint64(k)) 53 | } 54 | 55 | type Uint64KeyCodec[K ~uint64] struct{} 56 | 57 | func (Uint64KeyCodec[K]) EncodeKey(k K) []byte { 58 | var buf [8]byte 59 | binary.BigEndian.PutUint64(buf[:], uint64(k)) 60 | return buf[:] 61 | } 62 | 63 | func (Uint64KeyCodec[K]) DecodeKey(b []byte) (K, error) { 64 | if len(b) != 8 { 65 | return *new(K), errors.New("invalid uint64 key length") 66 | } 67 | return K(binary.BigEndian.Uint64(b)), nil 68 | } 69 | 70 | func (Uint64KeyCodec[K]) Hash64(k K) uint64 { 71 | return mix64(uint64(k)) 72 | } 73 | -------------------------------------------------------------------------------- /cluster/migrate.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "time" 5 | 6 | cbor "github.com/fxamacker/cbor/v2" 7 | ) 8 | 9 | // rebalancerLoop periodically runs a bounded rebalance pass that migrates 10 | // locally owned-but-not-primary keys to their current primary owner. 11 | func (n *Node[K, V]) rebalancerLoop() { 12 | iv := n.cfg.RebalanceInterval 13 | if iv <= 0 { 14 | return 15 | } 16 | 17 | t := time.NewTicker(iv) 18 | defer t.Stop() 19 | for { 20 | select { 21 | case <-t.C: 22 | n.rebalanceOnce() 23 | case <-n.stop: 24 | return 25 | } 26 | } 27 | } 28 | 29 | // rebalanceOnce scans up to RebalanceLimit local keys and, for keys whose 30 | // primary owner moved away, pushes their latest value to the new primary and 31 | // deletes the local copy on success. 32 | func (n *Node[K, V]) rebalanceOnce() { 33 | keys := n.local.Keys() 34 | if len(keys) == 0 { 35 | return 36 | } 37 | 38 | limit := n.cfg.RebalanceLimit 39 | if limit <= 0 || limit > len(keys) { 40 | limit = len(keys) 41 | } 42 | 43 | for i := 0; i < limit; i++ { 44 | k := keys[i] 45 | owners := n.ownersFor(k) 46 | if len(owners) == 0 { 47 | continue 48 | } 49 | 50 | primary := owners[0] 51 | if primary.ID == n.cfg.ID { 52 | continue 53 | } 54 | 55 | v, ttl, ok := n.local.GetWithTTL(k) 56 | if !ok { 57 | continue 58 | } 59 | 60 | // Encode + (maybe) compress once. 61 | vb, err := n.codec.Encode(v) 62 | if err != nil { 63 | continue 64 | } 65 | vb, cp := n.maybeCompress(vb) 66 | 67 | bk := n.kc.EncodeKey(k) 68 | exp := absExpiry(ttl) 69 | pc := n.getPeer(primary.ID) 70 | if pc == nil || pc.penalized() { 71 | // Let next pass try again; we keep local until success. 
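// (the key stays readable from this node in the meantime; only a
// confirmed MsgSetResp.OK below triggers the local delete)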
72 | continue 73 | } 74 | 75 | id := n.nextReqID() 76 | ver := n.clock.Next() 77 | msg := &MsgSet{ 78 | Base: Base{ 79 | T: MTSet, 80 | ID: id, 81 | }, 82 | Key: bk, 83 | Val: vb, 84 | Exp: exp, 85 | Ver: ver, 86 | Cp: cp, 87 | } 88 | 89 | raw, err := pc.request(msg, id, n.cfg.Sec.WriteTimeout) 90 | if err != nil { 91 | if isFatalTransport(err) { 92 | n.resetPeer(primary.ID) 93 | } 94 | continue 95 | } 96 | 97 | var resp MsgSetResp 98 | if e := cbor.Unmarshal(raw, &resp); e != nil { 99 | n.resetPeer(primary.ID) 100 | continue 101 | } 102 | if !resp.OK { 103 | continue 104 | } 105 | n.local.Delete(k) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /cluster/keycodec_test.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "encoding/binary" 5 | "testing" 6 | 7 | xxhash "github.com/cespare/xxhash/v2" 8 | ) 9 | 10 | func TestStringKeyCodec(t *testing.T) { 11 | var kc StringKeyCodec[string] 12 | k := "hello" 13 | b := kc.EncodeKey(k) 14 | if string(b) != k { 15 | t.Fatalf("encode mismatch: %q", string(b)) 16 | } 17 | 18 | dk, err := kc.DecodeKey(b) 19 | if err != nil { 20 | t.Fatalf("decode error: %v", err) 21 | } 22 | if dk != k { 23 | t.Fatalf("decode mismatch: %q != %q", dk, k) 24 | } 25 | if kc.Hash64(k) != xxhash.Sum64String(k) { 26 | t.Fatalf("hash mismatch") 27 | } 28 | } 29 | 30 | func TestBytesKeyCodec(t *testing.T) { 31 | var kc BytesKeyCodec[[]byte] 32 | in := []byte{1, 2, 3} 33 | b := kc.EncodeKey(in) 34 | if string(b) != string(in) { 35 | t.Fatalf("encode mismatch") 36 | } 37 | 38 | dk, err := kc.DecodeKey(b) 39 | if err != nil { 40 | t.Fatalf("decode error: %v", err) 41 | } 42 | 43 | // decode should copy to detach from caller buffer. 
44 | b[0] = 9 45 | if dk[0] == b[0] { 46 | t.Fatalf("decode did not copy") 47 | } 48 | 49 | if kc.Hash64(in) != xxhash.Sum64(in) { 50 | t.Fatalf("hash mismatch") 51 | } 52 | } 53 | 54 | func TestInt64KeyCodec(t *testing.T) { 55 | var kc Int64KeyCodec[int64] 56 | k := int64(-1234567890) 57 | b := kc.EncodeKey(k) 58 | if len(b) != 8 { 59 | t.Fatalf("encode length: %d", len(b)) 60 | } 61 | got, err := kc.DecodeKey(b) 62 | if err != nil { 63 | t.Fatalf("decode error: %v", err) 64 | } 65 | if got != k { 66 | t.Fatalf("round-trip mismatch: %d != %d", got, k) 67 | } 68 | 69 | // check big-endian layout 70 | if binary.BigEndian.Uint64(b) != uint64(k) { 71 | t.Fatalf("big-endian mismatch") 72 | } 73 | 74 | if _, err := kc.DecodeKey([]byte{1, 2}); err == nil { 75 | t.Fatalf("expected length error") 76 | } 77 | 78 | if kc.Hash64(k) != mix64(uint64(k)) { 79 | t.Fatalf("hash mismatch") 80 | } 81 | } 82 | 83 | func TestUint64KeyCodec(t *testing.T) { 84 | var kc Uint64KeyCodec[uint64] 85 | k := uint64(0xdeadbeefcafebabe) 86 | b := kc.EncodeKey(k) 87 | if len(b) != 8 { 88 | t.Fatalf("encode length: %d", len(b)) 89 | } 90 | 91 | got, err := kc.DecodeKey(b) 92 | if err != nil { 93 | t.Fatalf("decode error: %v", err) 94 | } 95 | if got != k { 96 | t.Fatalf("round-trip mismatch: %d != %d", got, k) 97 | } 98 | 99 | if _, err := kc.DecodeKey([]byte{1, 2, 3}); err == nil { 100 | t.Fatalf("expected length error") 101 | } 102 | 103 | if kc.Hash64(k) != mix64(k) { 104 | t.Fatalf("hash mismatch") 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /snapshot.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "sync/atomic" 5 | "time" 6 | ) 7 | 8 | // Item is a wire-friendly export with absolute expiry. 9 | // NOTE: 10 | // - Version here is NOT the cluster LWW/HLC version. Export() currently uses a 11 | // placeholder (frequency) which is suitable for application-level cache dumps 12 | // and warm starts, but not for cluster snapshots. 13 | // - Cluster replication/backfill paths supply their own authoritative versions 14 | // and call InMemoryCache.Import directly with those values. 15 | type Item[K comparable, V any] struct { 16 | Key K 17 | Val V 18 | ExpireAbs int64 // 0 = no expiration 19 | Version uint64 // reserved for LWW if you add a real version later 20 | } 21 | 22 | // Export up to max items for which selectFn(key) is true. 23 | // Intended for application-level dump/restore. Not used by cluster state 24 | // transfer, because it does not carry the cluster's LWW versions. 25 | func (c *InMemoryCache[K, V]) Export(selectFn func(K) bool, mx int) []Item[K, V] { 26 | out := make([]Item[K, V], 0, mx) 27 | outer: 28 | for _, s := range c.shards { 29 | s.mu.RLock() 30 | now := time.Now().UnixNano() 31 | for k, it := range s.data { 32 | if mx > 0 && len(out) >= mx { 33 | s.mu.RUnlock() 34 | break outer 35 | } 36 | if it.expireTime > 0 && now > it.expireTime { 37 | continue 38 | } 39 | if !selectFn(k) { 40 | continue 41 | } 42 | out = append(out, Item[K, V]{ 43 | Key: k, 44 | Val: it.value, 45 | ExpireAbs: it.expireTime, 46 | Version: uint64(it.frequency), // placeholder 47 | }) 48 | } 49 | s.mu.RUnlock() 50 | } 51 | return out 52 | } 53 | 54 | // Import inserts/overwrites with absolute expiry. Cluster replication, 55 | // backfill, and rebalancing use this to apply authoritative state (including 56 | // LWW versions) without admission/eviction decisions. 
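//
// Warm-start sketch (illustrative; assumes string keys and two cache
// instances named old and fresh):
//
//	items := old.Export(func(k string) bool { return true }, 0) // mx <= 0 exports everything
//	fresh.Import(items) // absolute expiries preserved; Version fields are placeholders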
57 | func (c *InMemoryCache[K, V]) Import(items []Item[K, V]) { 58 | now := time.Now().UnixNano() 59 | for _, it := range items { 60 | s := c.getShard(it.Key) 61 | s.mu.Lock() 62 | ex, ok := s.data[it.Key] 63 | if !ok { 64 | ex = c.itemPool.Get().(*cacheItem[V]) 65 | s.data[it.Key] = ex 66 | s.addToLRUHead(ex) 67 | atomic.AddInt64(&s.size, 1) 68 | } else if c.config.EvictionPolicy == LFU { 69 | s.lfuList.remove(ex) 70 | } 71 | ex.key, ex.value = it.Key, it.Val 72 | ex.lastAccess = now 73 | ex.expireTime = it.ExpireAbs 74 | switch c.config.EvictionPolicy { 75 | case LRU: 76 | s.moveToLRUHead(ex) 77 | case LFU: 78 | ex.frequency = 1 79 | s.lfuList.add(ex) 80 | case AdmissionLFU: 81 | ex.frequency = 1 82 | } 83 | s.mu.Unlock() 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /benchmarks/go.sum: -------------------------------------------------------------------------------- 1 | github.com/allegro/bigcache/v3 v3.1.0 h1:H2Vp8VOvxcrB91o86fUSVJFqeuz8kpyyB02eH3bSzwk= 2 | github.com/allegro/bigcache/v3 v3.1.0/go.mod h1:aPyh7jEvrog9zAwx5N7+JUQX5dZTSGpxF1LAR4dr35I= 3 | github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 4 | github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= 5 | github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 6 | github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M= 7 | github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= 8 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 10 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWajOK8= 12 | github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA= 13 | github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA= 14 | github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= 15 | github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= 16 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 17 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= 18 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 19 | github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= 20 | github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= 21 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 22 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 23 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 24 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 25 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 26 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 27 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 28 | 
golang.org/x/sys v0.0.0-20221010170243-090e33056c14 h1:k5II8e6QD8mITdi+okbbmR/cIyEbeXLBhy5Ha4nevyc= 29 | golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 30 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 31 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 32 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 33 | -------------------------------------------------------------------------------- /benchmarks/cluster/README.md: -------------------------------------------------------------------------------- 1 | Mesh Bench: 3‑Node Kioshun Cluster Load Test 2 | 3 | Overview 4 | - Spins up a 3‑node Kioshun mesh cluster (no DB) and a separate runner that issues massive concurrent GET/SET operations. 5 | - Measures p50/p95/p99 latencies for GET and SET, logs HIT/MISS (local vs remote), and performs integrity checks to ensure the cluster never returns the wrong object for a key. 6 | 7 | Quick Start 8 | - docker compose -f _benchmarks/cluster/docker-compose.yml up --build 9 | 10 | Services 11 | - node1/node2/node3: Minimal HTTP wrappers around a Kioshun cluster node. 12 | - Endpoints: 13 | - GET /get?k=KEY → 200 with value on hit, 404 on miss. Headers: X-Cache=HIT_LOCAL|HIT_REMOTE|MISS 14 | - POST /set JSON {"k":"KEY","v":"VALUE","ttl_ms":0} → 200 15 | - GET /stats → local shard stats 16 | - runner: Generates high concurrency load and prints percentile latencies and hit ratios. 17 | - direct-runner: Starts 3 kioshun nodes in-process and drives load via the Node API (no HTTP); useful to compare protocol-only performance vs Redis. 18 | 19 | Runner Env Vars 20 | - TARGETS: Comma-separated list of node URLs (default: http://node1:8081,http://node2:8082,http://node3:8083) 21 | - DURATION: Test duration (default: 60s) 22 | - CONCURRENCY: Number of goroutines (default: 512) 23 | - KEYS: Key space size (default: 50000) 24 | - SET_RATIO: Percentage of SET ops (0..100, default: 10) 25 | - LOG_EVERY: Log every N ops per worker (default: 0 = disable) 26 | - STATS_EVERY: Print aggregated node /stats every interval (e.g., 10s). Empty disables. 27 | - SET_TTL_MS: TTL used for all SETs. Use -1 for no expiration across all replicas; positive ms for fixed TTL; avoid 0 if you want consistent TTL across owners. 
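Example invocation (illustrative values; assumes the compose service is named runner as listed under Services):
- docker compose -f _benchmarks/cluster/docker-compose.yml run --rm -e DURATION=30s -e CONCURRENCY=1024 -e SET_RATIO=20 -e SET_TTL_MS=-1 runner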
28 | 29 | Failure Injection (optional) 30 | - KILL_MODE: none | random | target (default: none) 31 | - KILL_AFTER: when to kill (duration, e.g., 45s) 32 | - KILL_TARGET: base URL of node to kill when mode=target (e.g., http://node2:8082) 33 | - KILL_TOKEN: shared token passed to /kill to authorize 34 | 35 | Node Env Vars 36 | - ALLOW_KILL: enable /kill endpoint (default: false) 37 | - KILL_TOKEN: required token to authorize /kill (optional, recommended) 38 | 39 | Read/Write/Failure Tuning (per node) 40 | - REPLICATION_FACTOR: owners per key (default 3) 41 | - WRITE_CONCERN: acks required (default 2) 42 | - READ_MAX_FANOUT: max parallel read legs (default 2) 43 | - READ_PER_TRY_MS: per-leg timeout (ms) 44 | - READ_HEDGE_DELAY_MS: delay before spinning hedges (ms) 45 | - READ_HEDGE_INTERVAL_MS: spacing between hedges (ms) 46 | - WRITE_TIMEOUT_MS: write timeout (ms) 47 | - READ_TIMEOUT_MS: read timeout (ms) 48 | - SUSPICION_AFTER_MS: suspect peer after (ms) 49 | - WEIGHT_UPDATE_MS: ring weight refresh interval (ms) 50 | - GOSSIP_INTERVAL_MS: gossip interval (ms) 51 | 52 | Output 53 | - Prints total ops, hit/miss counts (local/remote), and p50/p95/p99 for GET/SET. 54 | - Performs periodic consistency checks across all nodes and flags mismatches. 55 | - Optionally prints aggregated node stats during the run and at the end. 56 | 57 | Stopping 58 | - The runner traps SIGINT/SIGTERM and will always print the summary on exit. Use Ctrl+C safely. 59 | 60 | Direct Runner 61 | - Build/run: docker compose -f _benchmarks/cluster/docker-compose.yml up --build direct 62 | - Env knobs (same as node tuning + workload): DURATION, CONCURRENCY, KEYS, SET_RATIO, SET_TTL_MS, KILL_AFTER, CACHE_AUTH, READ_* and *_MS vars. 63 | -------------------------------------------------------------------------------- /cluster/lease.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "time" 7 | ) 8 | 9 | type inflight struct { 10 | ch chan struct{} 11 | err error 12 | exp int64 13 | done bool 14 | } 15 | 16 | // leaseTable provides per-key single-flight semantics with a TTL. The first 17 | // goroutine acquires a lease and performs the work, others wait on the channel 18 | // until the lease is released or times out. 19 | type leaseTable struct { 20 | mu sync.Mutex 21 | m map[string]*inflight 22 | ttl time.Duration 23 | stopCh chan struct{} 24 | } 25 | 26 | // newLeaseTable creates a per-key lease table with an optional TTL to break 27 | // stuck leases. A background sweeper closes expired leases when ttl>0. 28 | func newLeaseTable(ttl time.Duration) *leaseTable { 29 | t := &leaseTable{ 30 | m: make(map[string]*inflight), 31 | ttl: ttl, 32 | stopCh: make(chan struct{}), 33 | } 34 | if ttl > 0 { 35 | go t.sweeper() 36 | } 37 | return t 38 | } 39 | 40 | // acquire obtains a lease for key if none exists and returns (lease, true). 41 | // When a lease already exists, returns the existing lease and false. 42 | func (t *leaseTable) acquire(key string) (*inflight, bool) { 43 | t.mu.Lock() 44 | if f, ok := t.m[key]; ok { 45 | t.mu.Unlock() 46 | return f, false 47 | } 48 | f := &inflight{ch: make(chan struct{})} 49 | if t.ttl > 0 { 50 | f.exp = time.Now().Add(t.ttl).UnixNano() 51 | } 52 | t.m[key] = f 53 | t.mu.Unlock() 54 | return f, true 55 | } 56 | 57 | // release removes the lease and notifies waiters with the provided error. 
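//
// Single-flight pattern (sketch; load is a hypothetical loader returning error):
//
//	if _, acquired := lt.acquire(key); acquired {
//	    lt.release(key, load(key)) // run the work once, wake all waiters
//	} else if err := lt.wait(ctx, key); err != nil {
//	    // holder failed, ctx expired, or the lease TTL fired (ErrLeaseTimeout)
//	}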
58 | func (t *leaseTable) release(key string, err error) { 59 | t.mu.Lock() 60 | f, ok := t.m[key] 61 | if ok { 62 | delete(t.m, key) 63 | } 64 | // close channel under the lock to avoid double-close with sweeper 65 | if ok && !f.done { 66 | f.err = err 67 | f.done = true 68 | close(f.ch) 69 | } 70 | t.mu.Unlock() 71 | } 72 | 73 | // wait blocks until the lease for key completes or ctx is done, returning 74 | // the terminal error set by the releaser (nil on success). 75 | func (t *leaseTable) wait(ctx context.Context, key string) error { 76 | t.mu.Lock() 77 | f := t.m[key] 78 | t.mu.Unlock() 79 | if f == nil { 80 | return nil 81 | } 82 | select { 83 | case <-ctx.Done(): 84 | return ctx.Err() 85 | case <-f.ch: 86 | return f.err 87 | } 88 | } 89 | 90 | // sweeper periodically scans for and force-closes expired leases to prevent 91 | // indefinite blocking when holders crash or hang. 92 | func (t *leaseTable) sweeper() { 93 | period := t.ttl / 2 94 | if period <= 0 { 95 | period = 10 * time.Millisecond 96 | } 97 | tick := time.NewTicker(period) 98 | defer tick.Stop() 99 | for { 100 | select { 101 | case <-tick.C: 102 | now := time.Now().UnixNano() 103 | t.mu.Lock() 104 | for k, f := range t.m { 105 | if f.exp > 0 && now >= f.exp { 106 | delete(t.m, k) 107 | if !f.done { 108 | f.err = ErrLeaseTimeout 109 | f.done = true 110 | close(f.ch) 111 | } 112 | } 113 | } 114 | t.mu.Unlock() 115 | case <-t.stopCh: 116 | return 117 | } 118 | } 119 | } 120 | 121 | // Stop shuts down the sweeper goroutine. 122 | func (t *leaseTable) Stop() { 123 | select { 124 | case <-t.stopCh: 125 | return 126 | default: 127 | close(t.stopCh) 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /cluster/hlc.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | const ( 9 | hlcLogicalBits = 16 // total logical bits (low end of the 64-bit HLC) 10 | hlcNodeBits = 8 // low bits reserved for nodeID (0..255) 11 | hlcSeqBits = 16 - 8 // remaining logical bits for per-ms sequence 12 | hlcNodeMask = (1 << hlcNodeBits) - 1 13 | hlcSeqMask = (1 << hlcSeqBits) - 1 14 | ) 15 | 16 | // hlc is a 64-bit Hybrid Logical Clock: 17 | // layout: 18 | // [48 bits physical millis][hlcSeqBits seq][hlcNodeBits nodeID]. 19 | type hlc struct { 20 | mu sync.Mutex 21 | physMS int64 22 | seq uint16 23 | nodeID uint16 24 | } 25 | 26 | // newHLC constructs an HLC that embeds a per-node ID in the low logical bits. 27 | // nodeID is masked to hlcNodeBits (e.g., 8 bits -> 0..255). 28 | func newHLC(nodeID uint16) *hlc { 29 | return &hlc{nodeID: nodeID & hlcNodeMask} 30 | } 31 | 32 | // Next returns a strictly monotonic timestamp for local events. 33 | // The result also serves as the LWW version (Ver) for writes that originate locally. 34 | func (h *hlc) Next() uint64 { 35 | now := time.Now().UnixMilli() 36 | 37 | h.mu.Lock() 38 | defer h.mu.Unlock() 39 | 40 | if now > h.physMS { 41 | h.physMS = now 42 | h.seq = 0 43 | } else { 44 | if h.seq < hlcSeqMask { 45 | h.seq++ 46 | } else { 47 | h.physMS++ 48 | h.seq = 0 49 | } 50 | } 51 | v := packHLC(h.physMS, h.seq, h.nodeID) 52 | return v 53 | } 54 | 55 | // Observe incorporates a remote HLC into our state to avoid regressions. 56 | // After Observe(remote), a subsequent Next() will be strictly > remote (monotonic). 
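//
// Layout example (illustrative): packHLC(physMS=0x2, seq=0x3, nodeID=0x4)
// = 0x2<<16 | 0x3<<8 | 0x4 = 0x020304; unpackHLC returns (0x2, 0x0304),
// and splitLogical(0x0304) yields (seq=0x3, nodeID=0x4).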
57 | func (h *hlc) Observe(remote uint64) { 58 | rp, rlog := unpackHLC(remote) 59 | rseq, _ := splitLogical(rlog) 60 | now := time.Now().UnixMilli() 61 | 62 | h.mu.Lock() 63 | defer h.mu.Unlock() 64 | 65 | phys := maxOf(h.physMS, now, rp) 66 | 67 | switch { 68 | case phys == rp && phys == h.physMS: 69 | target := h.seq 70 | if rseq > target { 71 | target = rseq 72 | } 73 | 74 | newSeq := target + 1 75 | if newSeq > hlcSeqMask { 76 | h.physMS = phys + 1 77 | h.seq = 0 78 | } else { 79 | h.physMS = phys 80 | h.seq = newSeq 81 | } 82 | case phys == rp && phys > h.physMS: 83 | newSeq := rseq + 1 84 | if newSeq > hlcSeqMask { 85 | h.physMS = phys + 1 86 | h.seq = 0 87 | } else { 88 | h.physMS = phys 89 | h.seq = newSeq 90 | } 91 | case phys == h.physMS && phys > rp: 92 | if h.seq < hlcSeqMask { 93 | h.seq++ 94 | } else { 95 | h.physMS++ 96 | h.seq = 0 97 | } 98 | 99 | default: 100 | h.physMS = phys 101 | h.seq = 0 102 | } 103 | 104 | } 105 | 106 | // packHLC encodes physical milliseconds and a combined (seq,nodeID) into a 64-bit HLC. 107 | func packHLC(physMS int64, seq uint16, nodeID uint16) uint64 { 108 | logical := ((seq & hlcSeqMask) << hlcNodeBits) | (nodeID & hlcNodeMask) 109 | return (uint64(physMS) << hlcLogicalBits) | uint64(logical) 110 | } 111 | 112 | // unpackHLC decodes a 64-bit HLC into physical milliseconds and the 16-bit logical. 113 | func unpackHLC(ts uint64) (physMS int64, logical uint16) { 114 | return int64(ts >> hlcLogicalBits), uint16(ts & ((1 << hlcLogicalBits) - 1)) 115 | } 116 | 117 | // splitLogical splits the 16-bit logical field into (seq, nodeID). 118 | func splitLogical(logical uint16) (seq uint16, nodeID uint16) { 119 | seq = (logical >> hlcNodeBits) & hlcSeqMask 120 | nodeID = logical & hlcNodeMask 121 | return 122 | } 123 | 124 | func maxOf(a, b, c int64) int64 { 125 | if b > a { 126 | a = b 127 | } 128 | if c > a { 129 | a = c 130 | } 131 | return a 132 | } 133 | -------------------------------------------------------------------------------- /cluster/membership.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "time" 7 | ) 8 | 9 | type membership struct { 10 | mu sync.RWMutex 11 | peers map[NodeID]*nodeMeta 12 | seen map[NodeID]int64 13 | epoch uint64 14 | } 15 | 16 | // newMembership creates an empty membership view with per-node metadata and 17 | // last-seen timestamps used for liveness and ring construction. 18 | func newMembership() *membership { 19 | return &membership{ 20 | peers: make(map[NodeID]*nodeMeta), 21 | seen: make(map[NodeID]int64), 22 | } 23 | } 24 | 25 | // snapshot returns copies of peers and seen maps along with the current epoch 26 | // so callers can take a consistent view without holding locks. 27 | func (m *membership) snapshot() (map[NodeID]*nodeMeta, map[NodeID]int64, uint64) { 28 | m.mu.RLock() 29 | defer m.mu.RUnlock() 30 | p := make(map[NodeID]*nodeMeta, len(m.peers)) 31 | for k, v := range m.peers { 32 | p[k] = v 33 | } 34 | 35 | s := make(map[NodeID]int64, len(m.seen)) 36 | for k, v := range m.seen { 37 | s[k] = v 38 | } 39 | return p, s, m.epoch 40 | } 41 | 42 | // integrate merges gossip from a peer: updates address, seen timestamps, 43 | // and tracks the highest epoch to detect cluster resyncs. 
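// Merge rules (summary): epoch only ratchets upward; the sender's Addr and
// seen[from] always refresh; for relayed peers the freshest last-seen
// timestamp wins, so stale gossip never makes a node look older.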
44 | func (m *membership) integrate(from NodeID, addr string, peers []PeerInfo, seen map[string]int64, epoch uint64, now int64) { 45 | m.mu.Lock() 46 | defer m.mu.Unlock() 47 | if epoch > m.epoch { 48 | m.epoch = epoch 49 | } 50 | 51 | if _, ok := m.peers[from]; !ok { 52 | m.peers[from] = newMeta(from, addr) 53 | } else { 54 | m.peers[from].Addr = addr 55 | } 56 | 57 | m.seen[from] = now 58 | 59 | for _, p := range peers { 60 | id := NodeID(p.ID) 61 | if _, ok := m.peers[id]; !ok { 62 | m.peers[id] = newMeta(id, p.Addr) 63 | } else { 64 | m.peers[id].Addr = p.Addr 65 | } 66 | } 67 | 68 | // merge remote observations: keep the freshest timestamp per node. 69 | for k, ts := range seen { 70 | id := NodeID(k) 71 | if old, ok := m.seen[id]; !ok || ts > old { 72 | m.seen[id] = ts 73 | } 74 | } 75 | } 76 | 77 | // alive returns nodes not suspected within the given timeframe. 78 | func (m *membership) alive(now int64, suspicionAfter time.Duration) []*nodeMeta { 79 | m.mu.RLock() 80 | defer m.mu.RUnlock() 81 | 82 | out := make([]*nodeMeta, 0, len(m.peers)) 83 | threshold := now - suspicionAfter.Nanoseconds() 84 | for id, meta := range m.peers { 85 | if m.seen[id] >= threshold { 86 | out = append(out, meta) 87 | } 88 | } 89 | return out 90 | } 91 | 92 | // pruneTombstones removes nodes that have not been seen for tombstoneAfter. 93 | func (m *membership) pruneTombstones(now int64, tombstoneAfter time.Duration) { 94 | m.mu.Lock() 95 | defer m.mu.Unlock() 96 | threshold := now - tombstoneAfter.Nanoseconds() 97 | for id := range m.peers { 98 | if ts, ok := m.seen[id]; ok && ts < threshold { 99 | delete(m.peers, id) 100 | delete(m.seen, id) 101 | } 102 | } 103 | } 104 | 105 | // ensure ensures a node entry exists and bumps its seen timestamp to now. 106 | func (m *membership) ensure(id NodeID, addr string) { 107 | m.mu.Lock() 108 | defer m.mu.Unlock() 109 | if _, ok := m.peers[id]; !ok { 110 | m.peers[id] = newMeta(id, addr) 111 | } 112 | m.seen[id] = time.Now().UnixNano() 113 | } 114 | 115 | // bumpEpoch increments the membership epoch to signal a topology change. 116 | func (m *membership) bumpEpoch() uint64 { 117 | m.mu.Lock() 118 | m.epoch++ 119 | e := m.epoch 120 | m.mu.Unlock() 121 | return e 122 | } 123 | 124 | // setWeight updates the rendezvous weight for a peer. 125 | func (m *membership) setWeight(id NodeID, weight uint64) { 126 | m.mu.RLock() 127 | if meta, ok := m.peers[id]; ok { 128 | atomic.StoreUint64(&meta.weight, weight) 129 | } 130 | m.mu.RUnlock() 131 | } 132 | -------------------------------------------------------------------------------- /cluster/wire.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | // CBOR-based wire protocol: frames carry a CBOR-encoded Base{T,ID} header 4 | // followed by message-specific fields. Keys/values are byte slices; values 5 | // may be gzip-compressed (Cp=true). LWW uses Ver and HLC. 
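// Illustrative request/response pairing (editorial sketch; nextID is a
// hypothetical request-ID generator, not part of this file):
//
//	req := MsgGet{Base: Base{T: MTGet, ID: nextID()}, Key: keyBytes}
//	// the peer answers with a MsgGetResp carrying the same Base.ID,
//	// which is how the caller correlates responses on one connection.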
6 | 7 | type MsgType uint8 8 | 9 | const ( 10 | MTHello MsgType = iota + 1 11 | MTHelloResp 12 | MTGet 13 | MTGetResp 14 | MTGetBulk 15 | MTGetBulkResp 16 | MTSet 17 | MTSetResp 18 | MTSetBulk 19 | MTSetBulkResp 20 | MTDelete 21 | MTDeleteResp 22 | MTLeaseLoad 23 | MTLeaseLoadResp 24 | MTMigratePull 25 | MTGossip 26 | MTBackfillDigestReq MsgType = 200 27 | MTBackfillDigestResp MsgType = 201 28 | MTBackfillKeysReq MsgType = 202 29 | MTBackfillKeysResp MsgType = 203 30 | ) 31 | 32 | // PeerInfo advertises identity + current dialable address 33 | type PeerInfo struct { 34 | ID string `cbor:"i"` 35 | Addr string `cbor:"a"` 36 | } 37 | 38 | type Base struct { 39 | T MsgType `cbor:"t"` 40 | ID uint64 `cbor:"id"` 41 | } 42 | 43 | type MsgHello struct { 44 | Base 45 | FromID string `cbor:"fi"` 46 | FromAddr string `cbor:"fa"` 47 | Token string `cbor:"tok"` 48 | } 49 | 50 | type MsgHelloResp struct { 51 | Base 52 | OK bool `cbor:"ok"` 53 | PeerID string `cbor:"pi"` 54 | Err string `cbor:"err,omitempty"` 55 | } 56 | 57 | type MsgGet struct { 58 | Base 59 | Key []byte `cbor:"k"` 60 | } 61 | type MsgGetResp struct { 62 | Base 63 | Found bool `cbor:"f"` 64 | Val []byte `cbor:"v"` 65 | Exp int64 `cbor:"e"` 66 | Cp bool `cbor:"cp"` 67 | Err string `cbor:"err,omitempty"` 68 | } 69 | 70 | type MsgGetBulk struct { 71 | Base 72 | Keys [][]byte `cbor:"ks"` 73 | } 74 | 75 | type MsgGetBulkResp struct { 76 | Base 77 | Hits []bool `cbor:"h"` 78 | Vals [][]byte `cbor:"vs"` 79 | Exps []int64 `cbor:"es"` 80 | Cps []bool `cbor:"cps"` 81 | Err string `cbor:"err,omitempty"` 82 | } 83 | 84 | type MsgSet struct { 85 | Base 86 | Key []byte `cbor:"k"` 87 | Val []byte `cbor:"v"` 88 | Exp int64 `cbor:"e"` 89 | Ver uint64 `cbor:"ver"` 90 | Cp bool `cbor:"cp"` 91 | } 92 | 93 | type MsgSetResp struct { 94 | Base 95 | OK bool `cbor:"ok"` 96 | Err string `cbor:"err,omitempty"` 97 | } 98 | 99 | type KV struct { 100 | K []byte `cbor:"k"` 101 | V []byte `cbor:"v"` 102 | E int64 `cbor:"e"` 103 | Ver uint64 `cbor:"ver"` 104 | Cp bool `cbor:"cp"` 105 | } 106 | 107 | type MsgSetBulk struct { 108 | Base 109 | Items []KV `cbor:"items"` 110 | } 111 | 112 | type MsgSetBulkResp struct { 113 | Base 114 | OK bool `cbor:"ok"` 115 | Err string `cbor:"err,omitempty"` 116 | } 117 | 118 | type MsgDel struct { 119 | Base 120 | Key []byte `cbor:"k"` 121 | Ver uint64 `cbor:"ver"` 122 | } 123 | 124 | type MsgDelResp struct { 125 | Base 126 | OK bool `cbor:"ok"` 127 | Err string `cbor:"err,omitempty"` 128 | } 129 | 130 | type MsgLeaseLoad struct { 131 | Base 132 | Key []byte `cbor:"k"` 133 | } 134 | 135 | type MsgLeaseLoadResp struct { 136 | Base 137 | Found bool `cbor:"f"` 138 | Val []byte `cbor:"v"` 139 | Exp int64 `cbor:"e"` 140 | Cp bool `cbor:"cp"` 141 | Err string `cbor:"err,omitempty"` 142 | } 143 | 144 | type MsgGossip struct { 145 | Base 146 | FromID string `cbor:"fi"` 147 | FromAddr string `cbor:"fa"` 148 | Seen map[string]int64 `cbor:"sn"` // keys are peer IDs 149 | Peers []PeerInfo `cbor:"pe"` // ID + current address 150 | Load NodeLoad `cbor:"ld"` 151 | TopK []HotKey `cbor:"hh"` 152 | Epoch uint64 `cbor:"ep"` 153 | } 154 | 155 | type NodeLoad struct { 156 | Size int64 `cbor:"sz"` 157 | Evictions int64 `cbor:"ev"` 158 | FreeMemBytes uint64 `cbor:"fm"` 159 | CPUu16 uint16 `cbor:"cpu"` 160 | } 161 | 162 | type HotKey struct { 163 | K []byte `cbor:"k"` 164 | C uint64 `cbor:"c"` 165 | } 166 | -------------------------------------------------------------------------------- /shard.go: 
-------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | ) 7 | 8 | // shard is a per-partition structure that confines contention; map/list mutations under mu, counters via atomics. 9 | type shard[K comparable, V any] struct { 10 | mu sync.RWMutex 11 | data map[K]*cacheItem[V] 12 | 13 | // Intrusive LRU list sentinels (head.next = MRU, tail.prev = LRU). 14 | // Invariant: head.prev == nil, tail.next == nil, and head↔…↔tail forms the chain. 15 | head *cacheItem[V] 16 | tail *cacheItem[V] 17 | 18 | lfuList *lfuList[K, V] // Allocated only for pure LFU policy. 19 | 20 | size int64 // live items (atomic) 21 | hits int64 // per-shard hits (atomic) 22 | misses int64 // per-shard misses (atomic) 23 | evictions int64 // per-shard evictions (atomic) 24 | expirations int64 // per-shard TTL expirations (atomic) 25 | 26 | // AdmissionLFU-only: shard-local adaptive admission filter. 27 | admission *adaptiveAdmissionFilter 28 | 29 | // Observability: frequency of last evicted victim (AdmissionLFU). 30 | lastVictimFrequency uint64 31 | } 32 | 33 | // initLRU sets up an empty LRU list with head/tail sentinels (no nil checks on operations). 34 | func (s *shard[K, V]) initLRU() { 35 | s.head = &cacheItem[V]{} 36 | s.tail = &cacheItem[V]{} 37 | // head <-> tail (empty) 38 | s.head.next = s.tail 39 | s.tail.prev = s.head 40 | } 41 | 42 | // addToLRUHead inserts item as MRU directly after head (O(1)). 43 | func (s *shard[K, V]) addToLRUHead(item *cacheItem[V]) { 44 | oldNext := s.head.next 45 | // head -> item -> oldNext 46 | s.head.next = item 47 | item.next = oldNext 48 | // head <- item <- oldNext 49 | item.prev = s.head 50 | oldNext.prev = item 51 | } 52 | 53 | // removeFromLRU unlinks item from the list by splicing neighbors; clears item links. 54 | func (s *shard[K, V]) removeFromLRU(item *cacheItem[V]) { 55 | // prev -> next (skip item) 56 | if item.prev != nil { 57 | item.prev.next = item.next 58 | } 59 | if item.next != nil { 60 | item.next.prev = item.prev 61 | } 62 | item.prev = nil 63 | item.next = nil 64 | } 65 | 66 | // moveToLRUHead promotes item to MRU unless already MRU (unlink then insert after head). 67 | func (s *shard[K, V]) moveToLRUHead(item *cacheItem[V]) { 68 | if s.head.next == item { 69 | return 70 | } 71 | // Unlink from current position. 72 | if item.prev != nil { 73 | item.prev.next = item.next 74 | } 75 | if item.next != nil { 76 | item.next.prev = item.prev 77 | } 78 | 79 | // Insert right after head. 80 | oldNext := s.head.next 81 | s.head.next = item 82 | item.prev = s.head 83 | item.next = oldNext 84 | oldNext.prev = item 85 | } 86 | 87 | // cleanup removes expired items and halves per-item frequency for AdmissionLFU (cheap aging). 88 | // Phase 1: collect expired keys and apply in-place aging under write lock. 89 | // Phase 2: delete collected keys, unlink from structures, recycle nodes, update stats. 90 | func (s *shard[K, V]) cleanup(now int64, evictionPolicy EvictionPolicy, itemPool *sync.Pool, statsEnabled bool) { 91 | s.mu.Lock() 92 | defer s.mu.Unlock() 93 | 94 | var keysToDelete []K 95 | for key, item := range s.data { 96 | if item.expireTime > 0 && now > item.expireTime { 97 | keysToDelete = append(keysToDelete, key) 98 | continue 99 | } 100 | // Lightweight aging (AdmissionLFU only) to prevent stale, permanently high frequencies. 
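// For example, an item at frequency 8 decays 8 -> 4 -> 2 -> 1 over
// successive cleanup passes and then stays at 1.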
101 | if evictionPolicy == AdmissionLFU && item.frequency > 1 { 102 | item.frequency >>= 1 103 | } 104 | } 105 | 106 | // Destructive pass over collected keys (re-check existence under lock). 107 | for _, key := range keysToDelete { 108 | if item, exists := s.data[key]; exists { 109 | delete(s.data, key) 110 | s.removeFromLRU(item) 111 | if evictionPolicy == LFU { 112 | s.lfuList.remove(item) 113 | } 114 | itemPool.Put(item) 115 | atomic.AddInt64(&s.size, -1) 116 | if statsEnabled { 117 | atomic.AddInt64(&s.expirations, 1) 118 | } 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /hash_test.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | var testStrings = []string{ 9 | "a", // 1 byte 10 | "test", // 4 bytes 11 | "testkey", // 7 bytes 12 | "testkey1", // 8 bytes 13 | "testkey12", // 9 bytes 14 | "user:profile:12345", // 18 bytes 15 | "cache:session:user:1234567890:data", // 34 bytes 16 | "this:is:a:very:long:cache:key:that:represents:typical:usage:in:high:performance:systems", // 89 bytes 17 | } 18 | 19 | // Test that hash function produces consistent results 20 | func TestHashConsistency(t *testing.T) { 21 | h := newHasher[string]() 22 | 23 | for _, str := range testStrings { 24 | hash1 := h.hash(str) 25 | hash2 := h.hash(str) 26 | 27 | if hash1 != hash2 { 28 | t.Errorf("Hash function not consistent for string %q: got %v and %v", str, hash1, hash2) 29 | } 30 | } 31 | } 32 | 33 | // Test that different strings produce different hashes (basic collision test) 34 | func TestHashDistribution(t *testing.T) { 35 | h := newHasher[string]() 36 | hashes := make(map[uint64]string) 37 | 38 | for _, str := range testStrings { 39 | hash := h.hash(str) 40 | if existing, exists := hashes[hash]; exists { 41 | t.Errorf("Hash collision: %q and %q both hash to %v", str, existing, hash) 42 | } 43 | hashes[hash] = str 44 | } 45 | } 46 | 47 | // Test integer hashing 48 | func TestIntegerHashing(t *testing.T) { 49 | h := newHasher[int]() 50 | 51 | testInts := []int{0, 1, 42, 1000, -1, -42} 52 | hashes := make(map[uint64]int) 53 | 54 | for _, num := range testInts { 55 | hash := h.hash(num) 56 | if existing, exists := hashes[hash]; exists { 57 | t.Errorf("Hash collision: %d and %d both hash to %v", num, existing, hash) 58 | } 59 | hashes[hash] = num 60 | } 61 | } 62 | 63 | func TestHybridThreshold(t *testing.T) { 64 | h := newHasher[string]() 65 | 66 | shortString := "short" // 5 bytes - should use FNV 67 | longString := "this_is_a_very_long_string_that_exceeds_the_threshold_length" // >32 bytes - should use xxHash 68 | 69 | shortHash := h.hash(shortString) 70 | longHash := h.hash(longString) 71 | 72 | if shortHash == 0 || longHash == 0 { 73 | t.Error("Hash functions should not produce zero hashes for non-empty strings") 74 | } 75 | 76 | if shortHash == longHash { 77 | t.Error("Different strings should produce different hashes") 78 | } 79 | } 80 | 81 | func BenchmarkHasherString(t *testing.B) { 82 | h := newHasher[string]() 83 | 84 | for _, str := range testStrings { 85 | t.Run(fmt.Sprintf("len_%d", len(str)), func(b *testing.B) { 86 | b.ResetTimer() 87 | for i := 0; i < b.N; i++ { 88 | _ = h.hash(str) 89 | } 90 | }) 91 | } 92 | } 93 | 94 | func BenchmarkRealisticWorkload(t *testing.B) { 95 | h := newHasher[string]() 96 | 97 | workloadKeys := []string{ 98 | "u:1", // Very short user ID 99 | "user:1234", // Short user key 100 | 
"session:abc123def456", // Medium session key 101 | "cache:user:profile:1234567890", // Long structured key 102 | "api:v1:endpoint:users:get:with:filters:and:pagination:page:1:limit:50", // Very long API key 103 | } 104 | 105 | t.ResetTimer() 106 | for i := 0; i < t.N; i++ { 107 | key := workloadKeys[i%len(workloadKeys)] 108 | _ = h.hash(key) 109 | } 110 | } 111 | 112 | func BenchmarkHashDistribution(t *testing.B) { 113 | h := newHasher[string]() 114 | 115 | // Generate keys with common prefixes to test collision resistance 116 | keys := make([]string, 1000) 117 | for i := range keys { 118 | keys[i] = fmt.Sprintf("user:session:id:%d:data", i) 119 | } 120 | 121 | t.ResetTimer() 122 | collisions := make(map[uint64]int) 123 | for i := 0; i < t.N && i < len(keys); i++ { 124 | hash := h.hash(keys[i]) 125 | collisions[hash]++ 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /cluster/adapter.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | cache "github.com/unkn0wn-root/kioshun" 8 | ) 9 | 10 | // DistributedCache adapts a running Node to the cache.Cache interface so 11 | // existing code using kioshun's single-node Cache can switch to the clustered 12 | // backend without invasive changes. Methods that cannot be expressed 13 | // cluster‑wide (e.g., Clear, Size, Stats) operate on the local shard only. 14 | // 15 | // This adapter exposes two usage styles: 16 | // 1. Compatibility methods without context (Set/Get/Delete) that internally 17 | // use Node's configured timeouts (ReadTimeout/WriteTimeout). 18 | // 2. Context-aware methods (SetCtx/GetCtx/DeleteCtx/GetOrLoadCtx) that accept 19 | // a caller-provided context and surface errors. 20 | type DistributedCache[K comparable, V any] struct { 21 | n *Node[K, V] 22 | } 23 | 24 | // NewDistributedCache wraps a started Node and returns a cache.Cache 25 | // compatible adapter. Call node.Start() before using the adapter, and Stop() 26 | // (or Close() on the adapter) during shutdown. 27 | func NewDistributedCache[K comparable, V any](n *Node[K, V]) *DistributedCache[K, V] { 28 | return &DistributedCache[K, V]{n: n} 29 | } 30 | 31 | // Alias for NewDistributedCache 32 | func NewClient[K comparable, V any](n *Node[K, V]) *DistributedCache[K, V] { 33 | return NewDistributedCache[K, V](n) 34 | } 35 | 36 | // getCtx returns a context with timeout derived from node security settings. 37 | func (a *DistributedCache[K, V]) getCtx(write bool) (context.Context, context.CancelFunc) { 38 | to := a.n.cfg.Sec.ReadTimeout 39 | if write { 40 | to = a.n.cfg.Sec.WriteTimeout 41 | } 42 | if to <= 0 { 43 | to = 3 * time.Second 44 | } 45 | return context.WithTimeout(context.Background(), to) 46 | } 47 | 48 | // Set forwards to Node.Set with the configured write timeout. 49 | func (a *DistributedCache[K, V]) Set(key K, value V, ttl time.Duration) error { 50 | ctx, cancel := a.getCtx(true) 51 | defer cancel() 52 | return a.n.Set(ctx, key, value, ttl) 53 | } 54 | 55 | // SetCtx forwards to Node.Set using the provided context. 56 | func (a *DistributedCache[K, V]) SetCtx(ctx context.Context, key K, value V, ttl time.Duration) error { 57 | return a.n.Set(ctx, key, value, ttl) 58 | } 59 | 60 | // Get forwards to Node.Get with the configured read timeout. 
61 | func (a *DistributedCache[K, V]) Get(key K) (V, bool) { 62 | ctx, cancel := a.getCtx(false) 63 | defer cancel() 64 | v, ok, err := a.n.Get(ctx, key) 65 | if err != nil { 66 | var zero V 67 | return zero, false 68 | } 69 | return v, ok 70 | } 71 | 72 | func (a *DistributedCache[K, V]) GetCtx(ctx context.Context, key K) (V, bool, error) { 73 | return a.n.Get(ctx, key) 74 | } 75 | 76 | func (a *DistributedCache[K, V]) Delete(key K) bool { 77 | ctx, cancel := a.getCtx(true) 78 | defer cancel() 79 | return a.n.Delete(ctx, key) == nil 80 | } 81 | 82 | func (a *DistributedCache[K, V]) DeleteCtx(ctx context.Context, key K) error { 83 | return a.n.Delete(ctx, key) 84 | } 85 | 86 | // GetOrLoadCtx delegates to Node.GetOrLoad, enabling single-flight loading via 87 | // the Node's Lease table on the primary owner. This is the preferred interface 88 | // for read-through caching at the application layer. 89 | func (a *DistributedCache[K, V]) GetOrLoadCtx(ctx context.Context, key K, loader func(context.Context) (V, time.Duration, error)) (V, error) { 90 | return a.n.GetOrLoad(ctx, key, loader) 91 | } 92 | 93 | // Clear clears only the local in-memory shard. 94 | // This does not broadcast a cluster-wide clear. 95 | // Callers requiring global invalidation should implement an explicit protocol at a higher layer. 96 | func (a *DistributedCache[K, V]) Clear() { a.n.local.Clear() } 97 | 98 | // Size returns the size of the local shard only. 99 | func (a *DistributedCache[K, V]) Size() int64 { return a.n.local.Size() } 100 | 101 | // Stats returns statistics from the local shard only. 102 | func (a *DistributedCache[K, V]) Stats() cache.Stats { return a.n.local.Stats() } 103 | 104 | // Close stops the node and returns nil. 105 | func (a *DistributedCache[K, V]) Close() error { a.n.Stop(); return nil } 106 | -------------------------------------------------------------------------------- /benchmarks/benchmark_runner.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "strings" 8 | "time" 9 | ) 10 | 11 | func main() { 12 | fmt.Println("=== KIOSHUN Cache Benchmark Suite ===") 13 | fmt.Println("Running benchmarks against popular Go caches") 14 | fmt.Println() 15 | 16 | benchmarks := []struct { 17 | name string 18 | pattern string 19 | description string 20 | benchtime string 21 | }{ 22 | { 23 | name: "Comparison - Set Operations", 24 | pattern: "BenchmarkCacheComparison_Set", 25 | description: "Pure write performance comparison", 26 | benchtime: "5s", 27 | }, 28 | { 29 | name: "Comparison - Get Operations", 30 | pattern: "BenchmarkCacheComparison_Get", 31 | description: "Pure read performance comparison", 32 | benchtime: "5s", 33 | }, 34 | { 35 | name: "Comparison - Mixed Operations", 36 | pattern: "BenchmarkCacheComparison_Mixed", 37 | description: "Mixed read/write workload comparison", 38 | benchtime: "5s", 39 | }, 40 | { 41 | name: "Comparison - High Contention", 42 | pattern: "BenchmarkCacheComparison_HighContention", 43 | description: "High contention scenario comparison", 44 | benchtime: "5s", 45 | }, 46 | { 47 | name: "Comparison - Read Heavy", 48 | pattern: "BenchmarkCacheComparison_ReadHeavy", 49 | description: "Read-heavy workload comparison", 50 | benchtime: "3s", 51 | }, 52 | { 53 | name: "Comparison - Write Heavy", 54 | pattern: "BenchmarkCacheComparison_WriteHeavy", 55 | description: "Write-heavy workload comparison", 56 | benchtime: "3s", 57 | }, 58 | { 59 | name: "Comparison - Close to 
Real World", 60 | pattern: "BenchmarkCacheComparison_RealWorldWorkload", 61 | description: "Realistic workload patterns", 62 | benchtime: "3s", 63 | }, 64 | { 65 | name: "Heavy Load Tests", 66 | pattern: "BenchmarkCacheHeavyLoad", 67 | description: "Extreme load scenarios for kioshun", 68 | benchtime: "3s", 69 | }, 70 | { 71 | name: "Contention Stress", 72 | pattern: "BenchmarkCacheContentionStress", 73 | description: "High contention stress test for kioshun", 74 | benchtime: "3s", 75 | }, 76 | { 77 | name: "Eviction Stress", 78 | pattern: "BenchmarkCacheEvictionStress", 79 | description: "Heavy eviction testing for kioshun", 80 | benchtime: "3s", 81 | }, 82 | { 83 | name: "Memory Pressure", 84 | pattern: "BenchmarkCacheMemoryPressure", 85 | description: "Memory pressure testing for kioshun", 86 | benchtime: "3s", 87 | }, 88 | { 89 | name: "Sharding Efficiency", 90 | pattern: "BenchmarkCacheShardingEfficiency", 91 | description: "Sharding performance analysis for kioshun", 92 | benchtime: "3s", 93 | }, 94 | } 95 | 96 | totalStart := time.Now() 97 | 98 | for i, bench := range benchmarks { 99 | fmt.Printf("[%d/%d] %s\n", i+1, len(benchmarks), bench.name) 100 | fmt.Printf("Description: %s\n", bench.description) 101 | fmt.Printf("Running: go test -bench=%s -benchmem -benchtime=%s\n", bench.pattern, bench.benchtime) 102 | fmt.Println(strings.Repeat("-", 80)) 103 | 104 | start := time.Now() 105 | 106 | cmd := exec.Command("go", "test", "-bench="+bench.pattern, "-benchmem", "-benchtime="+bench.benchtime, ".") 107 | cmd.Stdout = os.Stdout 108 | cmd.Stderr = os.Stderr 109 | 110 | err := cmd.Run() 111 | 112 | duration := time.Since(start) 113 | 114 | if err != nil { 115 | fmt.Printf(" - Benchmark failed: %v\n", err) 116 | } else { 117 | fmt.Printf(" + Benchmark completed in %v\n", duration) 118 | } 119 | 120 | fmt.Println() 121 | } 122 | 123 | totalDuration := time.Since(totalStart) 124 | fmt.Printf("🏁 All benchmarks completed in %v\n", totalDuration) 125 | fmt.Println() 126 | fmt.Println("=== Summary ===") 127 | fmt.Println("The benchmarks compare kioshun cache against:") 128 | fmt.Println("- Ristretto (by Dgraph)") 129 | fmt.Println("- BigCache (by Allegro)") 130 | fmt.Println("- FreeCache (by Coocood)") 131 | fmt.Println("- Go-cache (by PatrickMN)") 132 | fmt.Println() 133 | fmt.Println("Key performance areas tested:") 134 | fmt.Println("- Pure read/write performance") 135 | fmt.Println("- Mixed workload scenarios") 136 | fmt.Println("- High contention handling") 137 | fmt.Println("- Memory efficiency") 138 | fmt.Println("- Eviction policy performance") 139 | fmt.Println("- Sharding effectiveness") 140 | fmt.Println("- Scalability under load") 141 | } 142 | -------------------------------------------------------------------------------- /lfu.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | // freqNode is a doubly-linked bucket for one exact frequency. 4 | // non-sentinel empty buckets are removed eagerly so head.next is the current min. 5 | type freqNode[K comparable, V any] struct { 6 | freq int64 // exact frequency (>= 0); head sentinel uses 0 7 | items map[*cacheItem[V]]struct{} // set of items at this frequency 8 | prev *freqNode[K, V] 9 | next *freqNode[K, V] 10 | } 11 | 12 | // lfuList is an LFU index of ascending-frequency buckets (sentinel head at freq==0). 13 | // O(1) add/increment/remove; used under the shard lock. 
14 | type lfuList[K comparable, V any] struct { 15 | head *freqNode[K, V] 16 | freqMap map[int64]*freqNode[K, V] // freq → bucket 17 | itemFreq map[*cacheItem[V]]*freqNode[K, V] // item → bucket 18 | } 19 | 20 | // newLFUList creates a circular list with a freq==0 sentinel; sentinel holds no real items. 21 | func newLFUList[K comparable, V any]() *lfuList[K, V] { 22 | list := &lfuList[K, V]{ 23 | head: &freqNode[K, V]{freq: 0, items: make(map[*cacheItem[V]]struct{})}, 24 | freqMap: make(map[int64]*freqNode[K, V]), 25 | itemFreq: make(map[*cacheItem[V]]*freqNode[K, V]), 26 | } 27 | list.head.next = list.head 28 | list.head.prev = list.head 29 | return list 30 | } 31 | 32 | // add inserts item with frequency=1 and indexes it. 33 | func (l *lfuList[K, V]) add(item *cacheItem[V]) { 34 | freq := int64(1) 35 | item.frequency = freq 36 | 37 | node := l.getOrCreateFreqNode(freq) 38 | node.items[item] = struct{}{} 39 | l.itemFreq[item] = node 40 | } 41 | 42 | // increment bumps item's frequency by 1, moves it to the correct bucket, and removes an empty old bucket. 43 | func (l *lfuList[K, V]) increment(item *cacheItem[V]) { 44 | cur := l.itemFreq[item] 45 | if cur == nil { 46 | // Item not indexed yet (defensive); treat as new with freq=1. 47 | l.add(item) 48 | return 49 | } 50 | 51 | newFreq := cur.freq + 1 52 | delete(cur.items, item) 53 | 54 | nxt := cur.next 55 | var target *freqNode[K, V] 56 | if nxt != l.head && nxt.freq == newFreq { 57 | // Fast path: the next bucket already has the desired frequency. 58 | target = nxt 59 | } else { 60 | // Create or find the exact bucket at newFreq right after 'cur'. 61 | target = l.ensureIndex(cur, newFreq) 62 | } 63 | target.items[item] = struct{}{} 64 | l.itemFreq[item] = target 65 | item.frequency = newFreq 66 | 67 | // Drop the old bucket if it is now empty (sentinel node is never removed). 68 | if len(cur.items) == 0 && cur.freq != 0 { 69 | l.removeFreqNode(cur) 70 | } 71 | } 72 | 73 | // removeLFU removes and returns one item from the minimum-frequency bucket 74 | // unlinks the bucket if it becomes empty. 75 | func (l *lfuList[K, V]) removeLFU() *cacheItem[V] { 76 | node := l.head.next 77 | if node == l.head { 78 | return nil // list is empty 79 | } 80 | // By invariant, non-sentinel buckets are never empty. 81 | var victim *cacheItem[V] 82 | for it := range node.items { 83 | victim = it 84 | break 85 | } 86 | 87 | delete(node.items, victim) 88 | delete(l.itemFreq, victim) 89 | if len(node.items) == 0 { 90 | l.removeFreqNode(node) 91 | } 92 | return victim 93 | } 94 | 95 | // remove deletes a specific item from its bucket and removes the bucket if it becomes empty (non-sentinel). 96 | func (l *lfuList[K, V]) remove(item *cacheItem[V]) { 97 | node := l.itemFreq[item] 98 | if node == nil { 99 | return // item not tracked 100 | } 101 | 102 | delete(node.items, item) 103 | delete(l.itemFreq, item) 104 | 105 | if len(node.items) == 0 && node.freq != 0 { 106 | l.removeFreqNode(node) 107 | } 108 | } 109 | 110 | // ensureIndex returns the bucket for freq, inserting a new bucket immediately after prev to keep ascending order. 111 | func (l *lfuList[K, V]) ensureIndex(prev *freqNode[K, V], freq int64) *freqNode[K, V] { 112 | // Exact-hit fast path via freqMap. 
113 | if node, ok := l.freqMap[freq]; ok { 114 | return node 115 | } 116 | 117 | newNode := &freqNode[K, V]{ 118 | freq: freq, 119 | items: make(map[*cacheItem[V]]struct{}), 120 | } 121 | 122 | nxt := prev.next 123 | prev.next = newNode 124 | newNode.prev = prev 125 | newNode.next = nxt 126 | nxt.prev = newNode 127 | 128 | l.freqMap[freq] = newNode 129 | return newNode 130 | } 131 | 132 | // getOrCreateFreqNode returns the bucket for freq, inserting after freq-1 (or after head for freq==1). 133 | func (l *lfuList[K, V]) getOrCreateFreqNode(freq int64) *freqNode[K, V] { 134 | if freq == 1 { 135 | return l.ensureIndex(l.head, 1) 136 | } 137 | // Insert immediately after the previous frequency bucket. 138 | prev := l.freqMap[freq-1] 139 | return l.ensureIndex(prev, freq) 140 | } 141 | 142 | // removeFreqNode unlinks an empty non-sentinel bucket and drops its freq map entry. 143 | func (l *lfuList[K, V]) removeFreqNode(node *freqNode[K, V]) { 144 | node.prev.next = node.next 145 | node.next.prev = node.prev 146 | delete(l.freqMap, node.freq) 147 | } 148 | -------------------------------------------------------------------------------- /manager.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | ) 7 | 8 | // GlobalManager holds global cache instances 9 | var GlobalManager = NewManager() 10 | 11 | // Manager manages multiple named cache instances with different configurations 12 | type Manager struct { 13 | caches sync.Map // map of cache instances by name 14 | configs map[string]Config 15 | configMu sync.RWMutex 16 | } 17 | 18 | // NewManager creates a new cache manager instance 19 | func NewManager() *Manager { 20 | return &Manager{ 21 | configs: make(map[string]Config), 22 | } 23 | } 24 | 25 | // RegisterCache registers a configuration for a named cache. 26 | // Returns an error if a configuration with the same name already exists. 27 | func (m *Manager) RegisterCache(name string, config Config) error { 28 | m.configMu.Lock() 29 | defer m.configMu.Unlock() 30 | 31 | if _, exists := m.configs[name]; exists { 32 | return newCacheError("register", name, ErrCacheExists) 33 | } 34 | 35 | m.configs[name] = config 36 | return nil 37 | } 38 | 39 | // GetCache retrieves an existing cache or creates a new one with the registered 40 | // configuration. If no configuration is registered, uses DefaultConfig(). 
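// Illustrative usage (editorial sketch; "sessions" is a hypothetical name):
//
//	m := NewManager()
//	_ = m.RegisterCache("sessions", DefaultConfig())
//	c, err := GetCache[string, int](m, "sessions")
//	if err == nil {
//		_ = c // ready-to-use *InMemoryCache[string, int]
//	}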
41 | func GetCache[K comparable, V any](m *Manager, name string) (*InMemoryCache[K, V], error) { 42 | // Fast path: return existing cache if found (most common case) 43 | if cached, ok := m.caches.Load(name); ok { 44 | if cache, ok := cached.(*InMemoryCache[K, V]); ok { 45 | return cache, nil 46 | } 47 | return nil, newCacheError("get", name, ErrTypeMismatch) 48 | } 49 | 50 | m.configMu.RLock() 51 | config, exists := m.configs[name] 52 | m.configMu.RUnlock() 53 | 54 | if !exists { 55 | config = DefaultConfig() 56 | } 57 | 58 | // Slow path: create new cache 59 | cache := New[K, V](config) 60 | // Atomic LoadOrStore handles race condition where multiple goroutines 61 | // attempt to create the same cache simultaneously 62 | if actual, loaded := m.caches.LoadOrStore(name, cache); loaded { 63 | // Another goroutine created the cache first 64 | cache.Close() 65 | // Return the winner's cache if types match 66 | if existingCache, ok := actual.(*InMemoryCache[K, V]); ok { 67 | return existingCache, nil 68 | } 69 | return nil, newCacheError("get", name, ErrTypeMismatch) 70 | } 71 | 72 | return cache, nil 73 | } 74 | 75 | // GetCacheStats returns performance statistics for all managed caches. 76 | func (m *Manager) GetCacheStats() map[string]Stats { 77 | stats := make(map[string]Stats) 78 | 79 | m.caches.Range(func(key, value any) bool { 80 | if name, ok := key.(string); ok { 81 | if cache, ok := value.(interface{ Stats() Stats }); ok { 82 | stats[name] = cache.Stats() 83 | } 84 | } 85 | return true 86 | }) 87 | 88 | return stats 89 | } 90 | 91 | // CloseAll closes all managed cache instances and returns any errors encountered. 92 | // Uses two-phase approach: first close all caches, then clear the registry. 93 | func (m *Manager) CloseAll() error { 94 | var closeErrors []error 95 | 96 | // Phase 1: Close all cache instances 97 | m.caches.Range(func(key, value any) bool { 98 | if cache, ok := value.(interface{ Close() error }); ok { 99 | if err := cache.Close(); err != nil { 100 | if name, ok := key.(string); ok { 101 | closeErrors = append(closeErrors, newCacheError("close", name, err)) 102 | } else { 103 | closeErrors = append(closeErrors, wrapError("close", err)) 104 | } 105 | } 106 | } 107 | return true 108 | }) 109 | 110 | // Phase 2: Clear all registry entries 111 | m.caches.Range(func(key, _ any) bool { 112 | m.caches.Delete(key) 113 | return true 114 | }) 115 | 116 | if len(closeErrors) > 0 { 117 | return fmt.Errorf("errors closing caches: %v", closeErrors) 118 | } 119 | 120 | return nil 121 | } 122 | 123 | // RemoveCache removes and closes the named cache instance. 124 | // Removes from registry and cleans up both runtime and configuration state. 125 | func (m *Manager) RemoveCache(name string) error { 126 | if cached, ok := m.caches.LoadAndDelete(name); ok { 127 | if cache, ok := cached.(interface{ Close() error }); ok { 128 | return cache.Close() 129 | } 130 | } 131 | 132 | m.configMu.Lock() 133 | delete(m.configs, name) 134 | m.configMu.Unlock() 135 | 136 | return nil 137 | } 138 | 139 | // RegisterGlobalCache registers a configuration in the global manager. 140 | func RegisterGlobalCache(name string, config Config) error { 141 | return GlobalManager.RegisterCache(name, config) 142 | } 143 | 144 | // GetGlobalCache retrieves or creates a cache from the global manager. 
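// Illustrative one-liner (editorial sketch with a hypothetical name):
//
//	c, err := GetGlobalCache[string, []byte]("responses")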
145 | func GetGlobalCache[K comparable, V any](name string) (*InMemoryCache[K, V], error) { 146 | return GetCache[K, V](GlobalManager, name) 147 | } 148 | 149 | // GetGlobalCacheStats returns stats for all caches in the global manager. 150 | func GetGlobalCacheStats() map[string]Stats { 151 | return GlobalManager.GetCacheStats() 152 | } 153 | 154 | // CloseAllGlobalCaches closes all caches in the global manager. 155 | func CloseAllGlobalCaches() error { 156 | return GlobalManager.CloseAll() 157 | } 158 | -------------------------------------------------------------------------------- /benchmarks/cluster/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | networks: 4 | mesh: 5 | driver: bridge 6 | 7 | services: 8 | node1: 9 | build: 10 | context: ../.. 11 | dockerfile: _benchmarks/cluster/node/Dockerfile 12 | container_name: kioshun-mesh-node1 13 | environment: 14 | - PORT=8081 15 | - CACHE_BIND=:5011 16 | - CACHE_PUBLIC=node1:5011 17 | - CACHE_SEEDS=node1:5011,node2:5012,node3:5013 18 | - CACHE_AUTH=supersecret 19 | - ALLOW_KILL=true 20 | - KILL_TOKEN=letmein 21 | # Optional tuning (uncomment/tune as needed) 22 | # - REPLICATION_FACTOR=3 23 | # - WRITE_CONCERN=2 24 | - READ_MAX_FANOUT=1 25 | # - READ_PER_TRY_MS=100 26 | - READ_HEDGE_DELAY_MS=2 27 | - READ_HEDGE_INTERVAL_MS=2 28 | - WRITE_TIMEOUT_MS=2000 29 | # - READ_TIMEOUT_MS=3000 30 | # - SUSPICION_AFTER_MS=1500 31 | - WEIGHT_UPDATE_MS=500 32 | - GOSSIP_INTERVAL_MS=300 33 | networks: [mesh] 34 | healthcheck: 35 | test: ["CMD-SHELL", "wget -qO- http://localhost:8081/ready >/dev/null 2>&1 || exit 1"] 36 | interval: 2s 37 | timeout: 1s 38 | retries: 30 39 | restart: unless-stopped 40 | 41 | node2: 42 | build: 43 | context: ../.. 44 | dockerfile: _benchmarks/cluster/node/Dockerfile 45 | container_name: kioshun-mesh-node2 46 | environment: 47 | - PORT=8082 48 | - CACHE_BIND=:5012 49 | - CACHE_PUBLIC=node2:5012 50 | - CACHE_SEEDS=node1:5011,node2:5012,node3:5013 51 | - CACHE_AUTH=supersecret 52 | - ALLOW_KILL=true 53 | - KILL_TOKEN=letmein 54 | # Optional tuning (uncomment/tune as needed) 55 | # - REPLICATION_FACTOR=3 56 | # - WRITE_CONCERN=2 57 | - READ_MAX_FANOUT=1 58 | # - READ_PER_TRY_MS=100 59 | - READ_HEDGE_DELAY_MS=2 60 | - READ_HEDGE_INTERVAL_MS=2 61 | - WRITE_TIMEOUT_MS=2000 62 | # - READ_TIMEOUT_MS=3000 63 | # - SUSPICION_AFTER_MS=1500 64 | - WEIGHT_UPDATE_MS=500 65 | - GOSSIP_INTERVAL_MS=300 66 | networks: [mesh] 67 | healthcheck: 68 | test: ["CMD-SHELL", "wget -qO- http://localhost:8082/ready >/dev/null 2>&1 || exit 1"] 69 | interval: 2s 70 | timeout: 1s 71 | retries: 30 72 | restart: unless-stopped 73 | 74 | node3: 75 | build: 76 | context: ../.. 
77 | dockerfile: _benchmarks/cluster/node/Dockerfile 78 | container_name: kioshun-mesh-node3 79 | environment: 80 | - PORT=8083 81 | - CACHE_BIND=:5013 82 | - CACHE_PUBLIC=node3:5013 83 | - CACHE_SEEDS=node1:5011,node2:5012,node3:5013 84 | - CACHE_AUTH=supersecret 85 | - ALLOW_KILL=true 86 | - KILL_TOKEN=letmein 87 | # Optional tuning (uncomment/tune as needed) 88 | # - REPLICATION_FACTOR=3 89 | # - WRITE_CONCERN=2 90 | - READ_MAX_FANOUT=1 91 | # - READ_PER_TRY_MS=100 92 | - READ_HEDGE_DELAY_MS=2 93 | - READ_HEDGE_INTERVAL_MS=2 94 | - WRITE_TIMEOUT_MS=2000 95 | # - READ_TIMEOUT_MS=3000 96 | # - SUSPICION_AFTER_MS=1500 97 | - WEIGHT_UPDATE_MS=500 98 | - GOSSIP_INTERVAL_MS=300 99 | networks: [mesh] 100 | healthcheck: 101 | test: ["CMD-SHELL", "wget -qO- http://localhost:8083/ready >/dev/null 2>&1 || exit 1"] 102 | interval: 2s 103 | timeout: 1s 104 | retries: 30 105 | restart: unless-stopped 106 | 107 | runner: 108 | build: 109 | context: ../.. 110 | dockerfile: _benchmarks/cluster/runner/Dockerfile 111 | container_name: kioshun-mesh-runner 112 | environment: 113 | - TARGETS=http://node1:8081,http://node2:8082,http://node3:8083 114 | - DURATION=120s 115 | - CONCURRENCY=256 116 | - KEYS=50000 117 | - SET_RATIO=30 118 | - LOG_EVERY=0 119 | - STATS_EVERY=10s 120 | - SET_TTL_MS=-1 121 | - KILL_MODE=none # default: clean run; set to "random" or "target" to enable failures 122 | # - KILL_AFTER=45s 123 | # - KILL_TARGET=http://node2:8082 124 | # - KILL_TOKEN=letmein 125 | depends_on: 126 | node1: 127 | condition: service_healthy 128 | node2: 129 | condition: service_healthy 130 | node3: 131 | condition: service_healthy 132 | networks: [mesh] 133 | restart: "no" 134 | 135 | direct: 136 | build: 137 | context: ../.. 138 | dockerfile: _benchmarks/cluster/direct/Dockerfile 139 | container_name: kioshun-direct-runner 140 | environment: 141 | - DURATION=120s 142 | - CONCURRENCY=256 143 | - KEYS=50000 144 | - SET_RATIO=30 145 | - SET_TTL_MS=-1 146 | # Optional: node tuning 147 | # - READ_MAX_FANOUT=1 148 | # - READ_PER_TRY_MS=100 149 | # - READ_HEDGE_DELAY_MS=2 150 | # - READ_HEDGE_INTERVAL_MS=2 151 | # - WRITE_TIMEOUT_MS=2000 152 | # - SUSPICION_AFTER_MS=1500 153 | # - WEIGHT_UPDATE_MS=500 154 | # - GOSSIP_INTERVAL_MS=300 155 | # Failure injection 156 | # - KILL_AFTER=45s 157 | networks: [mesh] 158 | restart: "no" 159 | -------------------------------------------------------------------------------- /cluster/heat.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "hash/maphash" 5 | "sync" 6 | "sync/atomic" 7 | ) 8 | 9 | type cmSketch struct { 10 | rows [][]uint32 11 | seeds []maphash.Seed 12 | width int 13 | } 14 | 15 | // newCMS constructs a Count-Min sketch with the given rows and width. 16 | // Collisions are acceptable. It serves as a lightweight frequency estimator. 17 | func newCMS(rows, width int) *cmSketch { 18 | s := &cmSketch{ 19 | rows: make([][]uint32, rows), 20 | seeds: make([]maphash.Seed, rows), 21 | width: width, 22 | } 23 | 24 | for i := 0; i < rows; i++ { 25 | s.rows[i] = make([]uint32, width) 26 | s.seeds[i] = maphash.MakeSeed() 27 | } 28 | return s 29 | } 30 | 31 | // add increments counters for the given key across all rows. 
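// Illustrative property (editorial sketch): counters only ever
// overestimate, so after
//
//	s := newCMS(4, 1<<16)
//	s.add([]byte("k"), 3)
//
// the minimum of "k"'s four row counters is >= 3, with equality unless
// some other key collided with "k" in every row.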
32 | func (c *cmSketch) add(key []byte, n uint32) { 33 | for i := range c.rows { 34 | var h maphash.Hash 35 | h.SetSeed(c.seeds[i]) 36 | h.Write(key) 37 | idx := h.Sum64() % uint64(c.width) 38 | c.rows[i][idx] += n 39 | } 40 | } 41 | 42 | type ssEntry struct { 43 | K string 44 | C uint64 45 | } 46 | 47 | type spaceSaving struct { 48 | mu sync.Mutex 49 | cap int // capacity k 50 | h []*ssEntry // min-heap by C 51 | idx map[string]int // key -> index in heap 52 | } 53 | 54 | // newSpaceSaving builds a Space-Saving top-k structure storing at most k keys. 55 | func newSpaceSaving(k int) *spaceSaving { 56 | if k < 1 { 57 | k = 1 58 | } 59 | return &spaceSaving{ 60 | cap: k, 61 | idx: make(map[string]int, k), 62 | } 63 | } 64 | 65 | // add updates the estimated frequency of key using Space-Saving rules. When 66 | // full, it replaces the current minimum counter with the new key. 67 | func (s *spaceSaving) add(k []byte, inc uint64) { 68 | key := string(k) 69 | s.mu.Lock() 70 | defer s.mu.Unlock() 71 | 72 | // existing key: increment and fix heap. 73 | if i, ok := s.idx[key]; ok { 74 | e := s.h[i] 75 | e.C = addSat64(e.C, inc) 76 | s.siftDown(i) // count increased; min-heap needs siftDown 77 | return 78 | } 79 | 80 | // room available: insert new node. 81 | if len(s.h) < s.cap { 82 | e := &ssEntry{K: key, C: inc} 83 | s.h = append(s.h, e) 84 | i := len(s.h) - 1 85 | s.idx[key] = i 86 | s.siftUp(i) 87 | return 88 | } 89 | 90 | // full: replace current min with (key, minC + inc). Reuse the min node (no alloc). 91 | minIdx := 0 92 | minNode := s.h[minIdx] 93 | oldKey := minNode.K 94 | delete(s.idx, oldKey) 95 | 96 | minNode.K = key 97 | minNode.C = addSat64(minNode.C, inc) // space-Saving rule: newC = minC + inc 98 | s.idx[key] = minIdx 99 | s.siftDown(minIdx) 100 | } 101 | 102 | // export returns the current top-k keys and approximate counts. 103 | func (s *spaceSaving) export() []HotKey { 104 | s.mu.Lock() 105 | defer s.mu.Unlock() 106 | out := make([]HotKey, 0, len(s.h)) 107 | for _, e := range s.h { 108 | out = append(out, HotKey{K: []byte(e.K), C: e.C}) 109 | } 110 | return out 111 | } 112 | 113 | // siftUp restores the min-heap property by moving the node at i up toward the 114 | // root while its count is less than its parent. 115 | func (s *spaceSaving) siftUp(i int) { 116 | for i > 0 { 117 | p := (i - 1) / 2 118 | if s.h[p].C <= s.h[i].C { 119 | break 120 | } 121 | s.swap(i, p) 122 | i = p 123 | } 124 | } 125 | 126 | // siftDown restores the min-heap property by moving the node at i down to the 127 | // smallest child while its count is greater than that child. 128 | func (s *spaceSaving) siftDown(i int) { 129 | n := len(s.h) 130 | for { 131 | l := 2*i + 1 132 | if l >= n { 133 | return 134 | } 135 | 136 | small := l 137 | r := l + 1 138 | if r < n && s.h[r].C < s.h[l].C { 139 | small = r 140 | } 141 | 142 | if s.h[i].C <= s.h[small].C { 143 | return 144 | } 145 | s.swap(i, small) 146 | i = small 147 | } 148 | } 149 | 150 | // swap exchanges two heap nodes and updates their indices in the map. 151 | func (s *spaceSaving) swap(i, j int) { 152 | s.h[i], s.h[j] = s.h[j], s.h[i] 153 | s.idx[s.h[i].K] = i 154 | s.idx[s.h[j].K] = j 155 | } 156 | 157 | // addSat64 adds two uint64 numbers with saturation at max uint64 on overflow. 
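// For example: addSat64(2, 3) == 5, while addSat64(^uint64(0)-1, 5)
// clamps to ^uint64(0) instead of wrapping.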
158 | func addSat64(a, b uint64) uint64 { 159 | c := a + b 160 | if c < a { 161 | return ^uint64(0) 162 | } 163 | return c 164 | } 165 | 166 | type heat struct { 167 | cms *cmSketch 168 | ss *spaceSaving 169 | sampleN uint32 170 | ctr uint32 171 | } 172 | 173 | // newHeat ties CM-sketch and Space-Saving together and supports sampling to 174 | // reduce overhead under very high request rates. 175 | func newHeat(rows, width, k, sampleN int) *heat { 176 | return &heat{ 177 | cms: newCMS(rows, width), 178 | ss: newSpaceSaving(k), 179 | sampleN: uint32(sampleN), 180 | } 181 | } 182 | 183 | // sample records a key access at a reduced rate (1/sampleN). When sampleN=1, 184 | // every key is recorded. 185 | func (h *heat) sample(key []byte) { 186 | if h.sampleN <= 1 { 187 | h.cms.add(key, 1) 188 | h.ss.add(key, 1) 189 | return 190 | } 191 | if atomic.AddUint32(&h.ctr, 1)%h.sampleN == 0 { 192 | h.cms.add(key, 1) 193 | h.ss.add(key, 1) 194 | } 195 | } 196 | 197 | func (h *heat) exportTopK() []HotKey { 198 | return h.ss.export() 199 | } 200 | -------------------------------------------------------------------------------- /cluster/bf_rpc.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "sort" 7 | "time" 8 | ) 9 | 10 | func absExpiryAt(base time.Time, ttl time.Duration) int64 { 11 | if ttl <= 0 { 12 | return 0 13 | } 14 | return base.Add(ttl).UnixNano() 15 | } 16 | 17 | // rpcBackfillDigest builds digests for the requested prefix depth considering 18 | // only keys that the target node should own (according to this donor's ring). 19 | // It returns per-bucket counts and XOR(hash^version) so the joiner can detect 20 | // which buckets differ and page only those keys. 21 | func (n *Node[K, V]) rpcBackfillDigest(req MsgBackfillDigestReq) MsgBackfillDigestResp { 22 | depth := int(req.Depth) 23 | if depth <= 0 || depth > 8 { 24 | depth = 2 25 | } 26 | 27 | r := n.ring.Load().(*ring) 28 | 29 | targetID := NodeID(req.TargetID) 30 | if !r.hasID(targetID) { 31 | return MsgBackfillDigestResp{ 32 | Base: Base{T: MTBackfillDigestResp, ID: req.ID}, 33 | Depth: uint8(depth), 34 | NotInRing: true, 35 | } 36 | } 37 | 38 | type agg struct { 39 | c uint32 40 | h uint64 41 | } 42 | 43 | // Aggregate per-bucket count and XOR(hash^version) to detect 44 | // differences between donor and joiner without shipping all keys. 
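// e.g. with Depth=2 the top two bytes of the big-endian key hash pick
// the bucket, so at most 65536 digests summarize the donor's keyspace.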
45 | buckets := make(map[string]agg, 1<<12) 46 | 47 | keys := n.local.Keys() 48 | for _, k := range keys { 49 | h64 := n.hash64Of(k) 50 | if !r.ownsHash(targetID, h64) { 51 | continue 52 | } 53 | 54 | var ver uint64 55 | if n.cfg.LWWEnabled { 56 | kb := n.kc.EncodeKey(k) 57 | n.verMu.RLock() 58 | ver = n.version[string(kb)] 59 | n.verMu.RUnlock() 60 | } 61 | 62 | var hb [8]byte 63 | binary.BigEndian.PutUint64(hb[:], h64) 64 | prefix := string(hb[:depth]) 65 | 66 | a := buckets[prefix] 67 | a.c++ 68 | a.h ^= (h64 ^ ver) 69 | buckets[prefix] = a 70 | } 71 | 72 | out := make([]BucketDigest, 0, len(buckets)) 73 | for p, a := range buckets { 74 | out = append(out, BucketDigest{Prefix: []byte(p), Count: a.c, Hash64: a.h}) 75 | } 76 | sort.Slice(out, func(i, j int) bool { return bytes.Compare(out[i].Prefix, out[j].Prefix) < 0 }) 77 | return MsgBackfillDigestResp{ 78 | Base: Base{T: MTBackfillDigestResp, ID: req.ID}, 79 | Depth: uint8(depth), 80 | Buckets: out, 81 | } 82 | } 83 | 84 | // rpcBackfillKeys returns the next page of keys within a given hash-prefix 85 | // bucket that the target should own, ordered by 64-bit key hash. Pagination is 86 | // driven by the last 8-byte hash cursor provided by the caller. Values may be 87 | // compressed, and expirations are converted to absolute nanoseconds. 88 | func (n *Node[K, V]) rpcBackfillKeys(req MsgBackfillKeysReq) MsgBackfillKeysResp { 89 | prefix := req.Prefix 90 | depth := len(prefix) 91 | if depth <= 0 || depth > 8 { 92 | return MsgBackfillKeysResp{Base: Base{T: MTBackfillKeysResp, ID: req.ID}, Done: true} 93 | } 94 | 95 | r := n.ring.Load().(*ring) 96 | 97 | targetID := NodeID(req.TargetID) 98 | if !r.hasID(targetID) { 99 | return MsgBackfillKeysResp{ 100 | Base: Base{T: MTBackfillKeysResp, ID: req.ID}, 101 | Done: true, 102 | NotInRing: true, 103 | } 104 | } 105 | 106 | limit := req.Limit 107 | if limit <= 0 || limit > 4096 { 108 | limit = 1024 109 | } 110 | 111 | // decode cursor (last key-hash). The donor walks keys by hash order 112 | // inside a bucket to provide consistent pagination. 113 | var after uint64 114 | if len(req.Cursor) == 8 { 115 | after = binary.BigEndian.Uint64(req.Cursor) 116 | } 117 | 118 | type row struct { 119 | h uint64 120 | k K 121 | kb []byte 122 | } 123 | rows := make([]row, 0, limit*2) 124 | 125 | keys := n.local.Keys() 126 | for _, k := range keys { 127 | h64 := n.hash64Of(k) 128 | 129 | var hb [8]byte 130 | binary.BigEndian.PutUint64(hb[:], h64) 131 | if !bytes.Equal(hb[:depth], prefix) { 132 | continue 133 | } 134 | 135 | if !r.ownsHash(targetID, h64) || h64 <= after { 136 | continue 137 | } 138 | rows = append(rows, row{h: h64, k: k, kb: n.kc.EncodeKey(k)}) 139 | } 140 | 141 | // sort by key-hash to respect the cursor pagination. 142 | sort.Slice(rows, func(i, j int) bool { return rows[i].h < rows[j].h }) 143 | if len(rows) > limit { 144 | rows = rows[:limit] 145 | } 146 | 147 | items := make([]KV, 0, len(rows)) 148 | now := time.Now() 149 | for _, r := range rows { 150 | v, ttl, ok := n.local.GetWithTTL(r.k) 151 | if !ok { 152 | continue 153 | } 154 | 155 | bv, _ := n.codec.Encode(v) 156 | b2, cp := n.maybeCompress(bv) 157 | 158 | var ver uint64 159 | if n.cfg.LWWEnabled { 160 | n.verMu.RLock() 161 | ver = n.version[string(r.kb)] 162 | n.verMu.RUnlock() 163 | } 164 | 165 | // 0 means no expiration. 
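// e.g. ttl <= 0 yields abs == 0 (never expires); ttl == 5m yields
// now+5m as absolute UnixNano.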
166 | abs := absExpiryAt(now, ttl) 167 | items = append(items, KV{ 168 | K: append([]byte(nil), r.kb...), 169 | V: append([]byte(nil), b2...), 170 | E: abs, 171 | Ver: ver, 172 | Cp: cp, 173 | }) 174 | } 175 | 176 | resp := MsgBackfillKeysResp{ 177 | Base: Base{T: MTBackfillKeysResp, ID: req.ID}, 178 | Items: items, 179 | Done: len(items) == 0, 180 | } 181 | if len(rows) > 0 { 182 | var next [8]byte 183 | binary.BigEndian.PutUint64(next[:], rows[len(rows)-1].h) 184 | resp.NextCursor = append([]byte(nil), next[:]...) 185 | } 186 | return resp 187 | } 188 | -------------------------------------------------------------------------------- /cluster/rendezvous.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "math/bits" 5 | "sort" 6 | "sync/atomic" 7 | 8 | "github.com/cespare/xxhash/v2" 9 | ) 10 | 11 | type nodeMeta struct { 12 | ID NodeID 13 | Addr string 14 | weight uint64 // scaled 0..1_000_000 15 | salt uint64 // per-node salt (pre-hashed ID) 16 | } 17 | 18 | // newMeta initializes per-node rendezvous metadata with a default weight and 19 | // a precomputed salt derived from the node ID. 20 | func newMeta(id NodeID, addr string) *nodeMeta { 21 | return &nodeMeta{ 22 | ID: id, Addr: addr, 23 | weight: 500_000, 24 | salt: xxhash.Sum64String(string(id)), 25 | } 26 | } 27 | 28 | // Weight returns the current scaled weight as a [0,1] float. 29 | func (n *nodeMeta) Weight() float64 { 30 | return float64(atomic.LoadUint64(&n.weight)) / 1_000_000.0 31 | } 32 | 33 | type ring struct { 34 | nodes []*nodeMeta 35 | rf int 36 | } 37 | 38 | func newRing(rf int) *ring { return &ring{rf: rf} } 39 | 40 | // ownersFromKeyHash returns the top rf owners for a 64-bit key hash using 41 | // weighted rendezvous hashing. Node salt keeps per-node independence. 42 | func (r *ring) ownersFromKeyHash(keyHash uint64) []*nodeMeta { 43 | type pair struct { 44 | s uint64 // rendezvous score 45 | w uint64 // scaled weight (0..1_000_000) 46 | n *nodeMeta 47 | } 48 | arr := make([]pair, 0, len(r.nodes)) 49 | for _, nm := range r.nodes { 50 | arr = append(arr, pair{ 51 | s: mix64(keyHash ^ nm.salt), 52 | w: atomic.LoadUint64(&nm.weight), // snapshot once 53 | n: nm, 54 | }) 55 | } 56 | 57 | less := func(i, j int) bool { 58 | hi1, lo1 := bits.Mul64(arr[i].s, arr[i].w) 59 | hi2, lo2 := bits.Mul64(arr[j].s, arr[j].w) 60 | if hi1 != hi2 { 61 | return hi1 > hi2 // higher product first 62 | } 63 | if lo1 != lo2 { 64 | return lo1 > lo2 65 | } 66 | return arr[i].n.ID < arr[j].n.ID // tie-break 67 | } 68 | sort.Slice(arr, less) 69 | 70 | n := r.rf 71 | if n > len(arr) { 72 | n = len(arr) 73 | } 74 | out := make([]*nodeMeta, n) 75 | for i := 0; i < n; i++ { 76 | out[i] = arr[i].n 77 | } 78 | return out 79 | } 80 | 81 | // ownersTopNFromKeyHash returns the top N candidates by weighted rendezvous 82 | // score. Used for hot-key shadowing beyond rf. 83 | func (r *ring) ownersTopNFromKeyHash(keyHash uint64, n int) []*nodeMeta { 84 | // variant that returns the top-N candidates for hot-key shadowing. 85 | // Use the same integer 128-bit ranking as ownersFromKeyHash for 86 | // consistent ordering and tie-breaking. 
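// e.g. for equal scores s, bits.Mul64(s, 1_000_000) outranks
// bits.Mul64(s, 500_000), so higher-weight nodes win the slot.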
87 | type pair struct { 88 | s uint64 // rendezvous score 89 | w uint64 // scaled weight (0..1_000_000) 90 | n *nodeMeta 91 | } 92 | 93 | arr := make([]pair, 0, len(r.nodes)) 94 | for _, nm := range r.nodes { 95 | arr = append(arr, pair{ 96 | s: mix64(keyHash ^ nm.salt), 97 | w: atomic.LoadUint64(&nm.weight), 98 | n: nm, 99 | }) 100 | } 101 | 102 | less := func(i, j int) bool { 103 | hi1, lo1 := bits.Mul64(arr[i].s, arr[i].w) 104 | hi2, lo2 := bits.Mul64(arr[j].s, arr[j].w) 105 | if hi1 != hi2 { 106 | return hi1 > hi2 107 | } 108 | if lo1 != lo2 { 109 | return lo1 > lo2 110 | } 111 | return arr[i].n.ID < arr[j].n.ID 112 | } 113 | sort.Slice(arr, less) 114 | 115 | if n > len(arr) { 116 | n = len(arr) 117 | } 118 | 119 | out := make([]*nodeMeta, n) 120 | for i := 0; i < n; i++ { 121 | out[i] = arr[i].n 122 | } 123 | return out 124 | } 125 | 126 | // ownsHash reports whether selfID is among the top-rf owners for keyHash. 127 | func (r *ring) ownsHash(selfID NodeID, keyHash uint64) bool { 128 | if len(r.nodes) == 0 || r.rf <= 0 { 129 | return false 130 | } 131 | top := r.rf 132 | if top > len(r.nodes) { 133 | top = len(r.nodes) 134 | } 135 | 136 | type slot struct { 137 | hi, lo uint64 // 128-bit product of (score * weight) 138 | n *nodeMeta 139 | } 140 | best := make([]slot, 0, top) 141 | worst := 0 142 | 143 | worse := func(a slot, b slot) bool { 144 | if a.hi != b.hi { 145 | return a.hi < b.hi 146 | } 147 | if a.lo != b.lo { 148 | return a.lo < b.lo 149 | } 150 | return a.n.ID > b.n.ID 151 | } 152 | 153 | for _, nm := range r.nodes { 154 | s := mix64(keyHash ^ nm.salt) 155 | w := atomic.LoadUint64(&nm.weight) 156 | hi, lo := bits.Mul64(s, w) 157 | 158 | if len(best) < top { 159 | best = append(best, slot{hi: hi, lo: lo, n: nm}) 160 | if len(best) == 1 || worse(best[len(best)-1], best[worst]) { 161 | worst = len(best) - 1 162 | } 163 | continue 164 | } 165 | 166 | if !worse(slot{hi, lo, nm}, best[worst]) { 167 | best[worst] = slot{hi: hi, lo: lo, n: nm} 168 | // recompute worst 169 | worst = 0 170 | for i := 1; i < len(best); i++ { 171 | if worse(best[i], best[worst]) { 172 | worst = i 173 | } 174 | } 175 | } 176 | } 177 | 178 | for _, sl := range best { 179 | if sl.n.ID == selfID { 180 | return true 181 | } 182 | } 183 | return false 184 | } 185 | 186 | // mix64: fast 64-bit mixer (SplitMix64 finalizer). 187 | func mix64(x uint64) uint64 { 188 | x ^= x >> 30 189 | x *= 0xbf58476d1ce4e5b9 190 | x ^= x >> 27 191 | x *= 0x94d049bb133111eb 192 | x ^= x >> 31 193 | return x 194 | } 195 | 196 | // hasID returns true when the ID participates in this ring view. 197 | func (r *ring) hasID(id NodeID) bool { 198 | for _, nm := range r.nodes { 199 | if nm.ID == id { 200 | return true 201 | } 202 | } 203 | return false 204 | } 205 | -------------------------------------------------------------------------------- /internal/httpcache/index.go: -------------------------------------------------------------------------------- 1 | package httpcache 2 | 3 | import ( 4 | "strings" 5 | "sync" 6 | ) 7 | 8 | const ( 9 | rootPath = "/" 10 | pathSeparator = "/" 11 | wildcardChar = "*" 12 | ) 13 | 14 | // PatternNode represents a single node in the path tree. 15 | type PatternNode struct { 16 | children map[string]*PatternNode 17 | keys map[string]bool 18 | } 19 | 20 | // PatternIndex maintains a tree structure that maps URL paths to cache keys. 
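// Illustrative usage (editorial sketch with hypothetical keys):
//
//	pi := NewPatternIndex()
//	pi.AddKey("/api/users", "GET:/api/users")
//	pi.AddKey("/api/users/42", "GET:/api/users/42")
//	keys := pi.GetMatchingKeys("/api/*") // returns both keys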
21 | type PatternIndex struct { 22 | mu sync.RWMutex 23 | root *PatternNode 24 | } 25 | 26 | func NewPatternIndex() *PatternIndex { 27 | return &PatternIndex{root: newPatternNode()} 28 | } 29 | 30 | func newPatternNode() *PatternNode { 31 | return &PatternNode{ 32 | children: make(map[string]*PatternNode), 33 | keys: make(map[string]bool), 34 | } 35 | } 36 | 37 | // DefaultPathExtractor returns an empty string; override to map keys to paths. 38 | func DefaultPathExtractor(key string) string { return "" } 39 | 40 | // NormalizePath converts a URL path into a normalized slice of path segments 41 | // Empty path handling: 42 | // - Empty string converts to root path ("/") 43 | // - Ensures all paths have a canonical representation 44 | // 45 | // Path cleaning process: 46 | // 1. Trim leading and trailing slashes to remove "/path/" -> "path" 47 | // 2. After trimming, empty result indicates root path (returns empty slice) 48 | // 3. Split remaining path by separator into individual segments 49 | // 4. Filter out empty segments caused by double slashes ("//") or malformed paths 50 | // 51 | // Examples: 52 | // - "" -> [] 53 | // - "/" -> [] 54 | // - "/api/v1/" -> ["api", "v1"] 55 | // - "//api//v1//" -> ["api", "v1"] 56 | // - "api/v1" -> ["api", "v1"] 57 | // 58 | // This normalization ensures that equivalent paths (with different slash patterns) 59 | // map to the same tree location, preventing duplicate entries 60 | func normalizePath(path string) []string { 61 | if path == "" { 62 | path = rootPath 63 | } 64 | 65 | trimmed := strings.Trim(path, pathSeparator) 66 | if trimmed == "" { 67 | return []string{} 68 | } 69 | 70 | segments := strings.Split(trimmed, pathSeparator) 71 | result := make([]string, 0, len(segments)) 72 | for _, seg := range segments { 73 | if seg != "" { 74 | result = append(result, seg) 75 | } 76 | } 77 | return result 78 | } 79 | 80 | // AddKey associates a cache key with a specific path in the trie. 81 | func (pi *PatternIndex) AddKey(path, key string) { 82 | pi.mu.Lock() 83 | defer pi.mu.Unlock() 84 | 85 | node := pi.root 86 | segments := normalizePath(path) 87 | for _, s := range segments { 88 | if node.children[s] == nil { 89 | node.children[s] = newPatternNode() 90 | } 91 | node = node.children[s] 92 | } 93 | node.keys[key] = true 94 | } 95 | 96 | // RemoveKey removes a cache key from the specified path. 97 | func (pi *PatternIndex) RemoveKey(path, key string) { 98 | pi.mu.Lock() 99 | defer pi.mu.Unlock() 100 | 101 | segments := normalizePath(path) 102 | node := pi.findNode(segments) 103 | if node != nil { 104 | delete(node.keys, key) 105 | } 106 | } 107 | 108 | // GetMatchingKeys returns all cache keys that match the given pattern 109 | // 110 | // Exact path matching: 111 | // - Pattern without '*' suffix matches only keys stored at that exact path 112 | // - Uses findNode() to locate the specific tree node 113 | // - Collects only keys stored directly at the target node 114 | // - Example: "/api/users" matches keys at exactly "/api/users" 115 | // 116 | // Wildcard pattern matching: 117 | // - Pattern ending with '*' enables prefix-based subtree matching 118 | // - Strips the '*' suffix and finds the base path node 119 | // - Recursively collects keys from the base node and all descendant nodes 120 | // - Example: "/api/*" matches keys at "/api", "/api/users", "/api/users/123", etc. 
121 | func (pi *PatternIndex) GetMatchingKeys(pattern string) []string { 122 | pi.mu.RLock() 123 | defer pi.mu.RUnlock() 124 | 125 | if pattern == "" { 126 | pattern = rootPath 127 | } 128 | 129 | isWildcard := strings.HasSuffix(pattern, wildcardChar) 130 | if isWildcard { 131 | pattern = strings.TrimSuffix(pattern, wildcardChar) 132 | } 133 | 134 | segments := normalizePath(pattern) 135 | node := pi.findNode(segments) 136 | if node == nil { 137 | return nil 138 | } 139 | if isWildcard { 140 | return pi.collectAllKeys(node) 141 | } 142 | return pi.collectDirectKeys(node) 143 | } 144 | 145 | func (pi *PatternIndex) findNode(segments []string) *PatternNode { 146 | node := pi.root 147 | for _, s := range segments { 148 | next := node.children[s] 149 | if next == nil { 150 | return nil 151 | } 152 | node = next 153 | } 154 | return node 155 | } 156 | 157 | func (pi *PatternIndex) collectDirectKeys(node *PatternNode) []string { 158 | if len(node.keys) == 0 { 159 | return nil 160 | } 161 | 162 | keys := make([]string, 0, len(node.keys)) 163 | for k := range node.keys { 164 | keys = append(keys, k) 165 | } 166 | return keys 167 | } 168 | 169 | func (pi *PatternIndex) collectAllKeys(node *PatternNode) []string { 170 | var keys []string 171 | for k := range node.keys { 172 | keys = append(keys, k) 173 | } 174 | for _, c := range node.children { 175 | keys = append(keys, pi.collectAllKeys(c)...) 176 | } 177 | return keys 178 | } 179 | 180 | func (pi *PatternIndex) Clear() { 181 | pi.mu.Lock() 182 | pi.root = newPatternNode() 183 | pi.mu.Unlock() 184 | } 185 | -------------------------------------------------------------------------------- /cluster/config.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "crypto/tls" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/cespare/xxhash/v2" 9 | ) 10 | 11 | type NodeID string 12 | 13 | type TLSMode struct { 14 | Enable bool 15 | CertFile string 16 | KeyFile string 17 | CAFile string 18 | RequireClientCert bool 19 | MinVersion uint16 20 | PreferServerCipherSuites bool 21 | CipherSuites []uint16 22 | CurvePreferences []tls.CurveID 23 | } 24 | 25 | type Security struct { 26 | AuthToken string 27 | TLS TLSMode 28 | MaxFrameSize int 29 | MaxKeySize int 30 | MaxValueSize int 31 | ReadTimeout time.Duration 32 | WriteTimeout time.Duration 33 | IdleTimeout time.Duration 34 | MaxInflightPerPeer int 35 | CompressionThreshold int 36 | LeaseLoadQPS int 37 | ReadBufSize int 38 | WriteBufSize int 39 | AllowUnauthenticatedClients bool 40 | // MaxConcurrentHandshakes caps simultaneous TLS handshakes. 
41 | // > 0 : fixed cap 42 | // = 0 : auto => max(64, 32*GOMAXPROCS) 43 | // < 0 : disabled (no gating) 44 | MaxConcurrentHandshakes int 45 | } 46 | 47 | type DropPolicy uint8 48 | 49 | const ( 50 | DropOldest DropPolicy = iota 51 | DropNewest 52 | DropNone 53 | ) 54 | 55 | type HandoffConfig struct { 56 | Enable *bool 57 | Pause bool 58 | MaxItems int 59 | MaxBytes int64 60 | PerPeerCap int 61 | PerPeerBytes int64 62 | TTL time.Duration 63 | ReplayRPS int 64 | DropPolicy DropPolicy 65 | AutopauseItems int 66 | AutopauseBytes int64 67 | } 68 | 69 | func (h *HandoffConfig) IsEnabled() bool { 70 | return h.Enable == nil || *h.Enable 71 | } 72 | 73 | func (h *HandoffConfig) FillDefaults() { 74 | if h.Enable == nil { 75 | b := true 76 | h.Enable = &b 77 | } 78 | if !*h.Enable { 79 | return 80 | } 81 | if h.DropPolicy == 0 { 82 | h.DropPolicy = DropOldest 83 | } 84 | if h.MaxItems == 0 { 85 | h.MaxItems = 500_000 86 | } 87 | if h.MaxBytes == 0 { 88 | h.MaxBytes = 2 << 30 // ~2 GiB 89 | } 90 | if h.PerPeerCap == 0 { 91 | h.PerPeerCap = 50_000 92 | } 93 | if h.PerPeerBytes == 0 { 94 | h.PerPeerBytes = 512 << 20 // ~512 MiB 95 | } 96 | if h.TTL <= 0 { 97 | h.TTL = 10 * time.Minute 98 | } 99 | if h.ReplayRPS <= 0 { 100 | h.ReplayRPS = 20_000 101 | } 102 | if h.AutopauseItems == 0 && h.MaxItems > 0 { 103 | h.AutopauseItems = h.MaxItems * 9 / 10 104 | } 105 | if h.AutopauseBytes == 0 && h.MaxBytes > 0 { 106 | h.AutopauseBytes = int64(h.MaxBytes * 9 / 10) 107 | } 108 | } 109 | 110 | func BoolPtr(b bool) *bool { return &b } 111 | 112 | type Config struct { 113 | ID NodeID 114 | BindAddr string 115 | PublicURL string 116 | Seeds []string 117 | ReplicationFactor int 118 | WriteConcern int 119 | // Client read tuning 120 | ReadMaxFanout int 121 | ReadHedgeDelay time.Duration 122 | ReadHedgeInterval time.Duration 123 | ReadPerTryTimeout time.Duration 124 | GossipInterval time.Duration 125 | SuspicionAfter time.Duration 126 | TombstoneAfter time.Duration 127 | WeightUpdate time.Duration 128 | HotsetPeriod time.Duration 129 | HotsetSize int 130 | MirrorTTL time.Duration 131 | LeaseTTL time.Duration 132 | RebalanceInterval time.Duration 133 | BackfillInterval time.Duration 134 | RebalanceLimit int 135 | Sec Security 136 | LWWEnabled bool 137 | PerConnWorkers int 138 | PerConnQueue int 139 | 140 | Handoff HandoffConfig 141 | } 142 | 143 | func Default() Config { 144 | return Config{ 145 | ReplicationFactor: 2, 146 | WriteConcern: 1, 147 | ReadMaxFanout: 2, 148 | ReadHedgeDelay: 3 * time.Millisecond, 149 | ReadHedgeInterval: 3 * time.Millisecond, 150 | ReadPerTryTimeout: 200 * time.Millisecond, 151 | GossipInterval: 500 * time.Millisecond, 152 | SuspicionAfter: 2 * time.Second, 153 | TombstoneAfter: 30 * time.Second, 154 | WeightUpdate: 1 * time.Second, 155 | HotsetPeriod: 2 * time.Second, 156 | HotsetSize: 1024, 157 | MirrorTTL: 30 * time.Second, 158 | LeaseTTL: 300 * time.Millisecond, 159 | RebalanceInterval: 2 * time.Second, 160 | BackfillInterval: 30 * time.Second, 161 | RebalanceLimit: 500, 162 | Sec: Security{ 163 | MaxFrameSize: 4 << 20, 164 | MaxKeySize: 128 << 10, 165 | MaxValueSize: 2 << 20, 166 | ReadTimeout: 3 * time.Second, 167 | WriteTimeout: 3 * time.Second, 168 | IdleTimeout: 10 * time.Second, 169 | MaxInflightPerPeer: 256, 170 | CompressionThreshold: 64 << 10, 171 | LeaseLoadQPS: 0, 172 | ReadBufSize: 32 << 10, 173 | WriteBufSize: 32 << 10, 174 | TLS: TLSMode{ 175 | PreferServerCipherSuites: true, 176 | }, 177 | AllowUnauthenticatedClients: true, 178 | MaxConcurrentHandshakes: 0, 179 | }, 180 | 
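// Per-connection dispatch defaults below: 64 workers draining a 128-entry queue.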
PerConnWorkers: 64, 181 | PerConnQueue: 128, 182 | LWWEnabled: true, 183 | 184 | Handoff: HandoffConfig{ 185 | Enable: BoolPtr(true), 186 | Pause: false, 187 | MaxItems: 500_000, 188 | MaxBytes: 2 << 30, 189 | PerPeerCap: 50_000, 190 | PerPeerBytes: 512 << 20, 191 | TTL: 10 * time.Minute, 192 | ReplayRPS: 20_000, 193 | DropPolicy: DropOldest, 194 | AutopauseItems: 500_000 * 9 / 10, 195 | AutopauseBytes: int64((2 << 30) * 9 / 10), 196 | }, 197 | } 198 | } 199 | 200 | // EnsureID assigns a stable ID when not provided. 201 | // Default: 16-hex digest of PublicURL. 202 | func (c *Config) EnsureID() { 203 | if c.ID != "" { 204 | return 205 | } 206 | sum := xxhash.Sum64String(c.PublicURL) 207 | c.ID = NodeID(fmt.Sprintf("%016x", sum)) 208 | } 209 | -------------------------------------------------------------------------------- /hash.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "math/bits" 7 | ) 8 | 9 | // xxHash64 (seed=0) tuned for cache hot paths: 10 | // preserves spec mixing, uses FNV-1a for tiny strings, and applies avalanche-only for ints. 11 | const ( 12 | // xxHash64 primes (spec). 13 | prime64_1 = 0x9E3779B185EBCA87 14 | prime64_2 = 0xC2B2AE3D27D4EB4F 15 | prime64_3 = 0x165667B19E3779F9 16 | prime64_4 = 0x85EBCA77C2B2AE63 17 | prime64_5 = 0x27D4EB2F165667C5 18 | 19 | // Precomputed seeds for seed=0 (v1 and v4 initial values per spec). 20 | seed64_1 = 0x60EA27EEADC0B5D6 // prime64_1 + prime64_2 21 | seed64_4 = 0x61C8864E7A143579 // -prime64_1 (two's complement) 22 | 23 | // Size/rotation params (spec). 24 | largeInputThreshold = 32 25 | 26 | roundRotation = 31 27 | mergeRotation = 27 28 | smallRotation = 23 29 | tinyRotation = 11 30 | 31 | // Avalanche xor-shifts (order matters). 32 | avalancheShift1 = 33 33 | avalancheShift2 = 29 34 | avalancheShift3 = 32 35 | 36 | // Lane-combine rotations (spec). 37 | v1Rotation = 1 38 | v2Rotation = 7 39 | v3Rotation = 12 40 | v4Rotation = 18 41 | ) 42 | 43 | // Strategy heuristic for string keys: ≤8B → FNV-1a, >8B → xxHash64. 44 | const ( 45 | stringByteLength = 8 46 | ) 47 | 48 | // hasher provides type-specialized hashing without reflection (stateless, goroutine-safe). 49 | type hasher[K comparable] struct{} 50 | 51 | // newHasher returns a tiny value-type hasher for K (no captured state). 52 | func newHasher[K comparable]() hasher[K] { 53 | return hasher[K]{} 54 | } 55 | 56 | // hash routes by key type: 57 | // ints → avalanche-only, strings → FNV/xxHash by length, 58 | // others → formatted string then string hashing. 59 | func (h hasher[K]) hash(key K) uint64 { 60 | switch k := any(key).(type) { 61 | case string: 62 | return h.hashString(k) 63 | case int: 64 | return xxHash64Avalanche(uint64(k)) 65 | case int32: 66 | return xxHash64Avalanche(uint64(k)) 67 | case int64: 68 | return xxHash64Avalanche(uint64(k)) 69 | case uint: 70 | return xxHash64Avalanche(uint64(k)) 71 | case uint32: 72 | return xxHash64Avalanche(uint64(k)) 73 | case uint64: 74 | return xxHash64Avalanche(k) 75 | default: 76 | // Fallback allocates; prefer native K types to avoid it. 77 | return h.hashString(fmt.Sprintf("%v", k)) 78 | } 79 | } 80 | 81 | // hashString selects FNV-1a for very short strings, xxHash64 otherwise. 
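// For example, a 7-byte key like "user:42" takes the fnvHash64 path, while a
// 14-byte key like "session:abcdef" goes through xxHash64 (keys illustrative).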
82 | func (h hasher[K]) hashString(s string) uint64 { 83 | if len(s) <= stringByteLength { 84 | return fnvHash64(s) 85 | } 86 | return xxHash64(s) 87 | } 88 | 89 | // xxHash64 computes xxHash64(seed=0) for a string; 90 | // ≥32B uses 4-lane path, smaller uses small-input path, then avalanche. 91 | func xxHash64(input string) uint64 { 92 | data := []byte(input) 93 | length := len(data) 94 | 95 | var h64 uint64 96 | if length >= largeInputThreshold { 97 | h64 = xxHash64Large(data, uint64(length)) 98 | } else { 99 | h64 = prime64_5 + uint64(length) // small-input init 100 | h64 = xxHash64Small(data, h64) 101 | } 102 | 103 | return xxHash64Avalanche(h64) 104 | } 105 | 106 | // xxHash64Large processes 32B blocks with four accumulators, combines lanes, then finalizes the tail. 107 | func xxHash64Large(data []byte, length uint64) uint64 { 108 | // Seed accumulators for seed=0. 109 | v1 := uint64(seed64_1) 110 | v2 := uint64(prime64_2) 111 | v3 := uint64(0) 112 | v4 := uint64(seed64_4) 113 | 114 | // 32B per iteration (8B per lane). 115 | for len(data) >= largeInputThreshold { 116 | v1 = xxHash64Round(v1, binary.LittleEndian.Uint64(data[0:8])) 117 | v2 = xxHash64Round(v2, binary.LittleEndian.Uint64(data[8:16])) 118 | v3 = xxHash64Round(v3, binary.LittleEndian.Uint64(data[16:24])) 119 | v4 = xxHash64Round(v4, binary.LittleEndian.Uint64(data[24:32])) 120 | data = data[largeInputThreshold:] 121 | } 122 | 123 | // Combine lanes with distinct rotations, then merge rounds. 124 | h64 := bits.RotateLeft64(v1, v1Rotation) + 125 | bits.RotateLeft64(v2, v2Rotation) + 126 | bits.RotateLeft64(v3, v3Rotation) + 127 | bits.RotateLeft64(v4, v4Rotation) 128 | 129 | h64 = xxHash64MergeRound(h64, v1) 130 | h64 = xxHash64MergeRound(h64, v2) 131 | h64 = xxHash64MergeRound(h64, v3) 132 | h64 = xxHash64MergeRound(h64, v4) 133 | 134 | h64 += length 135 | return xxHash64Finalize(data, h64) 136 | } 137 | 138 | // xxHash64Small forwards small inputs directly to finalization. 139 | func xxHash64Small(data []byte, h64 uint64) uint64 { 140 | return xxHash64Finalize(data, h64) 141 | } 142 | 143 | // xxHash64Round is one per-lane round: (acc + input*prime2) → rot(31) → *prime1. 144 | func xxHash64Round(acc, input uint64) uint64 { 145 | acc += input * prime64_2 146 | acc = bits.RotateLeft64(acc, roundRotation) 147 | acc *= prime64_1 148 | return acc 149 | } 150 | 151 | // xxHash64MergeRound folds a lane into the main hash during lane combination. 152 | func xxHash64MergeRound(h64, val uint64) uint64 { 153 | val = xxHash64Round(0, val) 154 | h64 ^= val 155 | h64 = h64*prime64_1 + prime64_4 156 | return h64 157 | } 158 | 159 | // xxHash64Finalize folds tail bytes (8B → 4B → 1B) and applies the final avalanche. 160 | func xxHash64Finalize(data []byte, h64 uint64) uint64 { 161 | for len(data) >= 8 { 162 | k1 := binary.LittleEndian.Uint64(data[0:8]) 163 | k1 = xxHash64Round(0, k1) 164 | h64 ^= k1 165 | h64 = bits.RotateLeft64(h64, mergeRotation)*prime64_1 + prime64_4 166 | data = data[8:] 167 | } 168 | 169 | if len(data) >= 4 { 170 | k1 := uint64(binary.LittleEndian.Uint32(data[0:4])) 171 | h64 ^= k1 * prime64_1 172 | h64 = bits.RotateLeft64(h64, smallRotation)*prime64_2 + prime64_3 173 | data = data[4:] 174 | } 175 | 176 | // Final 0..3 bytes. 
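// Each leftover byte is scaled by prime64_5 and XORed in, then the hash is
// rotated by 11 bits (tinyRotation) and multiplied by prime64_1 — the spec's
// 1-byte tail step.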
177 | for len(data) > 0 { 178 | k1 := uint64(data[0]) 179 | h64 ^= k1 * prime64_5 180 | h64 = bits.RotateLeft64(h64, tinyRotation) * prime64_1 181 | data = data[1:] 182 | } 183 | 184 | return h64 185 | } 186 | 187 | // xxHash64Avalanche performs the final xor-shift/multiply chain to enforce avalanche behavior. 188 | func xxHash64Avalanche(h64 uint64) uint64 { 189 | h64 ^= h64 >> avalancheShift1 190 | h64 *= prime64_2 191 | h64 ^= h64 >> avalancheShift2 192 | h64 *= prime64_3 193 | h64 ^= h64 >> avalancheShift3 194 | return h64 195 | } 196 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 4 | 5 | ### [0.3.15](https://github.com/unkn0wn-root/kioshun/compare/v0.3.14...v0.3.15) (2025-10-30) 6 | 7 | ### [0.3.14](https://github.com/unkn0wn-root/kioshun/compare/v0.3.13...v0.3.14) (2025-10-29) 8 | 9 | ### [0.3.13](https://github.com/unkn0wn-root/kioshun/compare/v0.3.12...v0.3.13) (2025-10-29) 10 | 11 | 12 | ### Bug Fixes 13 | 14 | * expiration race when upgrading shard lock during get ([dcbc8f0](https://github.com/unkn0wn-root/kioshun/commit/dcbc8f08b883f60f6617519db8929991e7d7a42c)) 15 | 16 | ### [0.3.12](https://github.com/unkn0wn-root/kioshun/compare/v0.3.11...v0.3.12) (2025-10-29) 17 | 18 | ### [0.3.11](https://github.com/unkn0wn-root/kioshun/compare/v0.3.10...v0.3.11) (2025-09-07) 19 | 20 | 21 | ### Features 22 | 23 | * add MaxConcurrentHandshakes and resoect container limits with GOMAXPROCS ([bfbcc78](https://github.com/unkn0wn-root/kioshun/commit/bfbcc7842fc3fd0dcf3c4db654ee6fbadf406f96)) 24 | * **cluster:** use ID instead of PublicURL ([e5e9bb3](https://github.com/unkn0wn-root/kioshun/commit/e5e9bb39b201fc434293800b385dc0a5f04bfc2c)) 25 | 26 | 27 | ### Bug Fixes 28 | 29 | * tests after publicurl to node id change ([d9e7f83](https://github.com/unkn0wn-root/kioshun/commit/d9e7f837ff264a4db6b32c4caed9bcb3e18e2667)) 30 | 31 | ### [0.3.10](https://github.com/unkn0wn-root/kioshun/compare/v0.3.9...v0.3.10) (2025-09-05) 32 | 33 | 34 | ### Bug Fixes 35 | 36 | * **node:** make close idempotent ([4c183b5](https://github.com/unkn0wn-root/kioshun/commit/4c183b54db016e836074017f9f8b476594e171d3)) 37 | 38 | ### [0.3.9](https://github.com/unkn0wn-root/kioshun/compare/v0.3.8...v0.3.9) (2025-09-05) 39 | 40 | 41 | ### Features 42 | 43 | * **admission:** drop atomics since we hold lock in cache ([1667b84](https://github.com/unkn0wn-root/kioshun/commit/1667b8492a7102ff7d434e2032e9963a3b51dc9e)) 44 | * **client:** if local is primary and key exists, serve locally ([6adb9e2](https://github.com/unkn0wn-root/kioshun/commit/6adb9e2f74ddc5de4b2f08056fb2eef8ab2a20ca)) 45 | 46 | ### [0.3.8](https://github.com/unkn0wn-root/kioshun/compare/v0.3.7...v0.3.8) (2025-09-05) 47 | 48 | 49 | ### Features 50 | 51 | * add NotInRing to indicate that donor is not ready yet ([0fd11f5](https://github.com/unkn0wn-root/kioshun/commit/0fd11f5fd7db04edb9d15782f8063771dd25f0c3)) 52 | 53 | ### [0.3.7](https://github.com/unkn0wn-root/kioshun/compare/v0.3.6...v0.3.7) (2025-09-05) 54 | 55 | 56 | ### Bug Fixes 57 | 58 | * **node:** add itself to ring ([cc96554](https://github.com/unkn0wn-root/kioshun/commit/cc965545264e6be40e99720a037fd283710344f3)) 59 | 60 | ### 
[0.3.6](https://github.com/unkn0wn-root/kioshun/compare/v0.3.5...v0.3.6) (2025-09-05) 61 | 62 | 63 | ### Features 64 | 65 | * micro opt. and fixes ([d0322de](https://github.com/unkn0wn-root/kioshun/commit/d0322de4fa3dc35f3844afa16c6474b7afc65f9f)) 66 | 67 | ### [0.3.5](https://github.com/unkn0wn-root/kioshun/compare/v0.3.4...v0.3.5) (2025-09-03) 68 | 69 | ### [0.3.4](https://github.com/unkn0wn-root/kioshun/compare/v0.3.3...v0.3.4) (2025-09-03) 70 | 71 | ### [0.3.3](https://github.com/unkn0wn-root/kioshun/compare/v0.3.2...v0.3.3) (2025-09-03) 72 | 73 | 74 | ### Features 75 | 76 | * added new cluster benchmarks - http via wrapper and direct - via inter claster RPC ([287cedd](https://github.com/unkn0wn-root/kioshun/commit/287cedde636154d340a05bfcc6c8c5cec304f854)) 77 | 78 | 79 | ### Bug Fixes 80 | 81 | * make eviction test more robust after admission change ([8d4a759](https://github.com/unkn0wn-root/kioshun/commit/8d4a75903ebe059b1c78a73f87563bf941362c82)) 82 | 83 | ### [0.3.2](https://github.com/unkn0wn-root/kioshun/compare/v0.3.1...v0.3.2) (2025-09-03) 84 | 85 | 86 | ### Features 87 | 88 | * add cluster tests ([dd1ab76](https://github.com/unkn0wn-root/kioshun/commit/dd1ab764ef6e8d4f535e325358a2a96ce0ce8775)) 89 | 90 | 91 | ### Bug Fixes 92 | 93 | * reset peer only on fatal err; fix example api paths ([409fd08](https://github.com/unkn0wn-root/kioshun/commit/409fd08e3c46da6e526572d70963abc09e86f65f)) 94 | 95 | ### [0.3.1](https://github.com/unkn0wn-root/kioshun/compare/v0.3.0...v0.3.1) (2025-09-03) 96 | 97 | 98 | ### Features 99 | 100 | * add clustered api to examples dir ([90047df](https://github.com/unkn0wn-root/kioshun/commit/90047dfe759fa1188da4ba083be2cca9ec7fad4e)) 101 | 102 | ## [0.3.0](https://github.com/unkn0wn-root/kioshun/compare/v0.2.3...v0.3.0) (2025-09-02) 103 | 104 | 105 | ### Features 106 | 107 | * add dockerfile(cmd) and docker-compose (_examples) ([5e777a6](https://github.com/unkn0wn-root/kioshun/commit/5e777a62fd56047dd4aa906d2ceb97d8d79598f5)) 108 | * add kioshun adapter ([48a79af](https://github.com/unkn0wn-root/kioshun/commit/48a79afad83b1bc04abc650cfecc052311dcc306)) 109 | * add kioshun node starter to cmd ([c6df4e9](https://github.com/unkn0wn-root/kioshun/commit/c6df4e9c3db3f29db28fb5687abc3feb0820acc6)) 110 | * add snapshot bridge between kioshun and cluster ([a8cd204](https://github.com/unkn0wn-root/kioshun/commit/a8cd20442b85eb505896bc21fe7f7cf000f141f5)) 111 | * add timeout-only exponential backoff ([ca3fbc1](https://github.com/unkn0wn-root/kioshun/commit/ca3fbc130643b73b9f7105b764ad1eca28d3047c)) 112 | * alias distributed cache as client and accept context ([888f5cb](https://github.com/unkn0wn-root/kioshun/commit/888f5cb4c9381855e6f44848d0edc4dd8c39793a)) 113 | * get from other replicas and add panelize node ([7f4803a](https://github.com/unkn0wn-root/kioshun/commit/7f4803ad1f69210773b0695d0ce6485c78d2c807)) 114 | * move trie and util to internal and fix adapter type ([7ff9dac](https://github.com/unkn0wn-root/kioshun/commit/7ff9dac93ee5b71c991e6d56708d257197bdc7be)) 115 | * reset peer on peer closed ([b17ead0](https://github.com/unkn0wn-root/kioshun/commit/b17ead0f492d7b47e532a0b0e306361e40064b77)) 116 | 117 | 118 | ### Bug Fixes 119 | 120 | * adapter type ([98d3842](https://github.com/unkn0wn-root/kioshun/commit/98d38429c08b1477e3c176397f2292d3e2cc4146)) 121 | 122 | ### [0.2.3](https://github.com/unkn0wn-root/kioshun/compare/v0.2.2...v0.2.3) (2025-08-29) 123 | 124 | ### [0.2.2](https://github.com/unkn0wn-root/kioshun/compare/v0.2.1...v0.2.2) (2025-08-29) 125 | 126 
| ### [0.2.1](https://github.com/unkn0wn-root/kioshun/compare/v0.2.0...v0.2.1) (2025-08-28) 127 | -------------------------------------------------------------------------------- /_examples/advanced/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "time" 7 | 8 | "github.com/unkn0wn-root/kioshun" 9 | ) 10 | 11 | type User struct { 12 | ID string `json:"id"` 13 | Name string `json:"name"` 14 | Email string `json:"email"` 15 | CreatedAt time.Time `json:"created_at"` 16 | } 17 | 18 | func main() { 19 | fmt.Println("=== Advanced Cache Usage Example ===") 20 | 21 | config := cache.Config{ 22 | MaxSize: 10000, 23 | ShardCount: 16, 24 | CleanupInterval: 1 * time.Minute, 25 | DefaultTTL: 30 * time.Minute, 26 | EvictionPolicy: cache.LRU, 27 | StatsEnabled: true, 28 | } 29 | 30 | userCache := cache.New[string, User](config) 31 | defer userCache.Close() 32 | 33 | fmt.Println("\n1. Operations with complex data types:") 34 | users := []User{ 35 | {ID: "1", Name: "Alice Johnson", Email: "alice@example.com", CreatedAt: time.Now()}, 36 | {ID: "2", Name: "Bob Smith", Email: "bob@example.com", CreatedAt: time.Now()}, 37 | {ID: "3", Name: "Charlie Brown", Email: "charlie@example.com", CreatedAt: time.Now()}, 38 | } 39 | 40 | for _, user := range users { 41 | userCache.Set(user.ID, user, time.Duration(30+len(user.Name))*time.Second) 42 | } 43 | 44 | fmt.Println("\n2. Concurrent access:") 45 | 46 | var wg sync.WaitGroup 47 | numWorkers := 10 48 | operationsPerWorker := 100 49 | 50 | for i := 0; i < numWorkers; i++ { 51 | wg.Add(1) 52 | go func(workerID int) { 53 | defer wg.Done() 54 | 55 | for j := 0; j < operationsPerWorker; j++ { 56 | key := fmt.Sprintf("user:%d:%d", workerID, j) 57 | user := User{ 58 | ID: key, 59 | Name: fmt.Sprintf("User %d-%d", workerID, j), 60 | Email: fmt.Sprintf("user%d_%d@example.com", workerID, j), 61 | CreatedAt: time.Now(), 62 | } 63 | 64 | switch j % 4 { 65 | case 0: // Set 66 | userCache.Set(key, user, 1*time.Hour) 67 | case 1: // Get 68 | if u, found := userCache.Get(key); found { 69 | _ = u.Name // Use the value 70 | } 71 | case 2: // GetWithTTL 72 | if u, ttl, found := userCache.GetWithTTL(key); found { 73 | _ = u.Name 74 | _ = ttl 75 | } 76 | case 3: // Exists 77 | userCache.Exists(key) 78 | } 79 | } 80 | }(i) 81 | } 82 | 83 | wg.Wait() 84 | fmt.Printf("Completed %d concurrent ops\n", numWorkers*operationsPerWorker) 85 | 86 | fmt.Println("\n3. Cache manager for multiple cache instances:") 87 | 88 | manager := cache.NewManager() 89 | defer manager.CloseAll() 90 | 91 | manager.RegisterCache("users", cache.UserCacheConfig()) 92 | manager.RegisterCache("sessions", cache.SessionCacheConfig()) 93 | manager.RegisterCache("api_responses", cache.APICacheConfig()) 94 | 95 | userManagedCache, _ := cache.GetCache[string, User](manager, "users") 96 | sessionCache, _ := cache.GetCache[string, string](manager, "sessions") 97 | apiCache, _ := cache.GetCache[string, []byte](manager, "api_responses") 98 | 99 | userManagedCache.Set("managed_user", users[0], 1*time.Hour) 100 | sessionCache.Set("session_123", "user_session_token", 2*time.Hour) 101 | apiCache.Set("api_response_1", []byte(`{"status": "success"}`), 15*time.Minute) 102 | 103 | fmt.Println("\n4. 
Global cache usage:") 104 | 105 | cache.RegisterGlobalCache("global_users", cache.UserCacheConfig()) 106 | cache.RegisterGlobalCache("global_sessions", cache.SessionCacheConfig()) 107 | 108 | globalUserCache, _ := cache.GetGlobalCache[string, User]("global_users") 109 | globalSessionCache, _ := cache.GetGlobalCache[string, string]("global_sessions") 110 | 111 | globalUserCache.Set("global_user_1", users[0], 1*time.Hour) 112 | globalSessionCache.Set("global_session_1", "global_token", 2*time.Hour) 113 | 114 | fmt.Println("\n5. Performance monitoring:") 115 | 116 | // Generate some activity 117 | for i := 0; i < 1000; i++ { 118 | key := fmt.Sprintf("perf_test_%d", i) 119 | userCache.Set(key, users[i%len(users)], 1*time.Hour) 120 | 121 | // Mix reads and writes 122 | if i%3 == 0 { 123 | userCache.Get(key) 124 | } 125 | } 126 | 127 | stats := userCache.Stats() 128 | fmt.Printf("Performance Statistics:\n") 129 | fmt.Printf(" Total Operations: %d\n", stats.Hits+stats.Misses) 130 | fmt.Printf(" Hits: %d\n", stats.Hits) 131 | fmt.Printf(" Misses: %d\n", stats.Misses) 132 | fmt.Printf(" Hit Ratio: %.2f%%\n", stats.HitRatio*100) 133 | fmt.Printf(" Evictions: %d\n", stats.Evictions) 134 | fmt.Printf(" Expirations: %d\n", stats.Expirations) 135 | fmt.Printf(" Current Size: %d\n", stats.Size) 136 | fmt.Printf(" Max Capacity: %d\n", stats.Capacity) 137 | fmt.Printf(" Shards: %d\n", stats.Shards) 138 | 139 | fmt.Println("\n6. TTL and expiration handling:") 140 | 141 | // short TTL 142 | shortTTLCache := cache.NewWithDefaults[string, string]() 143 | defer shortTTLCache.Close() 144 | 145 | shortTTLCache.Set("short_lived_1", "expires_soon", 1*time.Second) 146 | shortTTLCache.Set("short_lived_2", "expires_later", 3*time.Second) 147 | 148 | fmt.Printf("Initial size: %d\n", shortTTLCache.Size()) 149 | 150 | time.Sleep(2 * time.Second) 151 | fmt.Printf("After 2 seconds: %d\n", shortTTLCache.Size()) 152 | 153 | if _, found := shortTTLCache.Get("short_lived_1"); !found { 154 | fmt.Println("short_lived_1 has expired") 155 | } 156 | if _, found := shortTTLCache.Get("short_lived_2"); found { 157 | fmt.Println("short_lived_2 still exists") 158 | } 159 | 160 | fmt.Println("\n7. Manual cleanup:") 161 | 162 | userCache.TriggerCleanup() 163 | fmt.Println("Manual cleanup triggered") 164 | 165 | fmt.Println("\n8. Batch operations:") 166 | 167 | batchCache := cache.NewWithDefaults[string, string]() 168 | defer batchCache.Close() 169 | 170 | // simulate batch insert 171 | start := time.Now() 172 | for i := 0; i < 10000; i++ { 173 | batchCache.Set(fmt.Sprintf("batch_key_%d", i), fmt.Sprintf("batch_value_%d", i), 1*time.Hour) 174 | } 175 | insertDuration := time.Since(start) 176 | 177 | // batch read 178 | start = time.Now() 179 | for i := 0; i < 10000; i++ { 180 | batchCache.Get(fmt.Sprintf("batch_key_%d", i)) 181 | } 182 | readDuration := time.Since(start) 183 | 184 | fmt.Printf("Batch insert (10,000 items): %v\n", insertDuration) 185 | fmt.Printf("Batch read (10,000 items): %v\n", readDuration) 186 | fmt.Printf("Insert rate: %.0f ops/sec\n", 10000/insertDuration.Seconds()) 187 | fmt.Printf("Read rate: %.0f ops/sec\n", 10000/readDuration.Seconds()) 188 | 189 | fmt.Println("\n=== Example completed ===") 190 | } 191 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | ![Kioshun Logo](assets/logo.JPG) 3 | 4 | # Kioshun - In-Memory Cache for Go 5 | 6 | *"kee-oh-shoon" /kiːoʊʃuːn/* 7 | 8 | [![Go Version](https://img.shields.io/badge/Go-1.24+-blue.svg)](https://golang.org) 9 | [![License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) 10 | [![CI](https://github.com/unkn0wn-root/kioshun/actions/workflows/test.yml/badge.svg)](https://github.com/unkn0wn-root/kioshun/actions) 11 | 12 | 13 | *Thread-safe, sharded in-memory cache for Go - with an optional peer-to-peer cluster backend*
15 | 16 | ## Index 17 | 18 | - [What is Kioshun?](#what-is-kioshun) 19 | - [Cluster (Overview)](#cluster-overview) 20 | - [Internals](INTERNALS.md) 21 | - [Installation](#installation) 22 | - [Quick Start](#quick-start) 23 | - [Configuration](#configuration) 24 | - [API](#api) 25 | - [HTTP Middleware](MIDDLEWARE.md) 26 | - [Benchmark Results](#benchmark-results) 27 | 28 | ## What is Kioshun? 29 | 30 | Kioshun is a thread-safe (and fast!), in-memory cache for Go. You can **run it as a local cache** just like any other in-memory caches, or turn on the **peer-to-peer cluster** when you want replicas across hosts. 31 | 32 | If you want to know more about Kioshun internals and how it works under the hood - see [Kioshun Internals](INTERNALS.md) 33 | 34 | ## Cluster Overview 35 | 36 | > [!NOTE] 37 | > Clustering is fully **optional**. If you don’t enable the cluster, Kioshun runs as a standalone, in‑memory cache. 38 | 39 | Kioshun’s cluster turns every service instance into a **small, self-managing peer-to-peer cache**. You just point each one at a few reachable *Seeds* and it discovers the rest, builds a weighted rendezvous and replicates writes with configurable RF/WC so hot data stays local. Gossip keeps the peer list fresh, hinted handoff plus backfill repair all gaps, and reads go straight to the primary owner while read-through uses single-flight leases. 40 | 41 | ``` 42 | ┌─────────────┐ Gossip + Weights ┌─────────────┐ 43 | │ Service A │◀──────────────────────────▶│ Service B │ 44 | │ + Node │◀───────────▶◀───────────▶ │ + Node │ 45 | └──────┬──────┘ └──────┬──────┘ 46 | │ Owner‑routed Get/Set (RF) │ 47 | └──────────────▶◀──────────────────────────┘ 48 | Service C + Node 49 | ``` 50 | 51 | Small multinode example: 52 | 53 | ```bash 54 | # on each server 55 | CACHE_BIND=:4443 56 | CACHE_PUBLIC=srv-a:4443 # srv-b / srv-c on others 57 | CACHE_SEEDS=srv-a:4443,srv-b:4443,srv-c:4443 58 | CACHE_AUTH=supersecret 59 | ``` 60 | 61 | ```go 62 | // in code 63 | local := cache.NewWithDefaults[string, []byte]() 64 | 65 | cfg := cluster.Default() 66 | cfg.BindAddr = os.Getenv("CACHE_BIND") 67 | cfg.PublicURL = os.Getenv("CACHE_PUBLIC") 68 | cfg.Seeds = strings.Split(os.Getenv("CACHE_SEEDS"), ",") 69 | cfg.ReplicationFactor = 3; cfg.WriteConcern = 2 70 | cfg.Sec.AuthToken = os.Getenv("CACHE_AUTH") 71 | 72 | node := cluster.NewNode[string, []byte](cfg, cluster.StringKeyCodec[string]{}, local, cluster.BytesCodec{}) 73 | if err := node.Start(); err != nil { 74 | panic(err) 75 | } 76 | 77 | dc := cluster.NewDistributedCache[string, []byte](node) 78 | ``` 79 | 80 | Only a subset of nodes need to appear in `CACHE_SEEDS`. The list is purely for bootstrap - include a few stable peers so new processes can reach at least one live seed, then gossip distributes the rest of the membership automatically, whether you run 3 caches or 20. 81 | 82 | > See **CLUSTER.md** for more details. 
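Once the node is running, reads and writes are context-aware. A minimal usage sketch against the `node` built above - the key, value, and TTL are illustrative:

```go
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()

// Routed to the key's owners and replicated per ReplicationFactor;
// returns once WriteConcern acknowledgements arrive.
if err := node.Set(ctx, "user:123", []byte("profile"), 5*time.Minute); err != nil {
	log.Printf("set failed: %v", err)
}

// Served locally when this node is the primary owner,
// otherwise fetched from a replica.
if v, found, err := node.Get(ctx, "user:123"); err == nil && found {
	fmt.Printf("user:123 = %s\n", v)
}
```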
83 | 84 | ## Installation 85 | 86 | ```bash 87 | go get github.com/unkn0wn-root/kioshun 88 | ``` 89 | 90 | ## Quick Start 91 | 92 | ```go 93 | package main 94 | 95 | import ( 96 | "fmt" 97 | "time" 98 | 99 | cache "github.com/unkn0wn-root/kioshun" 100 | ) 101 | 102 | func main() { 103 | // Create cache with default configuration 104 | c := cache.NewWithDefaults[string, string]() 105 | defer c.Close() 106 | 107 | // Set with default TTL (30 min) 108 | c.Set("user:123", "David Nice 1", cache.DefaultExpiration) 109 | 110 | // Set with no expiration 111 | c.Set("user:123", "David Nice 2", cache.NoExpiration) 112 | 113 | // Set value with custom TTL 114 | c.Set("user:123", "David Nice 3", 5*time.Minute) 115 | 116 | // Get value 117 | if value, found := c.Get("user:123"); found { 118 | fmt.Printf("User: %s\n", value) 119 | } 120 | 121 | // Get cache statistics 122 | stats := c.Stats() 123 | fmt.Printf("Hit ratio: %.2f%%\n", stats.HitRatio*100) 124 | } 125 | ``` 126 | 127 | ## Configuration 128 | 129 | ### Basic Configuration 130 | 131 | ```go 132 | config := cache.Config{ 133 | MaxSize: 100000, // Maximum number of items 134 | ShardCount: 16, // Number of shards (0 = auto-detect) 135 | CleanupInterval: 5 * time.Minute, // Cleanup frequency 136 | DefaultTTL: 30 * time.Minute, // Default expiration time 137 | EvictionPolicy: cache.AdmissionLFU, // Eviction algorithm (default) 138 | StatsEnabled: true, // Enable statistics collection 139 | } 140 | 141 | cache := cache.New[string, any](config) 142 | ``` 143 | 144 | ## API 145 | 146 | ```go 147 | cache.Set(key, value, ttl time.Duration) error 148 | cache.SetWithCallback(key, value, ttl, callback func(key, value)) error 149 | cache.Get(key) (value, found bool) 150 | cache.GetWithTTL(key) (value, ttl time.Duration, found bool) 151 | cache.Keys() []K 152 | cache.Clear() 153 | cache.Delete(key) bool 154 | cache.Exists(key) bool 155 | cache.Size() int64 156 | cache.Stats() Stats 157 | cache.TriggerCleanup() 158 | cache.Close() error 159 | ``` 160 | 161 | ### Statistics 162 | 163 | ```go 164 | type Stats struct { 165 | Hits int64 166 | Misses int64 167 | Evictions int64 168 | Expirations int64 169 | Size int64 170 | Capacity int64 171 | HitRatio float64 172 | Shards int 173 | } 174 | ``` 175 | 176 | ## HTTP Middleware 177 | 178 | Kioshun provides HTTP middleware out-of-the-box. 179 | 180 | ```go 181 | config := cache.DefaultMiddlewareConfig() 182 | config.DefaultTTL = 5 * time.Minute 183 | config.MaxSize = 100000 184 | 185 | middleware := cache.NewHTTPCacheMiddleware(config) 186 | defer middleware.Close() 187 | 188 | http.Handle("/api/users", middleware.Middleware(usersHandler)) 189 | ``` 190 | > See **[MIDDLEWARE.md](MIDDLEWARE.md)** for complete documentation, examples, and advanced configuration. 
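For context, a minimal end-to-end sketch of the same wiring - the handler body, route, and port are illustrative, and `Middleware` is assumed to accept an `http.Handler`:

```go
func usersHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	_, _ = w.Write([]byte(`[{"id":"123","name":"David"}]`))
}

func main() {
	config := cache.DefaultMiddlewareConfig()
	config.DefaultTTL = 5 * time.Minute

	middleware := cache.NewHTTPCacheMiddleware(config)
	defer middleware.Close()

	// Identical requests within the TTL are served from the cache.
	http.Handle("/api/users", middleware.Middleware(http.HandlerFunc(usersHandler)))
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```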
191 | 192 | ## Benchmark Results 193 | 194 | Latest benchmark run (Apple M4 Max, Go 1.24.7): 195 | - `SET`: 100,000,000 ops/sec · 75.55 ns/op · 41 B/op · 3 allocs/op 196 | - `GET`: 231,967,180 ops/sec · 25.87 ns/op · 31 B/op · 2 allocs/op 197 | - `Real-World`: 52,742,550 ops/sec · 65.25 ns/op · 48 B/op · 3 allocs/op 198 | 199 | Full suite: [_benchmarks/README.md](_benchmarks/README.md) 200 | -------------------------------------------------------------------------------- /cluster/replication.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "sync/atomic" 7 | "time" 8 | 9 | cbor "github.com/fxamacker/cbor/v2" 10 | ) 11 | 12 | type replicator[K comparable, V any] struct { 13 | node *Node[K, V] 14 | } 15 | 16 | // replicateSet sends a write to all owners and waits for WC acknowledgements. 17 | // - Pre-compresses the value once per request to avoid per-peer work. 18 | // - Enqueues hinted-handoff on any peer failure so a recovering node can catch up. 19 | // - Fast path: if local commit already satisfies WC, fire-and-forget to peers and 20 | // rely on hinted handoff to close any gaps, the caller is unblocked. 21 | func (r *replicator[K, V]) replicateSet(ctx context.Context, key []byte, val []byte, exp int64, ver uint64, owners []*nodeMeta) error { 22 | required := r.node.cfg.WriteConcern 23 | if required < 1 { 24 | required = 1 25 | } 26 | 27 | acks := 0 28 | if len(owners) > 0 && owners[0].ID == r.node.cfg.ID { 29 | acks++ 30 | } 31 | want := required - acks 32 | 33 | // pre-compress once for all peers. 34 | b2, cp := r.node.maybeCompress(val) 35 | 36 | // helper to send and enqueue hint on failure 37 | sendOne := func(pid NodeID, pc *peerConn) { 38 | if pc == nil { 39 | if r.node.hh != nil { 40 | r.node.hh.enqueueSet(pid, key, b2, exp, ver, cp) 41 | } 42 | return 43 | } 44 | 45 | reqID := r.node.nextReqID() 46 | msg := &MsgSet{Base: Base{T: MTSet, ID: reqID}, Key: key, Val: b2, Exp: exp, Ver: ver, Cp: cp} 47 | raw, err := pc.request(msg, reqID, r.node.cfg.Sec.WriteTimeout) 48 | if err != nil { 49 | if r.node.hh != nil { 50 | r.node.hh.enqueueSet(pid, key, b2, exp, ver, cp) 51 | } 52 | return 53 | } 54 | 55 | var resp MsgSetResp 56 | if e := cbor.Unmarshal(raw, &resp); e != nil || !resp.OK { 57 | if r.node.hh != nil { 58 | r.node.hh.enqueueSet(pid, key, b2, exp, ver, cp) 59 | } 60 | return 61 | } 62 | } 63 | 64 | // Fast path: local already satisfies WC. Fire and forget to remaining owners, 65 | // still capturing failures into hinted handoff without blocking the caller. 66 | if want <= 0 { 67 | for _, own := range owners { 68 | if own.ID == r.node.cfg.ID { 69 | continue 70 | } 71 | pc := r.node.getPeer(own.ID) 72 | go sendOne(own.ID, pc) 73 | } 74 | return nil 75 | } 76 | 77 | // slow path: need acknowledgements from peers to meet write concern. 
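// One goroutine per remote owner sends the MTSet and reports into errCh; each
// success decrements `remaining`, and the goroutine that drives it to zero sends
// nil to unblock the caller. Failures enqueue a handoff hint; the select loop
// below keeps waiting for a success until the timer fires.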
78 | remaining := int32(want) 79 | timer := time.NewTimer(r.node.cfg.Sec.WriteTimeout + time.Second) 80 | defer timer.Stop() 81 | 82 | errCh := make(chan error, len(owners)) 83 | for _, own := range owners { 84 | if own.ID == r.node.cfg.ID { 85 | continue 86 | } 87 | pc := r.node.getPeer(own.ID) 88 | if pc == nil { 89 | // we know this one will miss; enqueue and continue 90 | if r.node.hh != nil { 91 | r.node.hh.enqueueSet(own.ID, key, b2, exp, ver, cp) 92 | } 93 | continue 94 | } 95 | 96 | go func(pid NodeID, p *peerConn) { 97 | reqID := r.node.nextReqID() 98 | msg := &MsgSet{Base: Base{T: MTSet, ID: reqID}, Key: key, Val: b2, Exp: exp, Ver: ver, Cp: cp} 99 | raw, err := p.request(msg, reqID, r.node.cfg.Sec.WriteTimeout) 100 | if err != nil { 101 | // enqueue and return an error 102 | if r.node.hh != nil { 103 | r.node.hh.enqueueSet(pid, key, b2, exp, ver, cp) 104 | } 105 | errCh <- err 106 | return 107 | } 108 | 109 | var resp MsgSetResp 110 | if e := cbor.Unmarshal(raw, &resp); e != nil || !resp.OK { 111 | if r.node.hh != nil { 112 | r.node.hh.enqueueSet(pid, key, b2, exp, ver, cp) 113 | } 114 | if e == nil && resp.Err != "" { 115 | errCh <- errors.New(resp.Err) 116 | } else { 117 | errCh <- errors.New("set not ok") 118 | } 119 | return 120 | } 121 | // success 122 | if atomic.AddInt32(&remaining, -1) <= 0 { 123 | errCh <- nil 124 | } 125 | }(own.ID, pc) 126 | } 127 | 128 | for { 129 | select { 130 | case err := <-errCh: 131 | if err == nil { 132 | return nil // met write concern 133 | } 134 | // keep waiting for success until timeout; errors alone don't abort early 135 | case <-timer.C: 136 | return ErrTimeout 137 | case <-ctx.Done(): 138 | return ctx.Err() 139 | } 140 | } 141 | } 142 | 143 | func (r *replicator[K, V]) replicateDelete(ctx context.Context, key []byte, owners []*nodeMeta, ver uint64) error { 144 | required := r.node.cfg.WriteConcern 145 | if required < 1 { 146 | required = 1 147 | } 148 | acks := 0 149 | if len(owners) > 0 && owners[0].ID == r.node.cfg.ID { 150 | acks++ 151 | } 152 | want := required - acks 153 | 154 | sendOne := func(pid NodeID, pc *peerConn) { 155 | if pc == nil { 156 | if r.node.hh != nil { 157 | r.node.hh.enqueueDel(pid, key, ver) 158 | } 159 | return 160 | } 161 | 162 | reqID := r.node.nextReqID() 163 | msg := &MsgDel{Base: Base{T: MTDelete, ID: reqID}, Key: key, Ver: ver} 164 | raw, err := pc.request(msg, reqID, r.node.cfg.Sec.WriteTimeout) 165 | if err != nil { 166 | if r.node.hh != nil { 167 | r.node.hh.enqueueDel(pid, key, ver) 168 | } 169 | return 170 | } 171 | 172 | var resp MsgDelResp 173 | if e := cbor.Unmarshal(raw, &resp); e != nil || !resp.OK { 174 | if r.node.hh != nil { 175 | r.node.hh.enqueueDel(pid, key, ver) 176 | } 177 | return 178 | } 179 | } 180 | 181 | if want <= 0 { 182 | for _, own := range owners { 183 | if own.ID == r.node.cfg.ID { 184 | continue 185 | } 186 | pc := r.node.getPeer(own.ID) 187 | go sendOne(own.ID, pc) 188 | } 189 | return nil 190 | } 191 | 192 | remaining := int32(want) 193 | timer := time.NewTimer(r.node.cfg.Sec.WriteTimeout + time.Second) 194 | defer timer.Stop() 195 | errCh := make(chan error, len(owners)) 196 | 197 | for _, own := range owners { 198 | if own.ID == r.node.cfg.ID { 199 | continue 200 | } 201 | pc := r.node.getPeer(own.ID) 202 | if pc == nil { 203 | if r.node.hh != nil { 204 | r.node.hh.enqueueDel(own.ID, key, ver) 205 | } 206 | continue 207 | } 208 | 209 | go func(pid NodeID, p *peerConn) { 210 | reqID := r.node.nextReqID() 211 | msg := &MsgDel{Base: Base{T: MTDelete, ID: reqID}, Key: key, 
Ver: ver} 212 | raw, err := p.request(msg, reqID, r.node.cfg.Sec.WriteTimeout) 213 | if err != nil { 214 | if r.node.hh != nil { 215 | r.node.hh.enqueueDel(pid, key, ver) 216 | } 217 | errCh <- err 218 | return 219 | } 220 | 221 | var resp MsgDelResp 222 | if e := cbor.Unmarshal(raw, &resp); e != nil || !resp.OK { 223 | if r.node.hh != nil { 224 | r.node.hh.enqueueDel(pid, key, ver) 225 | } 226 | if e == nil && resp.Err != "" { 227 | errCh <- errors.New(resp.Err) 228 | } else { 229 | errCh <- errors.New("delete not ok") 230 | } 231 | return 232 | } 233 | if atomic.AddInt32(&remaining, -1) <= 0 { 234 | errCh <- nil 235 | } 236 | }(own.ID, pc) 237 | } 238 | 239 | for { 240 | select { 241 | case err := <-errCh: 242 | if err == nil { 243 | return nil 244 | } 245 | case <-timer.C: 246 | return ErrTimeout 247 | case <-ctx.Done(): 248 | return ctx.Err() 249 | } 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /eviction.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | ) 7 | 8 | const ( 9 | // AdmissionLFU sampling: take a small randomized sample and pick the least-frequent (tie: oldest). 10 | defaultAdmissionLFUSampleSize = 5 11 | maxAdmissionLFUSampleSize = 20 12 | ) 13 | 14 | // evictor defines a policy that evicts exactly one item when a shard is full (called under shard write lock). 15 | type evictor[K comparable, V any] interface { 16 | evict(s *shard[K, V], itemPool *sync.Pool, statsEnabled bool) bool 17 | } 18 | 19 | // lruEvictor removes the least-recently-used item using the shard's intrusive LRU list. 20 | type lruEvictor[K comparable, V any] struct{} 21 | 22 | // evict unlinks and recycles the tail.prev (LRU) item; updates size/stats; O(1). 23 | func (e lruEvictor[K, V]) evict(s *shard[K, V], itemPool *sync.Pool, statsEnabled bool) bool { 24 | // Empty list check: only sentinels present. 25 | if s.tail.prev == s.head { 26 | return false 27 | } 28 | 29 | // Victim is the node just before the tail sentinel. 30 | lru := s.tail.prev 31 | if lru != nil && lru.key != nil { 32 | // cacheItem.key is stored as 'any' to allow deletion without recomputing the hash. 33 | // We assert to K here; inserts always set key with the correct type. 34 | if key, ok := lru.key.(K); ok { 35 | delete(s.data, key) 36 | } 37 | 38 | s.removeFromLRU(lru) // O(1) unlink from intrusive list 39 | itemPool.Put(lru) // recycle to reduce GC churn 40 | atomic.AddInt64(&s.size, -1) 41 | 42 | if statsEnabled { 43 | atomic.AddInt64(&s.evictions, 1) 44 | } 45 | return true 46 | } 47 | return false 48 | } 49 | 50 | // lfuEvictor removes the global least-frequent item via the O(1) LFU bucket list. 51 | type lfuEvictor[K comparable, V any] struct{} 52 | 53 | // evict pops the LFU item, unlinks from LRU too, recycles, and updates size/stats; O(1). 54 | func (e lfuEvictor[K, V]) evict(s *shard[K, V], itemPool *sync.Pool, statsEnabled bool) bool { 55 | lfu := s.lfuList.removeLFU() 56 | if lfu == nil { 57 | return false 58 | } 59 | 60 | if lfu.key != nil { 61 | if key, ok := lfu.key.(K); ok { 62 | delete(s.data, key) 63 | } 64 | 65 | // We maintain the LRU list for uniform unlinking/cleanup across policies. 
66 | s.removeFromLRU(lfu) 67 | itemPool.Put(lfu) 68 | atomic.AddInt64(&s.size, -1) 69 | 70 | if statsEnabled { 71 | atomic.AddInt64(&s.evictions, 1) 72 | } 73 | return true 74 | } 75 | return false 76 | } 77 | 78 | // fifoEvictor treats the LRU list as insertion order and removes the oldest item. 79 | type fifoEvictor[K comparable, V any] struct{} 80 | 81 | // evict deletes the earliest inserted (tail.prev), unlinks from optional LFU, and updates stats; O(1). 82 | func (e fifoEvictor[K, V]) evict(s *shard[K, V], itemPool *sync.Pool, statsEnabled bool) bool { 83 | if s.tail.prev == s.head { 84 | return false 85 | } 86 | 87 | oldest := s.tail.prev 88 | if oldest != nil && oldest.key != nil { 89 | if key, ok := oldest.key.(K); ok { 90 | delete(s.data, key) 91 | } 92 | s.removeFromLRU(oldest) 93 | 94 | if s.lfuList != nil { 95 | s.lfuList.remove(oldest) 96 | } 97 | 98 | itemPool.Put(oldest) 99 | atomic.AddInt64(&s.size, -1) 100 | if statsEnabled { 101 | atomic.AddInt64(&s.evictions, 1) 102 | } 103 | return true 104 | } 105 | return false 106 | } 107 | 108 | // admissionLFUEvictor does approximate-LFU by sampling and optionally gating via the admission filter. 109 | type admissionLFUEvictor[K comparable, V any] struct { 110 | sampleSize int // desired sample size; clamped to [1, maxAdmissionLFUSampleSize] 111 | } 112 | 113 | // pickVictim scans up to sampleSize items (randomized map order) and returns the worst (freq↑, age↑); nil if empty. 114 | func (e admissionLFUEvictor[K, V]) pickVictim(s *shard[K, V]) *cacheItem[V] { 115 | if len(s.data) == 0 { 116 | return nil 117 | } 118 | 119 | n := e.sampleSize 120 | if n <= 0 { 121 | n = defaultAdmissionLFUSampleSize 122 | } else if n > maxAdmissionLFUSampleSize { 123 | n = maxAdmissionLFUSampleSize 124 | } 125 | if n > len(s.data) { 126 | n = len(s.data) 127 | } 128 | 129 | var victim *cacheItem[V] 130 | cnt := 0 131 | for _, it := range s.data { 132 | // Lower frequency is worse; if equal, older lastAccess is worse. 133 | if victim == nil || 134 | it.frequency < victim.frequency || 135 | (it.frequency == victim.frequency && it.lastAccess < victim.lastAccess) { 136 | victim = it 137 | } 138 | cnt++ 139 | if cnt >= n { 140 | break // bounded sample scan 141 | } 142 | } 143 | return victim 144 | } 145 | 146 | // removeVictim unlinks victim from shard structures, recycles it, updates size/stats (caller holds write lock). 147 | func (e admissionLFUEvictor[K, V]) removeVictim(s *shard[K, V], victim *cacheItem[V], itemPool *sync.Pool, statsEnabled bool) { 148 | if key, ok := victim.key.(K); ok { 149 | delete(s.data, key) 150 | } 151 | s.removeFromLRU(victim) 152 | // No LFU heap: AdmissionLFU doesn't maintain the O(1) LFU list. 153 | itemPool.Put(victim) 154 | atomic.AddInt64(&s.size, -1) 155 | if statsEnabled { 156 | atomic.AddInt64(&s.evictions, 1) 157 | } 158 | } 159 | 160 | // evict samples, picks a victim, evicts it, and records lastVictimFrequency for observability. 161 | func (e admissionLFUEvictor[K, V]) evict( 162 | s *shard[K, V], 163 | itemPool *sync.Pool, 164 | statsEnabled bool, 165 | ) bool { 166 | victim := e.pickVictim(s) 167 | if victim == nil { 168 | return false 169 | } 170 | if s.admission != nil { 171 | s.lastVictimFrequency = uint64(victim.frequency) 172 | } 173 | e.removeVictim(s, victim, itemPool, statsEnabled) 174 | 175 | return true 176 | } 177 | 178 | // evictWithAdmission runs sample → shouldAdmit → (optional) evict; denies return false to let Set() skip pollution. 
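// A denial returns false without evicting, so Set() can skip inserting a key the
// filter considers low-value; an admission evicts the sampled victim and records
// eviction pressure so the filter can adapt its admission probability.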
179 | func (e admissionLFUEvictor[K, V]) evictWithAdmission( 180 | s *shard[K, V], 181 | itemPool *sync.Pool, 182 | statsEnabled bool, 183 | admission *adaptiveAdmissionFilter, 184 | keyHash uint64, 185 | ) bool { 186 | victim := e.pickVictim(s) 187 | if victim == nil { 188 | return false 189 | } 190 | 191 | freq := uint64(victim.frequency) 192 | victimAge := victim.lastAccess 193 | s.lastVictimFrequency = freq // stored for deb/metrics 194 | 195 | // Admission gate: compare candidate(keyHash) vs victim(freq/age). 196 | if !admission.shouldAdmit(keyHash, freq, victimAge) { 197 | return false 198 | } 199 | e.removeVictim(s, victim, itemPool, statsEnabled) 200 | 201 | // Feed back pressure to the admission filter for adaptive probability. 202 | admission.RecordEviction() 203 | 204 | return true 205 | } 206 | 207 | // createEvictor returns the policy implementation for the selected EvictionPolicy. 208 | func createEvictor[K comparable, V any](policy EvictionPolicy) evictor[K, V] { 209 | switch policy { 210 | case LRU: 211 | return lruEvictor[K, V]{} 212 | case LFU: 213 | return lfuEvictor[K, V]{} 214 | case FIFO: 215 | return fifoEvictor[K, V]{} 216 | case AdmissionLFU: 217 | return admissionLFUEvictor[K, V]{sampleSize: defaultAdmissionLFUSampleSize} 218 | default: 219 | return admissionLFUEvictor[K, V]{sampleSize: defaultAdmissionLFUSampleSize} 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /benchmarks/cluster/node/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "log" 7 | "net/http" 8 | "os" 9 | "strconv" 10 | "strings" 11 | "time" 12 | 13 | cache "github.com/unkn0wn-root/kioshun" 14 | "github.com/unkn0wn-root/kioshun/cluster" 15 | ) 16 | 17 | type setReq struct { 18 | K string `json:"k"` 19 | V string `json:"v"` 20 | TTLms int64 `json:"ttl_ms"` 21 | } 22 | 23 | func getenv(k, d string) string { 24 | if v := os.Getenv(k); v != "" { 25 | return v 26 | } 27 | return d 28 | } 29 | 30 | func splitCSV(s string) []string { 31 | parts := strings.Split(s, ",") 32 | out := make([]string, 0, len(parts)) 33 | for _, p := range parts { 34 | p = strings.TrimSpace(p) 35 | if p != "" { 36 | out = append(out, p) 37 | } 38 | } 39 | return out 40 | } 41 | 42 | func main() { 43 | port := getenv("PORT", "8081") 44 | bind := getenv("CACHE_BIND", ":5011") 45 | pub := getenv("CACHE_PUBLIC", "node1:5011") 46 | seeds := splitCSV(getenv("CACHE_SEEDS", pub)) 47 | auth := getenv("CACHE_AUTH", "") 48 | allowKill := strings.ToLower(getenv("ALLOW_KILL", "false")) == "true" 49 | killToken := getenv("KILL_TOKEN", "") 50 | 51 | // in-process local cache 52 | local := cache.NewWithDefaults[string, []byte]() 53 | 54 | // cluster node 55 | cfg := cluster.Default() 56 | cfg.BindAddr = bind 57 | cfg.PublicURL = pub 58 | cfg.Seeds = seeds 59 | cfg.Sec.AuthToken = auth 60 | cfg.ID = cluster.NodeID(cfg.PublicURL) 61 | cfg.PerConnWorkers = 128 62 | cfg.PerConnQueue = 256 63 | cfg.Sec.MaxInflightPerPeer = 512 64 | 65 | // optional via env 66 | if v := getenv("REPLICATION_FACTOR", ""); v != "" { 67 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 68 | cfg.ReplicationFactor = n 69 | } 70 | } else { 71 | cfg.ReplicationFactor = 3 72 | } 73 | 74 | if v := getenv("WRITE_CONCERN", ""); v != "" { 75 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 76 | cfg.WriteConcern = n 77 | } 78 | } else { 79 | cfg.WriteConcern = 2 80 | } 81 | 82 | if v := 
getenv("READ_MAX_FANOUT", ""); v != "" { 83 | if n, err := strconv.Atoi(v); err == nil && n >= 1 { 84 | cfg.ReadMaxFanout = n 85 | } 86 | } 87 | 88 | if v := getenv("READ_PER_TRY_MS", ""); v != "" { 89 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 90 | cfg.ReadPerTryTimeout = time.Duration(n) * time.Millisecond 91 | } 92 | } 93 | 94 | if v := getenv("READ_HEDGE_DELAY_MS", ""); v != "" { 95 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 96 | cfg.ReadHedgeDelay = time.Duration(n) * time.Millisecond 97 | } 98 | } 99 | 100 | if v := getenv("READ_HEDGE_INTERVAL_MS", ""); v != "" { 101 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 102 | cfg.ReadHedgeInterval = time.Duration(n) * time.Millisecond 103 | } 104 | } 105 | 106 | if v := getenv("WRITE_TIMEOUT_MS", ""); v != "" { 107 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 108 | cfg.Sec.WriteTimeout = time.Duration(n) * time.Millisecond 109 | } 110 | } 111 | 112 | if v := getenv("READ_TIMEOUT_MS", ""); v != "" { 113 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 114 | cfg.Sec.ReadTimeout = time.Duration(n) * time.Millisecond 115 | } 116 | } 117 | 118 | if v := getenv("SUSPICION_AFTER_MS", ""); v != "" { 119 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 120 | cfg.SuspicionAfter = time.Duration(n) * time.Millisecond 121 | } 122 | } 123 | 124 | if v := getenv("WEIGHT_UPDATE_MS", ""); v != "" { 125 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 126 | cfg.WeightUpdate = time.Duration(n) * time.Millisecond 127 | } 128 | } 129 | 130 | if v := getenv("GOSSIP_INTERVAL_MS", ""); v != "" { 131 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 132 | cfg.GossipInterval = time.Duration(n) * time.Millisecond 133 | } 134 | } 135 | 136 | kc := cluster.StringKeyCodec[string]{} 137 | vc := cluster.BytesCodec{} 138 | node := cluster.NewNode[string, []byte](cfg, kc, local, vc) 139 | if err := node.Start(); err != nil { 140 | log.Fatalf("start node: %v", err) 141 | } 142 | defer node.Stop() 143 | 144 | mux := http.NewServeMux() 145 | 146 | // GET /get?k=... 
147 | mux.HandleFunc("/get", func(w http.ResponseWriter, r *http.Request) { 148 | k := r.URL.Query().Get("k") 149 | if k == "" { 150 | http.Error(w, "missing k", http.StatusBadRequest) 151 | return 152 | } 153 | 154 | ctx, cancel := context.WithTimeout(r.Context(), 3*time.Second) 155 | defer cancel() 156 | v, found, err := node.Get(ctx, k) 157 | if err != nil { 158 | http.Error(w, err.Error(), http.StatusBadGateway) 159 | return 160 | } 161 | 162 | if !found { 163 | w.Header().Set("X-Cache", "MISS") 164 | http.Error(w, "not found", http.StatusNotFound) 165 | log.Printf("[MISS] k=%s", k) 166 | return 167 | } 168 | 169 | src := "HIT_REMOTE" 170 | if local.Exists(k) { 171 | src = "HIT_LOCAL" 172 | } 173 | w.Header().Set("X-Cache", src) 174 | w.WriteHeader(http.StatusOK) 175 | _, _ = w.Write(v) 176 | log.Printf("[%s] k=%s sz=%d", src, k, len(v)) 177 | }) 178 | 179 | // POST /set {k,v,ttl_ms} 180 | mux.HandleFunc("/set", func(w http.ResponseWriter, r *http.Request) { 181 | if r.Method != http.MethodPost { 182 | http.NotFound(w, r) 183 | return 184 | } 185 | 186 | var in setReq 187 | if err := json.NewDecoder(r.Body).Decode(&in); err != nil { 188 | http.Error(w, err.Error(), http.StatusBadRequest) 189 | return 190 | } 191 | 192 | ttl := time.Duration(in.TTLms) * time.Millisecond 193 | ctx, cancel := context.WithTimeout(r.Context(), 3*time.Second) 194 | defer cancel() 195 | if err := node.Set(ctx, in.K, []byte(in.V), ttl); err != nil { 196 | http.Error(w, err.Error(), http.StatusBadGateway) 197 | return 198 | } 199 | w.WriteHeader(http.StatusOK) 200 | _, _ = w.Write([]byte("OK")) 201 | log.Printf("[SET] k=%s ttl_ms=%d vlen=%d", in.K, in.TTLms, len(in.V)) 202 | }) 203 | 204 | // GET /stats → local shard stats 205 | mux.HandleFunc("/stats", func(w http.ResponseWriter, r *http.Request) { 206 | st := local.Stats() 207 | w.Header().Set("Content-Type", "application/json") 208 | _ = json.NewEncoder(w).Encode(st) 209 | }) 210 | 211 | // GET /ready simple readiness check 212 | mux.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) { 213 | w.WriteHeader(http.StatusOK) 214 | }) 215 | 216 | // POST /kill?token=...&after_ms=0 — for failure injection during tests 217 | mux.HandleFunc("/kill", func(w http.ResponseWriter, r *http.Request) { 218 | if !allowKill { 219 | http.Error(w, "kill disabled", http.StatusForbidden) 220 | return 221 | } 222 | 223 | if killToken != "" && r.URL.Query().Get("token") != killToken { 224 | http.Error(w, "unauthorized", http.StatusUnauthorized) 225 | return 226 | } 227 | 228 | afterMs, _ := strconv.Atoi(r.URL.Query().Get("after_ms")) 229 | w.WriteHeader(http.StatusOK) 230 | _, _ = w.Write([]byte("bye")) 231 | go func() { 232 | if afterMs > 0 { 233 | time.Sleep(time.Duration(afterMs) * time.Millisecond) 234 | } 235 | log.Printf("[KILL] exiting process on request") 236 | node.Stop() 237 | time.Sleep(50 * time.Millisecond) 238 | os.Exit(0) 239 | }() 240 | }) 241 | 242 | log.Printf("mesh node up on :%s | node %s bind %s seeds=%v", port, cfg.PublicURL, cfg.BindAddr, cfg.Seeds) 243 | if err := http.ListenAndServe(":"+port, mux); err != nil { 244 | log.Fatal(err) 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark Results - Kioshun vs. 
Ristretto, go-cache and freecache 2 | 3 | ## Benchmark Configuration 4 | 5 | The benchmarks compare **Kioshun** with **AdmissionLFU** eviction policy against other popular Go cache libraries: 6 | 7 | ### Cache Configurations Used 8 | 9 | | Cache Library | Configuration | Notes | 10 | |---------------|---------------|-------| 11 | | **Kioshun** | MaxSize: 100,000
ShardCount: CPU cores × 4
EvictionPolicy: **AdmissionLFU**
DefaultTTL: 1 hour
CleanupInterval: 5 min | AdmissionLFU eviction policy with admission control | 12 | | **Ristretto** | NumCounters: 1,000,000
MaxCost: 100,000
BufferItems: 64 | TinyLFU-based admission policy | 13 | | **BigCache** | MaxEntriesInWindow: 100,000
Shards: CPU cores (power of 2)
MaxEntrySize: 64KB
HardMaxCacheSize: 256MB | No eviction policy, size-based | 14 | | **FreeCache** | Size: 128MB | Segmented LRU | 15 | | **go-cache** | DefaultExpiration: 1 hour
CleanupInterval: 5 min | Simple map-based with cleanup | 16 | 17 | **Test Environment (latest run):** 18 | - **CPU:** Apple M4 Max (arm64) 19 | - **OS:** macOS (Darwin arm64) 20 | - **Go Version:** 1.24.7 21 | - **Benchmark knobs:** `go test -bench … -benchmem`, 16-way parallelism, `-benchtime` 5s (core workloads) / 3s (stress suites) 22 | - **Kioshun config:** AdmissionLFU, `ShardCount = runtime.NumCPU() * 4` (64 shards), `MaxSize = 100 000` 23 | 24 | ## Running Benchmarks 25 | 26 | ```bash 27 | # Run comparison benchmarks 28 | make bench-compare 29 | 30 | # Run stress tests 31 | make stress-test 32 | 33 | # Run all benchmarks with the benchmark runner 34 | make bench-runner 35 | 36 | # Run all benchmark tests 37 | make bench 38 | ``` 39 | 40 | ## Core Operations 41 | 42 | ### SET Operations 43 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 44 | |---------------|---------|-------|------|-----------| 45 | | **Kioshun** | 100,000,000 | 75.55 | 41 | 3 | 46 | | **FreeCache** | 81,768,051 | 74.19 | 24 | 1 | 47 | | **Ristretto** | 58,714,996 | 90.86 | 154 | 5 | 48 | | **BigCache** | 37,852,590 | 151.5 | 40 | 2 | 49 | | **go-cache** | 19,841,619 | 341.0 | 57 | 3 | 50 | 51 | ### GET Operations 52 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 53 | |---------------|---------|-------|------|-----------| 54 | | **Ristretto** | 244,472,186 | 23.09 | 31 | 2 | 55 | | **Kioshun** | 239,967,180 | 25.87 | 31 | 2 | 56 | | **FreeCache** | 77,851,767 | 79.62 | 1,039 | 2 | 57 | | **BigCache** | 76,458,728 | 76.81 | 1,047 | 3 | 58 | | **go-cache** | 44,541,900 | 136.6 | 15 | 1 | 59 | 60 | ## Workload-Specific 61 | 62 | ### Mixed Operations (70% reads, 30% writes) 63 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 64 | |---------------|---------|-------|------|-----------| 65 | | **Kioshun** | 114,716,242 | 51.47 | 31 | 2 | 66 | | **Ristretto** | 96,006,397 | 62.33 | 69 | 3 | 67 | | **FreeCache** | 80,013,957 | 73.54 | 732 | 2 | 68 | | **BigCache** | 38,290,142 | 150.0 | 742 | 3 | 69 | | **go-cache** | 30,545,562 | 200.3 | 22 | 2 | 70 | 71 | ### High Contention Scenarios 72 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 73 | |---------------|---------|-------|------|-----------| 74 | | **Kioshun** | 85,443,963 | 77.03 | 34 | 2 | 75 | | **FreeCache** | 68,861,860 | 87.68 | 554 | 1 | 76 | | **BigCache** | 36,476,380 | 154.0 | 568 | 2 | 77 | | **go-cache** | 29,068,076 | 228.2 | 33 | 1 | 78 | | **Ristretto** | 27,175,748 | 223.5 | 83 | 3 | 79 | 80 | ### Read-Heavy Workloads (90% reads, 10% writes) 81 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 82 | |---------------|---------|-------|------|-----------| 83 | | **Ristretto** | 101,089,580 | 33.34 | 45 | 3 | 84 | | **Kioshun** | 97,650,378 | 39.92 | 31 | 2 | 85 | | **FreeCache** | 46,611,218 | 76.60 | 937 | 2 | 86 | | **BigCache** | 26,093,739 | 132.6 | 946 | 3 | 87 | | **go-cache** | 19,943,032 | 180.8 | 18 | 2 | 88 | 89 | ### Write-Heavy Workloads (90% writes, 10% reads) 90 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 91 | |---------------|---------|-------|------|-----------| 92 | | **Kioshun** | 96,439,025 | 36.25 | 31 | 2 | 93 | | **FreeCache** | 52,917,732 | 66.29 | 118 | 2 | 94 | | **Ristretto** | 22,717,962 | 147.7 | 133 | 5 | 95 | | **BigCache** | 21,079,129 | 167.2 | 133 | 3 | 96 | | **go-cache** | 14,755,354 | 231.2 | 37 | 2 | 97 | 98 | ## Simulate 'Real-World' Workflow 99 | 100 | ### Real-World Workload Simulation 101 | | Cache Library | Ops/sec | ns/op | B/op | allocs/op | 102 | 
|---------------|---------|-------|------|-----------|
103 | | **Kioshun** | 53,742,550 | 65.25 | 48 | 3 |
104 | | **FreeCache** | 44,717,696 | 85.09 | 738 | 2 |
105 | | **Ristretto** | 29,713,388 | 112.0 | 96 | 3 |
106 | | **BigCache** | 21,115,576 | 185.9 | 818 | 3 |
107 | | **go-cache** | 16,147,178 | 230.7 | 40 | 2 |
108 |
109 | ### Memory Efficiency
110 | | Cache Library | Ops/sec | bytes/op |
111 | |---------------|---------|----------|
112 | | **Kioshun** | 45,916,828 | **40.0** |
113 | Value size sweep (1–64 KB) held steady at ~67–70 ns/op with 40 B/op and 2 allocs/op.
114 |
115 | ## Performance Characteristics (Kioshun AdmissionLFU)
116 |
117 | - ~36–77 ns/op on write-heavy or high-contention microbenchmarks, ~26 ns/op on pure GETs
118 | - ~53 M ops/sec in the mixed “real-world” pattern (65 ns/op average)
119 | - Peak GET throughput observed: ~240 M ops/sec (25.87 ns/op, per the GET table above)
120 |
121 | ## Stress Test Results
122 |
123 | ### High Load Scenarios
124 | | Load Profile | Ops/sec | ns/op | B/op | allocs/op | Description |
125 | |-------------|---------|-------|------|-----------|-------------|
126 | | **Small + High Concurrency** | 55,777,849 | 61.52 | 27 | 2 | Many goroutines, small cache |
127 | | **Medium + Mixed Load** | 53,624,493 | 66.63 | 31 | 2 | Balanced read/write operations |
128 | | **Large + Read Heavy** | 64,021,102 | 55.19 | 38 | 2 | Large cache, mostly reads |
129 | | **XLarge + Write Heavy** | 40,838,030 | 80.50 | 40 | 3 | Very large cache, mostly writes |
130 | | **Extreme + Balanced** | 42,276,840 | 85.33 | 40 | 3 | Maximum scale, balanced ops |
131 |
132 | ### Advanced Stress Test Results
133 |
134 | #### Contention Stress Test
135 | | Test | Ops/sec | ns/op | B/op | allocs/op |
136 | |------|---------|-------|------|-----------|
137 | | **High Contention** | 40,442,905 | 83.94 | 34 | 2 |
138 |
139 | #### Eviction Policy Performance
140 | | Eviction Policy | Ops/sec | ns/op | B/op | allocs/op |
141 | |-----------------|---------|-------|------|-----------|
142 | | **FIFO** | **42,899,701** | 82.10 | 46 | 3 |
143 | | **AdmissionLFU** | 41,337,319 | 177.0 | 59 | 3 |
144 | | **LRU** | 31,638,396 | 153.1 | 57 | 3 |
145 | | **LFU** | 24,112,208 | 194.8 | 57 | 3 |
146 |
147 | #### Memory Pressure Tests
148 | | Value Size | Ops/sec | ns/op | B/op | allocs/op |
149 | |------------|---------|-------|------|-----------|
150 | | **1KB** | 45,916,828 | 69.71 | 40 | 2 |
151 | | **4KB** | 58,272,031 | 68.42 | 40 | 2 |
152 | | **16KB** | 55,135,164 | 68.89 | 40 | 2 |
153 | | **64KB** | 57,774,400 | 67.40 | 40 | 2 |
154 |
155 | #### Sharding Efficiency Analysis
156 | | Shards | Ops/sec | ns/op | B/op | allocs/op |
157 | |--------|---------|-------|------|-----------|
158 | | **1** | 15,451,604 | 341.2 | 45 | 3 |
159 | | **2** | 15,700,284 | 299.6 | 44 | 3 |
160 | | **4** | 20,301,433 | 205.3 | 45 | 3 |
161 | | **8** | 27,256,491 | 145.4 | 45 | 3 |
162 | | **16** | 35,702,301 | 115.4 | 46 | 3 |
163 | | **32** | 41,248,432 | 91.13 | 46 | 3 |
164 | | **64** | 53,728,068 | 76.04 | 47 | 3 |
165 | | **128** | 66,081,164 | **62.31** | 47 | 3 |
166 | --------------------------------------------------------------------------------
/fnv_test.go:
--------------------------------------------------------------------------------
1 | package cache
2 |
3 | import (
4 | "strconv"
5 | "testing"
6 | )
7 |
8 | func TestFnvHash64(t *testing.T) {
9 | tests := []struct {
10 | name string
11 | input string
12 | }{
13 | {
14 | name: "empty string",
15 | input: "",
16 | },
17 | {
18 | name: "single character",
19 | input: "a", 20 | }, 21 | { 22 | name: "short string", 23 | input: "test", 24 | }, 25 | { 26 | name: "longer string", 27 | input: "Hello, World!", 28 | }, 29 | { 30 | name: "numeric string", 31 | input: "12345", 32 | }, 33 | { 34 | name: "special characters", 35 | input: "!@#$%^&*()", 36 | }, 37 | { 38 | name: "unicode characters", 39 | input: "🚀🌟💫", 40 | }, 41 | } 42 | 43 | for _, tt := range tests { 44 | t.Run(tt.name, func(t *testing.T) { 45 | result := fnvHash64(tt.input) 46 | if result == 0 && tt.input != "" { 47 | t.Errorf("fnvHash64(%q) returned 0 unexpectedly", tt.input) 48 | } 49 | }) 50 | } 51 | } 52 | 53 | func TestFnvHash64_Consistency(t *testing.T) { 54 | // same input always produces the same hash 55 | testStrings := []string{ 56 | "", 57 | "a", 58 | "test", 59 | "Hello, World!", 60 | "This is a longer test string with various characters!@#$%^&*()", 61 | "unicode: 🚀🌟💫✨🎯", 62 | } 63 | 64 | for _, str := range testStrings { 65 | t.Run("consistency_"+str, func(t *testing.T) { 66 | hash1 := fnvHash64(str) 67 | hash2 := fnvHash64(str) 68 | hash3 := fnvHash64(str) 69 | 70 | if hash1 != hash2 || hash2 != hash3 { 71 | t.Errorf("fnvHash64(%q) not consistent: %d, %d, %d", str, hash1, hash2, hash3) 72 | } 73 | }) 74 | } 75 | } 76 | 77 | func TestFnvHash64_Distribution(t *testing.T) { 78 | // similar strings produce different hashes 79 | testPairs := []struct { 80 | str1, str2 string 81 | }{ 82 | {"abc", "abd"}, 83 | {"test", "Test"}, 84 | {"hello", "Hello"}, 85 | {"123", "124"}, 86 | {"", " "}, 87 | {"a", "aa"}, 88 | {"hello world", "hello world "}, 89 | } 90 | 91 | for _, pair := range testPairs { 92 | t.Run("distribution_"+pair.str1+"_vs_"+pair.str2, func(t *testing.T) { 93 | hash1 := fnvHash64(pair.str1) 94 | hash2 := fnvHash64(pair.str2) 95 | 96 | if hash1 == hash2 { 97 | t.Errorf("fnvHash64(%q) == fnvHash64(%q) = %d (collision)", pair.str1, pair.str2, hash1) 98 | } 99 | }) 100 | } 101 | } 102 | 103 | func TestFnvHash64_CaseSensitive(t *testing.T) { 104 | // hash is case-sensitive 105 | testCases := []struct { 106 | lower, upper string 107 | }{ 108 | {"hello", "HELLO"}, 109 | {"world", "WORLD"}, 110 | {"test", "TEST"}, 111 | {"abc", "ABC"}, 112 | } 113 | 114 | for _, tc := range testCases { 115 | t.Run("case_sensitive_"+tc.lower, func(t *testing.T) { 116 | lowerHash := fnvHash64(tc.lower) 117 | upperHash := fnvHash64(tc.upper) 118 | 119 | if lowerHash == upperHash { 120 | t.Errorf("fnvHash64 is not case-sensitive: %q and %q have same hash %d", tc.lower, tc.upper, lowerHash) 121 | } 122 | }) 123 | } 124 | } 125 | 126 | func TestFnvHash64_LongStrings(t *testing.T) { 127 | longStr := "" 128 | for i := 0; i < 10000; i++ { 129 | longStr += "a" 130 | } 131 | 132 | hash := fnvHash64(longStr) 133 | 134 | // Should not panic and should return a valid hash 135 | if hash == 0 { 136 | t.Error("fnvHash64 returned 0 for long string") 137 | } 138 | 139 | // Test consistency with long strings 140 | hash2 := fnvHash64(longStr) 141 | if hash != hash2 { 142 | t.Error("fnvHash64 not consistent with long strings") 143 | } 144 | } 145 | 146 | func TestFnvHash64_BinaryData(t *testing.T) { 147 | // Test with binary data (null bytes, control characters) 148 | binaryData := []string{ 149 | "\x00\x01\x02\x03", 150 | "\xff\xfe\xfd\xfc", 151 | "\x00hello\x00world\x00", 152 | string([]byte{0, 255, 128, 64, 32, 16, 8, 4, 2, 1}), 153 | } 154 | 155 | for i, data := range binaryData { 156 | t.Run("binary_data_"+strconv.Itoa(i), func(t *testing.T) { 157 | hash := fnvHash64(data) 158 | 159 | // Should not panic 
and should return a valid hash 160 | if hash == 0 { 161 | t.Error("fnvHash64 returned 0 for binary data") 162 | } 163 | 164 | // Test consistency 165 | hash2 := fnvHash64(data) 166 | if hash != hash2 { 167 | t.Error("fnvHash64 not consistent with binary data") 168 | } 169 | }) 170 | } 171 | } 172 | 173 | func TestFnvHash64_EdgeCases(t *testing.T) { 174 | // Test edge cases 175 | edgeCases := []string{ 176 | "", // empty string 177 | " ", // single space 178 | "\n", // newline 179 | "\t", // tab 180 | "\r\n", // windows line ending 181 | "a", // single character 182 | "aa", // repeated character 183 | "aaa", // multiple repeated characters 184 | "aaaa", // even more repeated characters 185 | "abcd", // simple sequence 186 | "dcba", // reverse sequence 187 | "1234567890", // numbers 188 | "!@#$%^&*()", // special characters 189 | } 190 | 191 | hashes := make(map[uint64]string) 192 | 193 | for _, str := range edgeCases { 194 | t.Run("edge_case_"+str, func(t *testing.T) { 195 | hash := fnvHash64(str) 196 | 197 | // Check for collisions among edge cases 198 | if existing, exists := hashes[hash]; exists { 199 | t.Errorf("Hash collision: %q and %q both hash to %d", str, existing, hash) 200 | } 201 | hashes[hash] = str 202 | 203 | // Verify consistency 204 | hash2 := fnvHash64(str) 205 | if hash != hash2 { 206 | t.Errorf("fnvHash64(%q) not consistent: %d != %d", str, hash, hash2) 207 | } 208 | }) 209 | } 210 | } 211 | 212 | func TestFnvHash64_Performance(t *testing.T) { 213 | // Test that the function doesn't have obvious performance issues 214 | testStr := "This is a test string for performance testing" 215 | 216 | // Run multiple iterations to check for performance consistency 217 | for i := 0; i < 1000; i++ { 218 | hash := fnvHash64(testStr) 219 | if hash == 0 { 220 | t.Error("Unexpected zero hash") 221 | } 222 | } 223 | } 224 | 225 | // Benchmark tests 226 | func BenchmarkFnvHash64_Short(b *testing.B) { 227 | s := "test" 228 | b.ResetTimer() 229 | for i := 0; i < b.N; i++ { 230 | fnvHash64(s) 231 | } 232 | } 233 | 234 | func BenchmarkFnvHash64_Medium(b *testing.B) { 235 | s := "This is a medium length string for benchmarking" 236 | b.ResetTimer() 237 | for i := 0; i < b.N; i++ { 238 | fnvHash64(s) 239 | } 240 | } 241 | 242 | func BenchmarkFnvHash64_Long(b *testing.B) { 243 | s := "" 244 | for i := 0; i < 1000; i++ { 245 | s += "a" 246 | } 247 | b.ResetTimer() 248 | for i := 0; i < b.N; i++ { 249 | fnvHash64(s) 250 | } 251 | } 252 | 253 | func BenchmarkFnvHash64_VeryLong(b *testing.B) { 254 | s := "" 255 | for i := 0; i < 10000; i++ { 256 | s += "test string " 257 | } 258 | b.ResetTimer() 259 | for i := 0; i < b.N; i++ { 260 | fnvHash64(s) 261 | } 262 | } 263 | 264 | func BenchmarkFnvHash64_Empty(b *testing.B) { 265 | s := "" 266 | b.ResetTimer() 267 | for i := 0; i < b.N; i++ { 268 | fnvHash64(s) 269 | } 270 | } 271 | 272 | func BenchmarkFnvHash64_SingleChar(b *testing.B) { 273 | s := "a" 274 | b.ResetTimer() 275 | for i := 0; i < b.N; i++ { 276 | fnvHash64(s) 277 | } 278 | } 279 | 280 | func BenchmarkFnvHash64_URL(b *testing.B) { 281 | s := "https://example.com/api/v1/users/123?param=value&another=test" 282 | b.ResetTimer() 283 | for i := 0; i < b.N; i++ { 284 | fnvHash64(s) 285 | } 286 | } 287 | 288 | func BenchmarkFnvHash64_UUID(b *testing.B) { 289 | s := "550e8400-e29b-41d4-a716-446655440000" 290 | b.ResetTimer() 291 | for i := 0; i < b.N; i++ { 292 | fnvHash64(s) 293 | } 294 | } 295 | -------------------------------------------------------------------------------- 
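The tests above pin down behavioral properties (nonzero results, determinism, case sensitivity, collision spread) without fixing the exact algorithm. For orientation, a minimal FNV-1a 64-bit hash in Go looks like the sketch below, written against the standard offset basis and prime and only assumed, not verified, to match what `fnv.go` implements:

```go
package cache

// fnvHash64Sketch is an illustrative FNV-1a (64-bit) string hash.
// Assumption: fnv.go follows the canonical FNV-1a algorithm; the
// constants are the standard 64-bit offset basis and prime.
func fnvHash64Sketch(s string) uint64 {
	const (
		offset64 uint64 = 14695981039346656037 // FNV-1a 64-bit offset basis
		prime64  uint64 = 1099511628211        // FNV-1a 64-bit prime
	)
	h := offset64
	for i := 0; i < len(s); i++ {
		h ^= uint64(s[i]) // FNV-1a XORs the byte first...
		h *= prime64      // ...then multiplies by the prime
	}
	return h
}
```

Under this scheme even the empty string hashes to the (nonzero) offset basis, which is why the zero-result checks in the tests above are safe.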
/internal/httpcache/index_test.go: -------------------------------------------------------------------------------- 1 | package httpcache 2 | 3 | import ( 4 | "reflect" 5 | "sort" 6 | "testing" 7 | ) 8 | 9 | func TestNewPatternIndex(t *testing.T) { 10 | pi := NewPatternIndex() 11 | 12 | if pi == nil { 13 | t.Fatal("NewPatternIndex() returned nil") 14 | } 15 | 16 | if pi.root == nil { 17 | t.Fatal("root node is nil") 18 | } 19 | 20 | if pi.root.children == nil { 21 | t.Fatal("root children map is nil") 22 | } 23 | 24 | if pi.root.keys == nil { 25 | t.Fatal("root keys map is nil") 26 | } 27 | } 28 | 29 | func TestNormalizePath(t *testing.T) { 30 | tests := []struct { 31 | name string 32 | path string 33 | expected []string 34 | }{ 35 | {name: "empty path", path: "", expected: []string{}}, 36 | {name: "root path", path: "/", expected: []string{}}, 37 | {name: "single segment", path: "/api", expected: []string{"api"}}, 38 | {name: "multiple segments", path: "/api/v1/users", expected: []string{"api", "v1", "users"}}, 39 | {name: "trailing slash", path: "/api/v1/users/", expected: []string{"api", "v1", "users"}}, 40 | {name: "no leading slash", path: "api/v1/users", expected: []string{"api", "v1", "users"}}, 41 | {name: "multiple slashes", path: "//api//v1//users//", expected: []string{"api", "v1", "users"}}, 42 | } 43 | 44 | for _, tt := range tests { 45 | t.Run(tt.name, func(t *testing.T) { 46 | result := normalizePath(tt.path) 47 | if !reflect.DeepEqual(result, tt.expected) { 48 | t.Errorf("normalizePath(%q) = %v, expected %v", tt.path, result, tt.expected) 49 | } 50 | }) 51 | } 52 | } 53 | 54 | func TestPatternIndex_AddKey(t *testing.T) { 55 | pi := NewPatternIndex() 56 | 57 | // Test adding keys to different paths 58 | pi.AddKey("/api/v1", "key1") 59 | pi.AddKey("/api/v1", "key2") 60 | pi.AddKey("/api/v2", "key3") 61 | pi.AddKey("/", "rootkey") 62 | 63 | // Verify keys were added correctly 64 | keys := pi.GetMatchingKeys("/api/v1") 65 | sort.Strings(keys) 66 | expected := []string{"key1", "key2"} 67 | sort.Strings(expected) 68 | 69 | if !reflect.DeepEqual(keys, expected) { 70 | t.Errorf("Expected keys %v, got %v", expected, keys) 71 | } 72 | 73 | // Test root path 74 | rootKeys := pi.GetMatchingKeys("/") 75 | if len(rootKeys) != 1 || rootKeys[0] != "rootkey" { 76 | t.Errorf("Expected root key [rootkey], got %v", rootKeys) 77 | } 78 | } 79 | 80 | func TestPatternIndex_RemoveKey(t *testing.T) { 81 | pi := NewPatternIndex() 82 | 83 | // Add some keys 84 | pi.AddKey("/api/v1", "key1") 85 | pi.AddKey("/api/v1", "key2") 86 | pi.AddKey("/api/v2", "key3") 87 | 88 | // Remove one key 89 | pi.RemoveKey("/api/v1", "key1") 90 | 91 | // Verify key was removed 92 | keys := pi.GetMatchingKeys("/api/v1") 93 | if len(keys) != 1 || keys[0] != "key2" { 94 | t.Errorf("Expected [key2], got %v", keys) 95 | } 96 | 97 | // Remove non-existent key (should not panic) 98 | pi.RemoveKey("/api/v1", "nonexistent") 99 | 100 | // Remove from non-existent path (should not panic) 101 | pi.RemoveKey("/nonexistent", "key1") 102 | } 103 | 104 | func TestPatternIndex_GetMatchingKeys(t *testing.T) { 105 | pi := NewPatternIndex() 106 | 107 | // Add test data 108 | pi.AddKey("/api/v1/users", "users-key1") 109 | pi.AddKey("/api/v1/users", "users-key2") 110 | pi.AddKey("/api/v1/posts", "posts-key1") 111 | pi.AddKey("/api/v2/users", "v2-users-key1") 112 | pi.AddKey("/static/css", "css-key1") 113 | pi.AddKey("/", "root-key") 114 | 115 | tests := []struct { 116 | name string 117 | pattern string 118 | expected []string 119 | }{ 120 | 
{name: "exact match", pattern: "/api/v1/users", expected: []string{"users-key1", "users-key2"}}, 121 | {name: "wildcard match", pattern: "/api/v1/*", expected: []string{"users-key1", "users-key2", "posts-key1"}}, 122 | {name: "broader wildcard", pattern: "/api/*", expected: []string{"users-key1", "users-key2", "posts-key1", "v2-users-key1"}}, 123 | {name: "root wildcard", pattern: "/*", expected: []string{"users-key1", "users-key2", "posts-key1", "v2-users-key1", "css-key1", "root-key"}}, 124 | {name: "no match", pattern: "/nonexistent", expected: nil}, 125 | {name: "root exact", pattern: "/", expected: []string{"root-key"}}, 126 | {name: "empty pattern", pattern: "", expected: []string{"root-key"}}, 127 | } 128 | 129 | for _, tt := range tests { 130 | t.Run(tt.name, func(t *testing.T) { 131 | result := pi.GetMatchingKeys(tt.pattern) 132 | sort.Strings(result) 133 | sort.Strings(tt.expected) 134 | if !reflect.DeepEqual(result, tt.expected) { 135 | t.Errorf("GetMatchingKeys(%q) = %v, expected %v", tt.pattern, result, tt.expected) 136 | } 137 | }) 138 | } 139 | } 140 | 141 | func TestPatternIndex_Clear(t *testing.T) { 142 | pi := NewPatternIndex() 143 | 144 | // Add some data 145 | pi.AddKey("/api/v1", "key1") 146 | pi.AddKey("/api/v2", "key2") 147 | 148 | // Verify data exists 149 | keys := pi.GetMatchingKeys("/*") 150 | if len(keys) == 0 { 151 | t.Fatal("Expected keys before clear") 152 | } 153 | 154 | // Clear the index 155 | pi.Clear() 156 | 157 | // Verify everything is cleared 158 | keys = pi.GetMatchingKeys("/*") 159 | if len(keys) != 0 { 160 | t.Errorf("Expected no keys after clear, got %v", keys) 161 | } 162 | 163 | // Verify we can still add keys after clear 164 | pi.AddKey("/test", "test-key") 165 | keys = pi.GetMatchingKeys("/test") 166 | if len(keys) != 1 || keys[0] != "test-key" { 167 | t.Errorf("Expected [test-key] after clear and add, got %v", keys) 168 | } 169 | } 170 | 171 | func TestPatternIndex_ConcurrentAccess(t *testing.T) { 172 | pi := NewPatternIndex() 173 | 174 | // Test concurrent reads and writes 175 | done := make(chan bool) 176 | 177 | // Writer goroutine 178 | go func() { 179 | for i := 0; i < 100; i++ { 180 | pi.AddKey("/api/test", "key"+string(rune(i))) 181 | } 182 | done <- true 183 | }() 184 | 185 | // Reader goroutine 186 | go func() { 187 | for i := 0; i < 100; i++ { 188 | pi.GetMatchingKeys("/api/*") 189 | } 190 | done <- true 191 | }() 192 | 193 | <-done 194 | <-done 195 | 196 | // Verify final state 197 | keys := pi.GetMatchingKeys("/api/test") 198 | if len(keys) != 100 { 199 | t.Errorf("Expected 100 keys, got %d", len(keys)) 200 | } 201 | } 202 | 203 | func TestPatternNode_Creation(t *testing.T) { 204 | node := newPatternNode() 205 | if node == nil { 206 | t.Fatal("newPatternNode() returned nil") 207 | } 208 | if node.children == nil { 209 | t.Fatal("children map is nil") 210 | } 211 | if node.keys == nil { 212 | t.Fatal("keys map is nil") 213 | } 214 | if len(node.children) != 0 { 215 | t.Errorf("Expected empty children map, got %d items", len(node.children)) 216 | } 217 | if len(node.keys) != 0 { 218 | t.Errorf("Expected empty keys map, got %d items", len(node.keys)) 219 | } 220 | } 221 | 222 | func BenchmarkPatternIndex_AddKey(b *testing.B) { 223 | pi := NewPatternIndex() 224 | b.ResetTimer() 225 | for i := 0; i < b.N; i++ { 226 | pi.AddKey("/api/v1/users", "key"+string(rune(i))) 227 | } 228 | } 229 | 230 | func BenchmarkPatternIndex_GetMatchingKeys(b *testing.B) { 231 | pi := NewPatternIndex() 232 | for i := 0; i < 1000; i++ { 233 | 
pi.AddKey("/api/v1/users", "key"+string(rune(i))) 234 | } 235 | b.ResetTimer() 236 | for i := 0; i < b.N; i++ { 237 | pi.GetMatchingKeys("/api/v1/users") 238 | } 239 | } 240 | 241 | func BenchmarkPatternIndex_WildcardMatch(b *testing.B) { 242 | pi := NewPatternIndex() 243 | for i := 0; i < 100; i++ { 244 | pi.AddKey("/api/v1/users", "users-key"+string(rune(i))) 245 | pi.AddKey("/api/v1/posts", "posts-key"+string(rune(i))) 246 | pi.AddKey("/api/v2/users", "v2-users-key"+string(rune(i))) 247 | } 248 | b.ResetTimer() 249 | for i := 0; i < b.N; i++ { 250 | pi.GetMatchingKeys("/api/*") 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /CLUSTER.md: -------------------------------------------------------------------------------- 1 | # Kioshun Cluster 2 | 3 | This document describes the distributed cache cluster components and protocols used by `kioshun/cluster`. It focuses on the replication model, failure handling, and the wire protocol. 4 | 5 | ## Usage Model (peer-to-peer) 6 | 7 | - Peer-to-peer mesh: each service instance runs a full cluster peer. Nodes discover each other via `Seeds`, gossip membership/weights, form a weighted rendezvous ring, and replicate directly. There is no coordinator or proxy. 8 | - Identity: every node has a stable `ID` (type `NodeID`) used in membership and the ring. If not provided, `ID` defaults to a 16‑hex digest derived from `PublicURL` (`Config.EnsureID()`). `PublicURL` is the dialable address. 9 | - In-process node: embed a `cluster.Node` in your service. Start it with a unique `PublicURL` and `BindAddr`, configure `Seeds` with known peers, then wrap it with `NewDistributedCache` to call `Set/Get`. 10 | 11 | ### Kioshun Mesh-style vs. Redis-style 12 | 13 | - Redis Cluster: clients stay clients; they compute slot→node and connect over a client protocol. 14 | - Kioshun Cluster: your app becomes a node in the mesh. It gossips, can be chosen as an owner, stores a shard locally, and routes requests to primary owners when needed. 15 | 16 | ### Quickstart 17 | 18 | Run the same service on three servers. Each instance is a peer in the mesh. 19 | 20 | On each server, env: 21 | 22 | ``` 23 | CACHE_BIND=:4443 24 | CACHE_PUBLIC=srv-a:4443 # use srv-b:4443 and srv-c:4443 on other servers 25 | CACHE_SEEDS=srv-a:4443,srv-b:4443,srv-c:4443 26 | CACHE_AUTH=supersecret 27 | ``` 28 | 29 | In code: 30 | 31 | ``` 32 | local := cache.NewWithDefaults[string, []byte]() 33 | 34 | cfg := cluster.Default() 35 | cfg.BindAddr = os.Getenv("CACHE_BIND") 36 | cfg.PublicURL = os.Getenv("CACHE_PUBLIC") 37 | cfg.Seeds = strings.Split(os.Getenv("CACHE_SEEDS"), ",") 38 | cfg.ReplicationFactor = 3 39 | cfg.WriteConcern = 2 40 | cfg.Sec.AuthToken = os.Getenv("CACHE_AUTH") 41 | 42 | node := cluster.NewNode[string, []byte](cfg, cluster.StringKeyCodec[string]{}, local, cluster.BytesCodec{}) 43 | if err := node.Start(); err != nil { panic(err) } 44 | dc := cluster.NewDistributedCache[string, []byte](node) 45 | _ = dc.Set("k", []byte("v"), time.Minute) 46 | v, ok := dc.Get("k") 47 | _ = v; _ = ok 48 | ``` 49 | 50 | Key points: 51 | 52 | - Mesh like: every instance is a peer - it gossips, can be selected as an owner, and stores a shard locally. 53 | - Reachability: `CACHE_PUBLIC` must be routable between peers. 54 | - Durability: tune `ReplicationFactor` and `WriteConcern` (e.g., RF=3, WC=2). 55 | - Security: set the same `CACHE_AUTH` on all peers. Enable TLS in config if required. 
When `AuthToken` is empty and `AllowUnauthenticatedClients` is true, public client RPCs are allowed without Hello; peer‑only RPCs still require Hello.
56 | - Adapter scope: `Clear/Size/Stats` act on the local shard only.
57 |
58 | ## Architecture
59 |
60 | ```
61 | Clients ──▶ Node (API) ──▶ Owners (RF replicas) over TCP/TLS (CBOR frames)
62 | │ │
63 | ├── Gossip/Weights (peer discovery + load-based ring)
64 | └── Hinted Handoff (per‑peer queues)
65 | ```
66 |
67 | Core components:
68 | - Rendezvous ring with dynamic weights chooses RF owners per key.
69 | - LWW replication with HLC versions resolves conflicts deterministically: the highest version wins, monotonically across nodes.
70 | - Hinted handoff replays missed writes to down/unreachable owners.
71 | - Digest‑based backfill repairs divergence at join time and periodically; it is also kicked shortly after membership epoch changes.
72 |
73 | ## Data Flow
74 |
75 | Write path (Set/Delete):
76 | 1. Compute owners via rendezvous (RF replicas, ordered).
77 | 2. If local is primary, apply locally and record HLC version.
78 | 3. Replicate to remaining owners in parallel and wait for WC acknowledgements.
79 | 4. On peer error, enqueue a hinted‑handoff record (per‑peer queue).
80 |
81 | Read path (Get):
82 | 1. If local is primary and key exists, serve locally.
83 | 2. Otherwise route to ring owners. Healthy (non‑penalized) peers are tried first; additional legs are hedged after a small delay.
84 | 3. Responses are decompressed/decoded; "notfound" from an owner is treated as a clean miss.
85 |
86 | ## Failure Handling
87 |
88 | Hinted handoff:
89 | - Per‑peer queues store the newest version per key with an absolute expiry.
90 | - Replay loop drains queues with exponential backoff and a global RPS limit.
91 | - Auto‑pause stops enqueues under high backlog; replay continues to drain.
92 | - Transport timeouts increment a short penalty window per peer. Penalized peers are temporarily deprioritized for reads.
93 |
94 | Backfill/repair:
95 | - Donor builds bucket digests (count, XOR(hash^version)) for a prefix depth.
96 | - Joiner compares to its local digests - on mismatch, pages keys by bucket.
97 | - Keys are imported with versions and absolute expiries; LWW prunes stale entries. Backfill also runs shortly after membership epoch increases to start repair promptly.
98 |
99 | Ring membership:
100 | - Gossip exchanges peer identity (`ID`), current address, seen timestamps, and load.
101 | - Weight updates rebuild a weighted rendezvous ring for owner selection.
102 |
103 | ## Consistency Model
104 |
105 | - Eventual consistency for reads; WC controls write durability/freshness.
106 | - LWW (HLC) ensures monotonic versions across nodes.
107 | - Rebalancer migrates keys to new primaries (preserving TTLs and HLC versions).
108 |
109 | ## Wire Protocol
110 |
111 | Transport:
112 | - Length‑prefixed frames over TCP (optional TLS); each frame carries a CBOR message.
113 | - Initial Hello authenticates/identifies peers when auth is enabled. `MsgHello{FromID, FromAddr, Token}` → `MsgHelloResp{OK, PeerID, Err}`. When `AllowUnauthenticatedClients` is true and no token is set, public client RPCs may be sent as the first frame without Hello. Peer‑only RPCs still require Hello.
114 |
115 | Messages:
116 | - Set/Delete (+Bulk) carry keys, values (for Set), compression flag, absolute expiry, and version (HLC).
117 | - Get/GetBulk return found flag(s), value bytes, compression flags, and optional expiry; they do not carry a version.
118 | - LeaseLoad supports a coordinated loader on the primary with single‑flight leases.
119 | - Gossip exchanges peer list (ID + Addr), load metrics, hot key samples, and epoch. 120 | - BackfillDigest/BackfillKeys implement incremental repair. 121 | 122 | ``` 123 | ┌────────┬───────────────┬───────────────────────────┐ 124 | │ Frame │ 4B length N │ N bytes: CBOR(Message) │ 125 | └────────┴───────────────┴───────────────────────────┘ 126 | 127 | Message Base: { t: MsgType, id: uint64 } 128 | Key/Value: []byte (value may be gzip-compressed; Cp=true) 129 | Set/Delete versions: uint64 (HLC) 130 | ``` 131 | 132 | ## Hinted Handoff 133 | 134 | ``` 135 | enqueue(write→peer) ──▶ per‑peer queue (max items/bytes, TTL, DropPolicy) 136 | │ 137 | replay loop (RPS) ───────────────┘─▶ send → ok: drop; fail: backoff + requeue 138 | ``` 139 | 140 | - Coalesces by key: keeps newest version; older hints replaced in place. 141 | - Drops expired values (SET with already expired E) and aged hints (TTL). 142 | - Auto‑resume when backlog drains below hysteresis threshold. 143 | 144 | ## Backfill 145 | 146 | ``` 147 | Joiner → Donor: BackfillDigest(depth) 148 | Donor → Joiner: Buckets [{prefix, count, hash}] 149 | Joiner compares; for mismatched buckets: 150 | Joiner → Donor: BackfillKeys(prefix, cursor, limit) 151 | Donor → Joiner: Items [{K, V, E, Ver, Cp}] + next cursor 152 | ``` 153 | 154 | - Depth controls bucket granularity (default 2 bytes = 65,536 buckets). 155 | - Cursor is last key‑hash in bucket for stable pagination. 156 | - Typical page sizes: initial pass ~1024, periodic passes ~512. 157 | 158 | ## Rebalance 159 | 160 | - Periodically samples local keys - if primary changed, pushes key to new primary using current HLC and remaining TTL → absolute expiry; deletes local key on success. 161 | 162 | ## Tuning 163 | 164 | - RF ≥ 3, WC ≥ 2 for balanced durability and freshness. 165 | - Handoff: set per‑peer caps and RPS to sustainable values. TTL high enough to cover expected downtimes. 166 | - Backfill: adjust depth for dataset size. Tune page size to donor capacity. 167 | - Timeouts: read/write/idle tuned to network characteristics; inflight caps per peer. 168 | -------------------------------------------------------------------------------- /cluster/bf_join.go: -------------------------------------------------------------------------------- 1 | package cluster 2 | 3 | import ( 4 | "encoding/binary" 5 | "sort" 6 | "time" 7 | 8 | "github.com/cespare/xxhash/v2" 9 | cbor "github.com/fxamacker/cbor/v2" 10 | cache "github.com/unkn0wn-root/kioshun" 11 | ) 12 | 13 | type bucketSig struct { 14 | count uint32 15 | hash uint64 16 | } 17 | 18 | const defaultBackfillDepth = 2 // 65,536 buckets 19 | 20 | // readyPollInterval picks a small poll period relative to configured cadences. 21 | func readyPollInterval(cfg Config) time.Duration { 22 | p := 150 * time.Millisecond 23 | if cfg.GossipInterval > 0 && cfg.GossipInterval/4 < p { 24 | p = cfg.GossipInterval / 4 25 | } 26 | if cfg.WeightUpdate > 0 && cfg.WeightUpdate/4 < p { 27 | p = cfg.WeightUpdate / 4 28 | } 29 | if p < 100*time.Millisecond { 30 | p = 100 * time.Millisecond 31 | } 32 | if p > 500*time.Millisecond { 33 | p = 500 * time.Millisecond 34 | } 35 | return p 36 | } 37 | 38 | // backfillLoop waits until the node has a minimally ready view of the 39 | // cluster (some peers connected or a ring with >1 node), then performs an 40 | // initial state backfill from peers. After startup it periodically runs a 41 | // light repair pass to reconcile keys that may have diverged due to 42 | // membership changes or temporary failures. 
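// A membership epoch increase additionally kicks an immediate light repair
// via n.backfillKick; see the select loop below.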
43 | // Wait for initial membership/ring readiness with a bounded timeout 44 | // to avoid running a no-op backfill before peers and ring are populated. 45 | // Conditions to proceed: at least one peer connected OR ring has >1 node. 46 | func (n *Node[K, V]) backfillLoop() { 47 | timeout := 3 * time.Second 48 | if n.cfg.GossipInterval > 0 { 49 | if d := 3 * n.cfg.GossipInterval; d > timeout { 50 | timeout = d 51 | } 52 | } 53 | 54 | if n.cfg.WeightUpdate > 0 { 55 | if d := 3 * n.cfg.WeightUpdate; d > timeout { 56 | timeout = d 57 | } 58 | } 59 | 60 | deadline := time.Now().Add(timeout) 61 | poll := readyPollInterval(n.cfg) // typically ~150ms 62 | tk := time.NewTicker(poll) 63 | defer tk.Stop() 64 | for { 65 | r := n.ring.Load().(*ring) 66 | if len(n.peerIDs()) > 0 || len(r.nodes) > 1 { 67 | break 68 | } 69 | if time.Now().After(deadline) { 70 | break 71 | } 72 | select { 73 | case <-tk.C: 74 | case <-n.stop: 75 | return 76 | } 77 | } 78 | 79 | n.backfillOnce(defaultBackfillDepth, 1024) 80 | 81 | iv := n.cfg.BackfillInterval 82 | if iv <= 0 { 83 | iv = n.cfg.RebalanceInterval 84 | if iv <= 0 { 85 | iv = 30 * time.Second 86 | } 87 | } 88 | t := time.NewTicker(iv) 89 | defer t.Stop() 90 | for { 91 | select { 92 | case <-t.C: 93 | n.backfillOnce(defaultBackfillDepth, 512) // light repair 94 | case <-n.backfillKick: 95 | // triggered by membership epoch increase -> run a light repair promptly. 96 | n.backfillOnce(defaultBackfillDepth, 512) 97 | case <-n.stop: 98 | return 99 | } 100 | } 101 | } 102 | 103 | // backfillOnce reconciles this node's owned keyspace with peers by: 104 | // 1. Computing local per-bucket digests for owned keys. 105 | // 2. Asking each donor for its digests targeted at this node. 106 | // 3. For buckets that differ, paging through donor keys in hash order 107 | // using a cursor, decoding values, and importing successfully decoded 108 | // items into the local shard (and LWW version table when enabled). 109 | // 110 | // The donor list excludes self and peers we are not connected to. 111 | func (n *Node[K, V]) backfillOnce(depth int, page int) { 112 | donors := n.peerIDs() 113 | selfID := n.cfg.ID 114 | tmp := donors[:0] 115 | for _, d := range donors { 116 | if d != selfID && n.getPeer(d) != nil { 117 | tmp = append(tmp, d) 118 | } 119 | } 120 | 121 | donors = tmp 122 | if len(donors) == 0 { 123 | return 124 | } 125 | 126 | // Compute a local view of bucket digests we own to detect divergence. 
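// Each bucket digest is {count, XOR over keys of (hash64 ^ version)}, keyed by
// the first depth bytes of the big-endian key hash; buckets whose digests match
// the donor's can be skipped without transferring any keys.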
127 | local := n.computeLocalDigests(depth) 128 | sort.Slice(donors, func(i, j int) bool { return donors[i] < donors[j] }) 129 | 130 | for _, d := range donors { 131 | pc := n.getPeer(d) 132 | if pc == nil { 133 | continue 134 | } 135 | 136 | req := &MsgBackfillDigestReq{ 137 | Base: Base{T: MTBackfillDigestReq, ID: n.nextReqID()}, 138 | TargetID: string(selfID), 139 | Depth: uint8(depth), 140 | } 141 | 142 | raw, err := pc.request(req, req.ID, n.cfg.Sec.ReadTimeout) 143 | if err != nil { 144 | continue 145 | } 146 | 147 | var dr MsgBackfillDigestResp 148 | if e := cbor.Unmarshal(raw, &dr); e != nil { 149 | continue 150 | } 151 | if dr.NotInRing { 152 | // Donor hasn't integrated us into its ring yet; skip this donor for now 153 | continue 154 | } 155 | if len(dr.Buckets) == 0 { 156 | continue 157 | } 158 | 159 | for _, b := range dr.Buckets { 160 | lp := local[string(b.Prefix)] 161 | if lp.count == b.Count && lp.hash == b.Hash64 { 162 | continue // already in sync for this bucket 163 | } 164 | 165 | // Page through differing buckets using last key-hash cursor to keep 166 | // pagination deterministic and avoid duplicates/skips across pages. 167 | var cursor []byte 168 | for { 169 | kReq := &MsgBackfillKeysReq{ 170 | Base: Base{T: MTBackfillKeysReq, ID: n.nextReqID()}, 171 | TargetID: string(selfID), 172 | Prefix: append([]byte(nil), b.Prefix...), 173 | Limit: page, 174 | Cursor: cursor, 175 | } 176 | 177 | raw2, err := pc.request(kReq, kReq.ID, n.cfg.Sec.ReadTimeout) 178 | if err != nil { 179 | break 180 | } 181 | 182 | var kr MsgBackfillKeysResp 183 | if e := cbor.Unmarshal(raw2, &kr); e != nil { 184 | break 185 | } 186 | if kr.NotInRing { 187 | // Donor not ready; stop paging this bucket from this donor for now 188 | break 189 | } 190 | if len(kr.Items) == 0 { 191 | break 192 | } 193 | 194 | // Decode and import only keys that successfully decode and pass 195 | // size/time limits; errors are skipped to keep repair moving. 196 | toImport := make([]cache.Item[K, V], 0, len(kr.Items)) 197 | for _, kv := range kr.Items { 198 | k, err := n.kc.DecodeKey(kv.K) 199 | if err != nil { 200 | continue 201 | } 202 | 203 | vb, err := n.maybeDecompress(kv.V, kv.Cp) 204 | if err != nil { 205 | continue 206 | } 207 | 208 | v, err := n.codec.Decode(vb) 209 | if err != nil { 210 | continue 211 | } 212 | 213 | toImport = append(toImport, cache.Item[K, V]{ 214 | Key: k, 215 | Val: v, 216 | ExpireAbs: kv.E, 217 | Version: kv.Ver, 218 | }) 219 | } 220 | 221 | if len(toImport) > 0 { 222 | n.local.Import(toImport) 223 | if n.cfg.LWWEnabled { 224 | n.verMu.Lock() 225 | for _, it := range toImport { 226 | n.version[string(n.kc.EncodeKey(it.Key))] = it.Version 227 | } 228 | n.verMu.Unlock() 229 | 230 | last := toImport[len(toImport)-1].Version 231 | if last > 0 { 232 | n.clock.Observe(last) 233 | } 234 | } 235 | // Update our running local digest with imported batch to avoid 236 | // asking for keys we've already reconciled in this pass. 237 | local = n.updateLocalDigestWithBatch(local, depth, toImport) 238 | } 239 | 240 | if len(kr.NextCursor) == 8 { 241 | cursor = append([]byte(nil), kr.NextCursor...) 242 | } else { 243 | break 244 | } 245 | } 246 | } 247 | } 248 | } 249 | 250 | // computeLocalDigests returns an orderless digest per key-hash prefix 251 | // bucket for keys this node currently owns. The digest includes count and 252 | // XOR(hash^version) so that donors and joiners can cheaply detect drift 253 | // without moving all keys. 
Out-of-range depths (<= 0 or > 8) fall back to the default of 2; buckets are
254 | // keyed by the first depth bytes of the 64-bit key hash in
255 | // big-endian order.
256 | func (n *Node[K, V]) computeLocalDigests(depth int) map[string]bucketSig {
257 | if depth <= 0 || depth > 8 {
258 | depth = 2
259 | }
260 |
261 | m := make(map[string]bucketSig, 1<<12)
262 | keys := n.local.Keys()
263 | r := n.ring.Load().(*ring)
264 | selfID := n.cfg.ID
265 |
266 | for _, k := range keys {
267 | h64 := n.hash64Of(k)
268 | if !r.ownsHash(selfID, h64) {
269 | continue
270 | }
271 |
272 | var hb [8]byte
273 | binary.BigEndian.PutUint64(hb[:], h64)
274 | prefix := string(hb[:depth])
275 |
276 | ver := uint64(0)
277 | if n.cfg.LWWEnabled {
278 | kb := n.kc.EncodeKey(k)
279 | n.verMu.RLock()
280 | ver = n.version[string(kb)]
281 | n.verMu.RUnlock()
282 | }
283 |
284 | s := m[prefix]
285 | s.count++
286 | s.hash ^= (h64 ^ ver)
287 | m[prefix] = s
288 | }
289 | return m
290 | }
291 |
292 | // updateLocalDigestWithBatch updates an existing local digest with a set
293 | // of imported items so subsequent comparisons consider already-synced
294 | // keys and avoid re-requesting them in the same backfill run.
295 | func (n *Node[K, V]) updateLocalDigestWithBatch(m map[string]bucketSig, depth int, batch []cache.Item[K, V]) map[string]bucketSig {
296 | for _, it := range batch {
297 | h64 := n.hash64Of(it.Key)
298 | var hb [8]byte
299 | binary.BigEndian.PutUint64(hb[:], h64)
300 | prefix := string(hb[:depth])
301 |
302 | ver := uint64(0)
303 | if n.cfg.LWWEnabled {
304 | ver = it.Version
305 | }
306 |
307 | s := m[prefix]
308 | s.count++
309 | s.hash ^= (h64 ^ ver)
310 | m[prefix] = s
311 | }
312 | return m
313 | }
314 |
315 | func (n *Node[K, V]) hash64Of(k K) uint64 {
316 | if kh, ok := any(n.kc).(KeyHasher[K]); ok {
317 | return kh.Hash64(k)
318 | }
319 | return xxhash.Sum64(n.kc.EncodeKey(k))
320 | }
321 | --------------------------------------------------------------------------------
/cluster/transport.go:
--------------------------------------------------------------------------------
1 | package cluster
2 |
3 | import (
4 | "bufio"
5 | "crypto/tls"
6 | "encoding/binary"
7 | "errors"
8 | "io"
9 | "net"
10 | "sync"
11 | "sync/atomic"
12 | "syscall"
13 | "time"
14 |
15 | cbor "github.com/fxamacker/cbor/v2"
16 | )
17 |
18 | const (
19 | penaltyBase = 2 * time.Second // first timeout → 2s
20 | penaltyMax = 8 * time.Second // cap the penalty
21 | backoffWindow = 5 * time.Second // time window to keep growing the streak
22 | )
23 |
24 | type peerConn struct {
25 | addr string // current dial address for this peer
26 | selfID NodeID
27 | selfAddr string
28 | peerID NodeID
29 | conn net.Conn
30 | r *bufio.Reader
31 | w *bufio.Writer
32 | mu sync.Mutex
33 | pend sync.Map // reqID -> chan []byte
34 | closed chan struct{}
35 | maxFrame int
36 | readTO time.Duration
37 | writeTO time.Duration
38 | idleTO time.Duration
39 | inflightCh chan struct{}
40 | token string
41 | penaltyUntil int64
42 | lastTimeout int64
43 | toStreak uint32
44 | }
45 |
46 | // dialPeer establishes a connection, performs Hello (auth+identity),
47 | // and starts the read loop. Returns the learned peerID.
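// The handshake is one request/response pair,
// MsgHello{FromID, FromAddr, Token} → MsgHelloResp{OK, PeerID, Err},
// as described in the Wire Protocol section of CLUSTER.md.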
48 | func dialPeer(selfID NodeID, selfAddr, addr string, tlsConf *tls.Config, maxFrame int, 49 | readTO, writeTO, idleTO time.Duration, inflight int, token string, 50 | ) (*peerConn, NodeID, error) { 51 | d := &net.Dialer{ 52 | Timeout: readTO, 53 | KeepAlive: 45 * time.Second, 54 | Control: func(network, address string, c syscall.RawConn) error { 55 | var ctrlErr error 56 | _ = c.Control(func(fd uintptr) { 57 | _ = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_TCP, syscall.TCP_NODELAY, 1) 58 | }) 59 | return ctrlErr 60 | }, 61 | } 62 | 63 | var c net.Conn 64 | var err error 65 | if tlsConf != nil { 66 | c, err = tls.DialWithDialer(d, "tcp", addr, tlsConf) 67 | } else { 68 | c, err = d.Dial("tcp", addr) 69 | } 70 | if err != nil { 71 | return nil, "", err 72 | } 73 | 74 | if tlsConf == nil { 75 | if tc, ok := c.(*net.TCPConn); ok { 76 | _ = tc.SetNoDelay(true) 77 | _ = tc.SetKeepAlive(true) 78 | _ = tc.SetKeepAlivePeriod(45 * time.Second) 79 | } 80 | } 81 | 82 | pc := &peerConn{ 83 | addr: addr, 84 | selfID: selfID, 85 | selfAddr: selfAddr, 86 | conn: c, 87 | r: bufio.NewReaderSize(c, 64<<10), 88 | w: bufio.NewWriterSize(c, 64<<10), 89 | closed: make(chan struct{}), 90 | maxFrame: maxFrame, 91 | readTO: readTO, 92 | writeTO: writeTO, 93 | idleTO: idleTO, 94 | inflightCh: make(chan struct{}, inflight), 95 | token: token, 96 | } 97 | 98 | if err := pc.hello(); err != nil { 99 | _ = c.Close() 100 | return nil, "", err 101 | } 102 | // start the demultiplexing reader: one goroutine reads frames and routes 103 | // them to the waiting requester channel keyed by Base.ID. 104 | go pc.readLoop() 105 | return pc, pc.peerID, nil 106 | } 107 | 108 | // hello performs an authentication handshake by sending MsgHello and expecting 109 | // a positive MsgHelloResp. 110 | func (p *peerConn) hello() error { 111 | id := uint64(time.Now().UnixNano()) 112 | msg := &MsgHello{ 113 | Base: Base{T: MTHello, ID: id}, 114 | FromID: string(p.selfID), 115 | FromAddr: p.selfAddr, 116 | Token: p.token, 117 | } 118 | raw, err := cborEnc.Marshal(msg) 119 | if err != nil { 120 | return err 121 | } 122 | 123 | if err := p.writeFrame(raw); err != nil { 124 | return err 125 | } 126 | 127 | respRaw, err := p.readFrame() 128 | if err != nil { 129 | return err 130 | } 131 | 132 | var base Base 133 | if err := cbor.Unmarshal(respRaw, &base); err != nil { 134 | return err 135 | } 136 | 137 | if base.T != MTHelloResp { 138 | return errors.New("bad hello resp") 139 | } 140 | 141 | var hr MsgHelloResp 142 | if err := cbor.Unmarshal(respRaw, &hr); err != nil { 143 | return err 144 | } 145 | if !hr.OK { 146 | if hr.Err == "" { 147 | hr.Err = "unauthorized" 148 | } 149 | return errors.New(hr.Err) 150 | } 151 | 152 | p.peerID = NodeID(hr.PeerID) 153 | return nil 154 | } 155 | 156 | // close closes the underlying connection and marks the peer as closed. 157 | func (p *peerConn) close() { 158 | _ = p.conn.Close() 159 | select { 160 | case <-p.closed: 161 | default: 162 | close(p.closed) 163 | } 164 | } 165 | 166 | // failAll closes all pending request channels and the connection to unblock 167 | // waiters when the connection is no longer usable. 168 | func (p *peerConn) failAll(err error) { 169 | // notify all pending requests that the connection failed. 170 | p.pend.Range(func(_, chAny any) bool { 171 | if ch, ok := chAny.(chan []byte); ok { 172 | // close channel so request() unblocks and returns "peer closed". 
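// (request() observes the close as ok == false on its receive and
// maps it to ErrPeerClosed.)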
173 | close(ch) 174 | } 175 | return true 176 | }) 177 | p.close() 178 | } 179 | 180 | // readLoop continuously reads frames, demultiplexes them by request ID, and 181 | // delivers payloads to the waiting channels created by request(). 182 | func (p *peerConn) readLoop() { 183 | for { 184 | buf, err := p.readFrame() 185 | if err != nil { 186 | p.failAll(err) 187 | return 188 | } 189 | var base Base 190 | if err := cbor.Unmarshal(buf, &base); err != nil { 191 | continue 192 | } 193 | if chAny, ok := p.pend.Load(base.ID); ok { 194 | p.pend.Delete(base.ID) 195 | if ch, ok := chAny.(chan []byte); ok { 196 | ch <- buf 197 | close(ch) 198 | } 199 | } 200 | } 201 | } 202 | 203 | // readFrame reads one length-prefixed frame with deadlines and size checks. 204 | func (p *peerConn) readFrame() ([]byte, error) { 205 | _ = p.conn.SetReadDeadline(time.Now().Add(p.readTO)) 206 | var hdr [4]byte 207 | if _, err := io.ReadFull(p.r, hdr[:]); err != nil { 208 | return nil, err 209 | } 210 | 211 | n := int(binary.BigEndian.Uint32(hdr[:])) 212 | if p.maxFrame > 0 && n > p.maxFrame { 213 | return nil, errors.New("frame too large") 214 | } 215 | 216 | buf := make([]byte, n) 217 | if _, err := io.ReadFull(p.r, buf); err != nil { 218 | return nil, err 219 | } 220 | _ = p.conn.SetReadDeadline(time.Now().Add(p.idleTO)) 221 | return buf, nil 222 | } 223 | 224 | // writeFrame writes one length-prefixed frame with a write deadline. 225 | func (p *peerConn) writeFrame(payload []byte) error { 226 | p.mu.Lock() 227 | defer p.mu.Unlock() 228 | _ = p.conn.SetWriteDeadline(time.Now().Add(p.writeTO)) 229 | return writeFrameBuf(p.w, payload) 230 | } 231 | 232 | // writeFrameBuf writes a frame and flushes the buffered writer. 233 | func writeFrameBuf(w *bufio.Writer, payload []byte) error { 234 | if err := writeFrame(w, payload); err != nil { 235 | return err 236 | } 237 | return w.Flush() 238 | } 239 | 240 | // writeFrame writes a 4-byte big-endian length header followed by payload. 241 | func writeFrame(w io.Writer, payload []byte) error { 242 | var hdr [4]byte 243 | binary.BigEndian.PutUint32(hdr[:], uint32(len(payload))) 244 | if _, err := w.Write(hdr[:]); err != nil { 245 | return err 246 | } 247 | _, err := w.Write(payload) 248 | return err 249 | } 250 | 251 | // request sends a message and waits for a response with matching ID or until 252 | // timeout. It bounds per-peer concurrency via inflightCh and applies timeout 253 | // penalties on repeated expirations to avoid hot-looping on bad peers. 254 | func (p *peerConn) request(msg any, id uint64, timeout time.Duration) ([]byte, error) { 255 | select { 256 | case p.inflightCh <- struct{}{}: 257 | default: 258 | return nil, errors.New("peer inflight limit") 259 | } 260 | defer func() { <-p.inflightCh }() 261 | 262 | sel, err := cborEnc.Marshal(msg) 263 | if err != nil { 264 | return nil, err 265 | } 266 | // each request registers a one-shot channel under its ID; readLoop 267 | // delivers the response or request times out and cleans up the slot. 
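// The 1-slot buffer lets readLoop complete its send even if this caller
// times out between readLoop's pend lookup and the delivery.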
268 | ch := make(chan []byte, 1) 269 | p.pend.Store(id, ch) 270 | 271 | if err := p.writeFrame(sel); err != nil { 272 | p.pend.Delete(id) 273 | return nil, err 274 | } 275 | 276 | timer := time.NewTimer(timeout) 277 | defer timer.Stop() 278 | select { 279 | case resp, ok := <-ch: 280 | if !ok { 281 | return nil, ErrPeerClosed 282 | } 283 | return resp, nil 284 | case <-timer.C: 285 | p.pend.Delete(id) 286 | p.penalizeTimeout() // backoff on repeated timeouts 287 | return nil, ErrTimeout 288 | } 289 | } 290 | 291 | // penalizeTimeout bumps a short penalty - repeated timeouts within backoffWindow 292 | // grow the penalty (2s → 4s → 8s), capped by penaltyMax. O(1), timeout-path only. 293 | func (p *peerConn) penalizeTimeout() { 294 | now := time.Now() 295 | last := time.Unix(0, atomic.LoadInt64(&p.lastTimeout)) 296 | var streak uint32 297 | if now.Sub(last) > backoffWindow { 298 | // stale last-timeout: reset streak to 1 299 | atomic.StoreUint32(&p.toStreak, 1) 300 | streak = 1 301 | } else { 302 | // same window: increment 303 | streak = atomic.AddUint32(&p.toStreak, 1) 304 | } 305 | atomic.StoreInt64(&p.lastTimeout, now.UnixNano()) 306 | 307 | // penalty = base << (streak-1), capped 308 | shift := streak - 1 309 | if shift > 3 { // 2s<<3 = 16s 310 | shift = 3 311 | } 312 | 313 | d := penaltyBase << shift 314 | if d > penaltyMax { 315 | d = penaltyMax 316 | } 317 | atomic.StoreInt64(&p.penaltyUntil, now.Add(d).UnixNano()) 318 | } 319 | 320 | // penalized reports whether the peer is currently under penalty. 321 | func (p *peerConn) penalized() bool { 322 | return time.Now().UnixNano() < atomic.LoadInt64(&p.penaltyUntil) 323 | } 324 | -------------------------------------------------------------------------------- /benchmarks/cluster/direct/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "math/rand" 7 | "os" 8 | "os/signal" 9 | "sort" 10 | "strconv" 11 | "sync" 12 | "sync/atomic" 13 | "syscall" 14 | "time" 15 | 16 | cache "github.com/unkn0wn-root/kioshun" 17 | "github.com/unkn0wn-root/kioshun/cluster" 18 | ) 19 | 20 | type stats struct { 21 | Total int64 22 | Gets int64 23 | Sets int64 24 | Errs int64 25 | Hits int64 26 | Miss int64 27 | WrongValue int64 28 | GetLatUs []int64 29 | SetLatUs []int64 30 | } 31 | 32 | func getenv(k, d string) string { 33 | if v := os.Getenv(k); v != "" { 34 | return v 35 | } 36 | return d 37 | } 38 | 39 | func percentile(vals []int64, p float64) float64 { 40 | if len(vals) == 0 { 41 | return 0 42 | } 43 | cp := append([]int64(nil), vals...) 
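// Sorting a private copy keeps the caller's append-ordered latency slice intact.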
44 | 45 | sort.Slice(cp, func(i, j int) bool { return cp[i] < cp[j] }) 46 | 47 | rank := p * float64(len(cp)-1) 48 | lo := int(rank) 49 | hi := lo + 1 50 | if hi >= len(cp) { 51 | return float64(cp[lo]) 52 | } 53 | 54 | frac := rank - float64(lo) 55 | return float64(cp[lo])*(1-frac) + float64(cp[hi])*frac 56 | } 57 | 58 | func applyNodeEnv(cfg *cluster.Config) { 59 | if v := getenv("REPLICATION_FACTOR", ""); v != "" { 60 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 61 | cfg.ReplicationFactor = n 62 | } 63 | } 64 | 65 | if v := getenv("WRITE_CONCERN", ""); v != "" { 66 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 67 | cfg.WriteConcern = n 68 | } 69 | } 70 | 71 | if v := getenv("READ_MAX_FANOUT", ""); v != "" { 72 | if n, err := strconv.Atoi(v); err == nil && n >= 1 { 73 | cfg.ReadMaxFanout = n 74 | } 75 | } 76 | 77 | if v := getenv("READ_PER_TRY_MS", ""); v != "" { 78 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 79 | cfg.ReadPerTryTimeout = time.Duration(n) * time.Millisecond 80 | } 81 | } 82 | 83 | if v := getenv("READ_HEDGE_DELAY_MS", ""); v != "" { 84 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 85 | cfg.ReadHedgeDelay = time.Duration(n) * time.Millisecond 86 | } 87 | } 88 | 89 | if v := getenv("READ_HEDGE_INTERVAL_MS", ""); v != "" { 90 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 91 | cfg.ReadHedgeInterval = time.Duration(n) * time.Millisecond 92 | } 93 | } 94 | 95 | if v := getenv("WRITE_TIMEOUT_MS", ""); v != "" { 96 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 97 | cfg.Sec.WriteTimeout = time.Duration(n) * time.Millisecond 98 | } 99 | } 100 | 101 | if v := getenv("READ_TIMEOUT_MS", ""); v != "" { 102 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 103 | cfg.Sec.ReadTimeout = time.Duration(n) * time.Millisecond 104 | } 105 | } 106 | 107 | if v := getenv("SUSPICION_AFTER_MS", ""); v != "" { 108 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 109 | cfg.SuspicionAfter = time.Duration(n) * time.Millisecond 110 | } 111 | } 112 | 113 | if v := getenv("WEIGHT_UPDATE_MS", ""); v != "" { 114 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 115 | cfg.WeightUpdate = time.Duration(n) * time.Millisecond 116 | } 117 | } 118 | 119 | if v := getenv("GOSSIP_INTERVAL_MS", ""); v != "" { 120 | if n, err := strconv.Atoi(v); err == nil && n > 0 { 121 | cfg.GossipInterval = time.Duration(n) * time.Millisecond 122 | } 123 | } 124 | } 125 | 126 | func main() { 127 | rand.Seed(time.Now().UnixNano()) 128 | d, _ := time.ParseDuration(getenv("DURATION", "60s")) 129 | conc, _ := strconv.Atoi(getenv("CONCURRENCY", "512")) 130 | keys, _ := strconv.Atoi(getenv("KEYS", "50000")) 131 | setRatio, _ := strconv.Atoi(getenv("SET_RATIO", "10")) 132 | setTTLms, _ := strconv.ParseInt(getenv("SET_TTL_MS", "-1"), 10, 64) 133 | auth := getenv("CACHE_AUTH", "") 134 | 135 | // ports and addresses for the three nodes (within this process) 136 | bind1 := getenv("BIND1", ":7011") 137 | pub1 := getenv("PUB1", "127.0.0.1:7011") 138 | bind2 := getenv("BIND2", ":7012") 139 | pub2 := getenv("PUB2", "127.0.0.1:7012") 140 | bind3 := getenv("BIND3", ":7013") 141 | pub3 := getenv("PUB3", "127.0.0.1:7013") 142 | seeds := []string{pub1, pub2, pub3} 143 | 144 | // create three nodes 145 | mk := func(bind, pub string) *cluster.Node[string, []byte] { 146 | local := cache.NewWithDefaults[string, []byte]() 147 | cfg := cluster.Default() 148 | cfg.BindAddr = bind 149 | cfg.PublicURL = pub 150 | cfg.Seeds = seeds 151 | cfg.Sec.AuthToken = auth 152 | cfg.ID = 
cluster.NodeID(cfg.PublicURL) 153 | cfg.ReplicationFactor = 3 154 | cfg.WriteConcern = 2 155 | cfg.PerConnWorkers = 128 156 | cfg.PerConnQueue = 256 157 | cfg.Sec.MaxInflightPerPeer = 512 158 | applyNodeEnv(&cfg) 159 | 160 | n := cluster.NewNode[string, []byte](cfg, cluster.StringKeyCodec[string]{}, local, cluster.BytesCodec{}) 161 | if err := n.Start(); err != nil { 162 | panic(err) 163 | } 164 | return n 165 | } 166 | 167 | n1 := mk(bind1, pub1) 168 | n2 := mk(bind2, pub2) 169 | n3 := mk(bind3, pub3) 170 | defer n1.Stop() 171 | defer n2.Stop() 172 | defer n3.Stop() 173 | 174 | // wait until ring is usable: best-effort small Set retry 175 | readyCtx, cancelReady := context.WithTimeout(context.Background(), 5*time.Second) 176 | defer cancelReady() 177 | 178 | for { 179 | if err := n1.Set(readyCtx, "__warmup__", []byte("ok"), time.Second); err == nil { 180 | break 181 | } 182 | 183 | select { 184 | case <-time.After(50 * time.Millisecond): 185 | case <-readyCtx.Done(): 186 | } 187 | if readyCtx.Err() != nil { 188 | break 189 | } 190 | } 191 | 192 | // drive load via node1 client API 193 | driver := n1 194 | 195 | deadline := time.Now().Add(d) 196 | ctx, cancel := context.WithCancel(context.Background()) 197 | defer cancel() 198 | 199 | sigCh := make(chan os.Signal, 2) 200 | signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) 201 | go func() { 202 | <-sigCh 203 | fmt.Println("[RUNNER] signal received, stopping...") 204 | cancel() 205 | }() 206 | 207 | // optional failure injection: stop node3 208 | if ka := getenv("KILL_AFTER", ""); ka != "" { 209 | if after, err := time.ParseDuration(ka); err == nil && after > 0 { 210 | go func() { 211 | t := time.NewTimer(after) 212 | defer t.Stop() 213 | 214 | select { 215 | case <-t.C: 216 | case <-ctx.Done(): 217 | return 218 | } 219 | 220 | fmt.Println("[KILL] stopping node3") 221 | n3.Stop() 222 | }() 223 | } 224 | } 225 | 226 | var st stats 227 | st.GetLatUs = make([]int64, 0, 2_000_000) 228 | st.SetLatUs = make([]int64, 0, 500_000) 229 | latMu := sync.Mutex{} 230 | 231 | // Progress 232 | go func() { 233 | ticker := time.NewTicker(5 * time.Second) 234 | defer ticker.Stop() 235 | 236 | lastTotal := int64(0) 237 | lastTime := time.Now() 238 | for { 239 | select { 240 | case now := <-ticker.C: 241 | t := atomic.LoadInt64(&st.Total) 242 | dt := now.Sub(lastTime).Seconds() 243 | if dt <= 0 { 244 | dt = 1 245 | } 246 | 247 | qps := float64(t-lastTotal) / dt 248 | 249 | fmt.Printf("[PROGRESS] total=%d qps=%.0f hits=%d miss=%d\n", t, qps, atomic.LoadInt64(&st.Hits), atomic.LoadInt64(&st.Miss)) 250 | 251 | lastTotal = t 252 | lastTime = now 253 | if time.Now().After(deadline) { 254 | return 255 | } 256 | case <-ctx.Done(): 257 | return 258 | } 259 | } 260 | }() 261 | 262 | // Workload 263 | seqs := make([]uint64, keys) 264 | wg := sync.WaitGroup{} 265 | for i := 0; i < conc; i++ { 266 | wg.Add(1) 267 | go func(worker int) { 268 | defer wg.Done() 269 | for { 270 | if time.Now().After(deadline) { 271 | return 272 | } 273 | 274 | select { 275 | case <-ctx.Done(): 276 | return 277 | default: 278 | } 279 | 280 | isSet := rand.Intn(100) < setRatio 281 | kidx := rand.Intn(keys) 282 | key := fmt.Sprintf("k%08d", kidx) 283 | if isSet { 284 | seq := atomic.AddUint64(&seqs[kidx], 1) 285 | val := []byte(fmt.Sprintf("v:%s:%d", key, seq)) 286 | ttl := time.Duration(0) 287 | if setTTLms < 0 { 288 | ttl = 0 289 | } else if setTTLms > 0 { 290 | ttl = time.Duration(setTTLms) * time.Millisecond 291 | } 292 | 293 | begin := time.Now() 294 | if err := driver.Set(ctx, key, val, 
ttl); err != nil { 295 | atomic.AddInt64(&st.Errs, 1) 296 | } else { 297 | latMu.Lock() 298 | st.SetLatUs = append(st.SetLatUs, time.Since(begin).Microseconds()) 299 | latMu.Unlock() 300 | atomic.AddInt64(&st.Sets, 1) 301 | atomic.AddInt64(&st.Total, 1) 302 | } 303 | } else { 304 | begin := time.Now() 305 | _, ok, err := driver.Get(ctx, key) 306 | lat := time.Since(begin) 307 | 308 | atomic.AddInt64(&st.Total, 1) 309 | atomic.AddInt64(&st.Gets, 1) 310 | 311 | latMu.Lock() 312 | st.GetLatUs = append(st.GetLatUs, lat.Microseconds()) 313 | latMu.Unlock() 314 | if err != nil { 315 | atomic.AddInt64(&st.Errs, 1) 316 | continue 317 | } 318 | 319 | if ok { 320 | atomic.AddInt64(&st.Hits, 1) 321 | } else { 322 | atomic.AddInt64(&st.Miss, 1) 323 | } 324 | } 325 | } 326 | }(i) 327 | } 328 | wg.Wait() 329 | 330 | // Summarize 331 | latMu.Lock() 332 | getP50 := percentile(st.GetLatUs, 0.50) / 1000 333 | getP95 := percentile(st.GetLatUs, 0.95) / 1000 334 | getP99 := percentile(st.GetLatUs, 0.99) / 1000 335 | getP999 := percentile(st.GetLatUs, 0.999) / 1000 336 | setP50 := percentile(st.SetLatUs, 0.50) / 1000 337 | setP95 := percentile(st.SetLatUs, 0.95) / 1000 338 | setP99 := percentile(st.SetLatUs, 0.99) / 1000 339 | setP999 := percentile(st.SetLatUs, 0.999) / 1000 340 | latMu.Unlock() 341 | 342 | hitRatio := 0.0 343 | if st.Gets > 0 { 344 | hitRatio = float64(st.Hits) / float64(st.Gets) * 100 345 | } 346 | fmt.Println("=== Kioshun Direct Bench Summary ===") 347 | fmt.Printf("Nodes: %s,%s,%s\n", pub1, pub2, pub3) 348 | fmt.Printf("Total: %d | GETs: %d | SETs: %d | Errors: %d\n", st.Total, st.Gets, st.Sets, st.Errs) 349 | fmt.Printf("Hits: %d | Miss: %d | HitRatio=%.2f%%\n", st.Hits, st.Miss, hitRatio) 350 | fmt.Printf("GET p50=%.2fms p95=%.2fms p99=%.2fms p99.9=%.2fms | SET p50=%.2fms p95=%.2fms p99=%.2fms p99.9=%.2fms\n", getP50, getP95, getP99, getP999, setP50, setP95, setP99, setP999) 351 | } 352 | --------------------------------------------------------------------------------