├── byteview.go
├── cache.go
├── consistenthash
│   └── consistenthash.go
├── go.mod
├── group.go
├── http.go
├── lru
│   └── lru.go
├── peers.go
├── readme.md
└── singleflight
    └── singleflight.go
--------------------------------------------------------------------------------
/byteview.go:
--------------------------------------------------------------------------------
package distributecache

// ByteView is an immutable view of cached bytes; it implements the lru.Value interface.
type ByteView struct {
	b []byte
}

func (v ByteView) Len() int {
	return len(v.b)
}

func (v ByteView) ByteSlice() []byte {
	return cloneBytes(v.b)
}

func (v ByteView) String() string {
	return string(v.b)
}

func cloneBytes(b []byte) []byte {
	c := make([]byte, len(b))
	copy(c, b)
	return c
}
--------------------------------------------------------------------------------
/cache.go:
--------------------------------------------------------------------------------
package distributecache

// cache wraps an lru.Cache with a mutex to guarantee concurrency safety.
import (
	"sync"

	"github.com/distributeCache/lru"
)

type cache struct {
	mu         sync.Mutex
	lru        *lru.Cache
	cacheBytes int64
}

func (c *cache) add(key string, value ByteView) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.lru == nil {
		c.lru = lru.NewCache(c.cacheBytes, nil) // lazy initialization
	}
	c.lru.Add(key, value)
}

func (c *cache) get(key string) (value ByteView, ok bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.lru == nil {
		return
	}
	if v, ok := c.lru.Get(key); ok {
		return v.(ByteView), ok
	}
	return
}
--------------------------------------------------------------------------------
/consistenthash/consistenthash.go:
--------------------------------------------------------------------------------
package consistenthash

import (
	"hash/crc32"
	"sort"
	"strconv"
)

// Hash maps data to a uint32 hash value.
type Hash func(data []byte) uint32

// Map implements consistent hashing.
// hash: the function used to compute hash values
// replicas: the number of virtual nodes per real node
// keys: the hash ring (sorted hashes of the virtual nodes)
// hashMap: the mapping from virtual-node hashes to real node names
type Map struct {
	hash     Hash
	replicas int            // virtual-node multiplier
	keys     []int          // the hash ring
	hashMap  map[int]string // virtual node -> real node
}

// NewHash creates a consistent-hash Map.
// replicas: the number of virtual nodes per real node
// fn: the hash function used to compute hash values
func NewHash(replicas int, fn Hash) *Map {
	m := &Map{
		hash:     fn,
		replicas: replicas,
		hashMap:  make(map[int]string),
	}
	if m.hash == nil {
		m.hash = crc32.ChecksumIEEE // default to the CRC-32 checksum if no hash function is given
	}
	return m
}

// Add registers real nodes and creates replicas virtual nodes for each of them.
// keys: the names of the real nodes (variadic)
func (m *Map) Add(keys ...string) {
	for _, key := range keys {
		for i := 0; i < m.replicas; i++ {
			hash := int(m.hash([]byte(strconv.Itoa(i) + key))) // hash of the virtual node
			m.keys = append(m.keys, hash)                      // add the hash to the ring
			m.hashMap[hash] = key                              // map the virtual node back to the real node
		}
	}
	sort.Ints(m.keys) // keep the ring sorted
}

// Get returns the real node closest to the given key on the ring (clockwise).
// key: the key to look up
// It returns the name of the real node, or "" if the ring is empty.
func (m *Map) Get(key string) string {
	if len(m.keys) == 0 {
		return "" // empty ring
	}
	hash := int(m.hash([]byte(key))) // hash of the lookup key
	idx := sort.Search(len(m.keys), func(i int) bool {
		return m.keys[i] >= hash
	}) // find the first virtual node whose hash is >= the key's hash

	return m.hashMap[m.keys[idx%len(m.keys)]] // wrap around the ring if necessary
}
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
module github.com/distributeCache

go 1.21.1
--------------------------------------------------------------------------------
/group.go:
--------------------------------------------------------------------------------
package distributecache

import (
	"fmt"
	"log"
	"sync"

	"github.com/distributeCache/singleflight"
)

// Getter loads the data for a key.
type Getter interface {
	Get(key string) ([]byte, error)
}

// GetterFunc is a function type that implements the Getter interface,
// so a plain function can be used as a data loader.
type GetterFunc func(key string) ([]byte, error)

func (f GetterFunc) Get(key string) ([]byte, error) {
	return f(key)
}

// Group is a cache namespace; it groups cached data and dispatches loads.
// name: the name of the cache namespace
// getter: the Getter used to load missing data
// mainCache: the local cache holding the data
// peers: the PeerPicker used to choose a peer
// loader: the singleflight group that deduplicates concurrent loads
type Group struct {
	name      string
	getter    Getter
	mainCache cache
	peers     PeerPicker
	loader    *singleflight.Group
}

var (
	mu     sync.RWMutex
	groups = make(map[string]*Group)
)

func NewGroup(name string, cacheBytes int64, getter Getter) *Group {
	if getter == nil {
		panic("nil Getter")
	}
	mu.Lock()
	defer mu.Unlock()
	g := &Group{
		name:      name,
		getter:    getter,
		mainCache: cache{cacheBytes: cacheBytes},
		loader:    &singleflight.Group{},
	}
	groups[name] = g
	return g
}

func GetGroup(name string) *Group {
	mu.RLock()
	g := groups[name]
	mu.RUnlock()
	return g
}

// RegisterPeers injects a PeerPicker implementation (such as an HttpPool) into the Group.
func (g *Group) RegisterPeers(peers PeerPicker) {
	if g.peers != nil {
		panic("RegisterPeers called more than once") // peers may only be registered once
	}
	g.peers = peers
}

// Get looks up a key in the cache; on a hit it returns the cached value,
// otherwise it loads the value.
func (g *Group) Get(key string) (ByteView, error) {
	if key == "" {
		return ByteView{}, fmt.Errorf("key is required")
	}
	if v, ok := g.mainCache.get(key); ok {
		log.Println("[Cache hit]")
		return v, nil
	}
	return g.load(key) // not cached: load it via a peer or the local Getter
}

// load fetches the value for a key.
// It first tries a remote peer; if no peer can serve it, it falls back to loading locally.
func (g *Group) load(key string) (value ByteView, err error) {
	viewi, err := g.loader.Do(key, func() (any, error) {
		if g.peers != nil {
			if peer, ok := g.peers.PickPeer(key); ok {
				if value, err := g.getFromPeer(peer, key); err == nil {
					return value, nil
				}
				log.Println("[Cache] failed to get from peer")
			}
		}
		return g.getLocally(key) // load from the local data source
	})
	if err == nil {
		return viewi.(ByteView), nil
	}
	return
}

// getFromPeer fetches the value for a key from a remote peer.
func (g *Group) getFromPeer(peer PeerGetter, key string) (ByteView, error) {
	bytes, err := peer.Get(g.name, key) // the remote peer's httpGetter performs the call
	if err != nil {
		return ByteView{}, err
	}
	return ByteView{b: bytes}, nil
}

// getLocally loads the value from the local data source via the user-supplied Getter.
func (g *Group) getLocally(key string) (ByteView, error) {
	bytes, err := g.getter.Get(key)
	if err != nil {
		return ByteView{}, err
	}
	value := ByteView{b: cloneBytes(bytes)}
	g.populateCache(key, value)
	return value, nil
}

func (g *Group) populateCache(key string, value ByteView) {
	g.mainCache.add(key, value)
}
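A minimal usage sketch of the Group API defined above; the `db` map, the "scores" group name, and the cache size are illustrative, not part of the repository:

```go
package main

import (
	"fmt"
	"log"

	distributecache "github.com/distributeCache"
)

// A hypothetical slow data source that the cache falls back to on a miss.
var db = map[string]string{
	"Tom":  "630",
	"Jack": "589",
}

func main() {
	g := distributecache.NewGroup("scores", 2<<10, distributecache.GetterFunc(
		func(key string) ([]byte, error) {
			log.Println("[SlowDB] search key", key)
			if v, ok := db[key]; ok {
				return []byte(v), nil
			}
			return nil, fmt.Errorf("%s does not exist", key)
		}))

	v, err := g.Get("Tom") // miss: loaded through the GetterFunc, then cached
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(v.String()) // 630

	v, _ = g.Get("Tom") // hit: served from the local LRU cache ("[Cache hit]" is logged)
	fmt.Println(v.String())
}
```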
--------------------------------------------------------------------------------
/http.go:
--------------------------------------------------------------------------------
package distributecache

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
	"strings"
	"sync"

	"github.com/distributeCache/consistenthash"
)

// defaultBasePath must end with "/" so that ServeHTTP can split "<group>/<key>"
// out of the request path and httpGetter can append "<group>/<key>" to its baseURL.
const defaultBasePath = "/Distribute_cache/"
const defaultReplicas = 50

// HttpPool implements the HTTP pool for a distributed-cache node.
// It holds the address of the current server (self) and the base URL path of all cache requests.
type HttpPool struct {
	self        string
	basePath    string
	mu          sync.Mutex
	peers       *consistenthash.Map
	httpGetters map[string]*httpGetter
}

func NewHttpPool(self string) *HttpPool {
	return &HttpPool{
		self:     self,
		basePath: defaultBasePath,
	}
}

// Log prints a log line prefixed with the server's address.
func (p *HttpPool) Log(format string, v ...any) {
	log.Printf("[Server %s] %s", p.self, fmt.Sprintf(format, v...))
}

// ServeHTTP handles every request whose path matches the pool's basePath.
// It parses the group name and key from the path and serves the cached value.
func (p *HttpPool) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if !strings.HasPrefix(r.URL.Path, p.basePath) {
		panic("HTTPPool serving unexpected path: " + r.URL.Path)
	}

	p.Log("%s %s", r.Method, r.URL.Path)

	// Split the part of the path after basePath into <groupName>/<key>.
	parts := strings.SplitN(r.URL.Path[len(p.basePath):], "/", 2)
	if len(parts) != 2 {
		http.Error(w, "bad request", http.StatusBadRequest)
		return
	}

	groupName := parts[0]
	key := parts[1]

	// Look up the cache group by name.
	group := GetGroup(groupName)
	if group == nil {
		http.Error(w, "no such group: "+groupName, http.StatusNotFound)
		return
	}

	// Fetch the value for the key from the group.
	view, err := group.Get(key)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Return the value as an opaque byte stream.
	w.Header().Set("Content-Type", "application/octet-stream")
	w.Write(view.ByteSlice())
}

// httpGetter is the HTTP client side of a peer.
type httpGetter struct {
	baseURL string
}

// Get fetches the value of a key in a group from the remote peer.
func (h *httpGetter) Get(group string, key string) ([]byte, error) {
	u := fmt.Sprintf("%v%v/%v", h.baseURL, url.QueryEscape(group), url.QueryEscape(key))
	res, err := http.Get(u)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("server returned: %v", res.Status)
	}
	bytes, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response body: %v", err)
	}
	return bytes, nil
}

var _ PeerGetter = (*httpGetter)(nil)

// Set configures the pool's peer list and (re)builds the consistent-hash ring.
func (p *HttpPool) Set(peers ...string) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.peers = consistenthash.NewHash(defaultReplicas, nil)
	p.peers.Add(peers...)
	p.httpGetters = make(map[string]*httpGetter)
	for _, peer := range peers {
		// Each peer gets an HTTP client whose baseURL points at that peer's cache endpoint.
		p.httpGetters[peer] = &httpGetter{baseURL: peer + p.basePath}
	}
}

// PickPeer picks a peer for the given key via consistent hashing.
// It returns the peer's PeerGetter and true if a remote peer was chosen,
// or (nil, false) if the key maps to the current node or the ring is empty.
func (p *HttpPool) PickPeer(key string) (PeerGetter, bool) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if peer := p.peers.Get(key); peer != "" && peer != p.self {
		p.Log("Pick Peer %s", peer)
		return p.httpGetters[peer], true
	}
	return nil, false
}

// compile-time interface check
var _ PeerPicker = (*HttpPool)(nil)
--------------------------------------------------------------------------------
/lru/lru.go:
--------------------------------------------------------------------------------
package lru

import "container/list"

// Cache is an LRU cache; it is not safe for concurrent access on its own.
// OnEvicted is an optional callback executed when an entry is purged.
type Cache struct {
	maxBytes  int64
	usedBytes int64
	ll        *list.List
	cache     map[string]*list.Element
	// optional and executed when an entry is purged
	OnEvicted func(key string, value Value)
}

type entry struct {
	key   string
	value Value
}

type Value interface {
	Len() int
}

func NewCache(maxBytes int64, oe func(string, Value)) *Cache {
	return &Cache{
		maxBytes:  maxBytes,
		ll:        list.New(),
		cache:     make(map[string]*list.Element),
		OnEvicted: oe,
	}
}

func (c *Cache) Get(key string) (value Value, ok bool) {
	if ele, ok := c.cache[key]; ok {
		c.ll.MoveToFront(ele)
		kv := ele.Value.(*entry)
		return kv.value, true
	}
	return
}

// Remove evicts the least recently used entry.
func (c *Cache) Remove() {
	ele := c.ll.Back()
	if ele != nil {
		c.ll.Remove(ele)
		kv := ele.Value.(*entry)
		delete(c.cache, kv.key)
		c.usedBytes -= int64(len(kv.key)) + int64(kv.value.Len())
		if c.OnEvicted != nil {
			c.OnEvicted(kv.key, kv.value)
		}
	}
}

// Add inserts or updates a value and evicts entries until usedBytes fits within maxBytes.
func (c *Cache) Add(key string, value Value) {
	if ele, ok := c.cache[key]; ok {
		c.ll.MoveToFront(ele)
		kv := ele.Value.(*entry)
		c.usedBytes += int64(value.Len()) - int64(kv.value.Len())
		kv.value = value
	} else {
		ele := c.ll.PushFront(&entry{key, value})
		c.cache[key] = ele
		c.usedBytes += int64(len(key)) + int64(value.Len())
	}
	for c.maxBytes > 0 && c.usedBytes >= c.maxBytes {
		c.Remove()
	}
}

func (c *Cache) Len() int {
	return c.ll.Len()
}
--------------------------------------------------------------------------------
/peers.go:
--------------------------------------------------------------------------------
package distributecache

// PeerPicker picks the peer responsible for a key.
type PeerPicker interface {
	PickPeer(key string) (peer PeerGetter, ok bool)
}

// PeerGetter fetches the value for a key in a group from a peer.
type PeerGetter interface {
	Get(group string, key string) ([]byte, error)
}
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Distributed Cache System

This project implements a distributed caching solution inspired by common practices in distributed systems, such as consistent hashing, LRU caching, and single-flight request deduplication. The primary goal is to provide a scalable and efficient caching mechanism that can handle high request rates, prevent cache breakdowns, and distribute data evenly across nodes.
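A minimal sketch of wiring one node together with the APIs above (`NewGroup`, `NewHttpPool`, `Set`, `RegisterPeers`); the addresses, group name, and placeholder loader are hypothetical, and the "How to Run" section later in this readme describes the same steps:

```go
package main

import (
	"log"
	"net/http"

	distributecache "github.com/distributeCache"
)

func main() {
	// Hypothetical cluster layout: this node plus one peer.
	self := "http://localhost:8001"
	peers := []string{"http://localhost:8001", "http://localhost:8002"}

	// Placeholder loader; in practice this would hit a database or another data source.
	g := distributecache.NewGroup("scores", 2<<10, distributecache.GetterFunc(
		func(key string) ([]byte, error) {
			return []byte("value-for-" + key), nil
		}))

	pool := distributecache.NewHttpPool(self) // HTTP handler + PeerPicker for this node
	pool.Set(peers...)                        // build the consistent-hash ring over all nodes
	g.RegisterPeers(pool)                     // route cache misses through the pool

	// HttpPool implements http.Handler, so it can be served directly.
	log.Fatal(http.ListenAndServe("localhost:8001", pool))
}
```

A client could then fetch a value with, for example, `curl http://localhost:8001/Distribute_cache/scores/Tom` (the path prefix follows `defaultBasePath` in `http.go`).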

## Features

- **LRU (Least Recently Used) Cache**: Each node in the system maintains an LRU cache to manage cached data, ensuring that the most frequently accessed data is readily available and less-used data is evicted as needed.

- **Consistent Hashing**: The system employs consistent hashing to distribute cached data across multiple nodes. This helps minimize data movement when nodes are added or removed, ensuring balanced load distribution.

- **SingleFlight for Request Deduplication**: To prevent cache breakdowns, the system leverages a "single-flight" mechanism, where duplicate requests for the same key are merged into a single request. This prevents redundant calls to the backend and reduces the load on data sources.

- **HTTP Communication Between Nodes**: Nodes communicate with each other via HTTP. Each node can query others for data, ensuring that data can be accessed seamlessly even if it resides on a different server.

## Project Structure

- `lru/`: Implements an LRU cache to manage data locally within each cache node.
- `consistenthash/`: Contains the consistent hashing implementation, responsible for evenly distributing keys among nodes.
- `singleflight/`: Provides request deduplication to prevent multiple redundant loads for the same key.
- Root package (`distributecache`): `group.go`, `http.go`, `peers.go`, `cache.go`, and `byteview.go` manage the core caching functionality, including peer-to-peer communication and HTTP request handling.

### Data Request Flow

1. A client requests data for a given key.
2. The `Group` checks whether the key is available in the local LRU cache.
3. If the data is not found locally, the `HttpPool` (implementing `PeerPicker`) is used to select a remote peer via consistent hashing.
4. If a suitable peer is found, an HTTP request is made to fetch the data from that peer.
5. If no peer has the data, the system falls back to a data source using the `Getter` function, and the fetched data is added to the cache.

### SingleFlight Prevention

To avoid multiple cache misses for the same key causing a load spike, `singleflight.Group` ensures only one request to the backend is made for each key at a time.

### Consistent Hashing

Consistent hashing distributes keys evenly across nodes and helps maintain balanced load distribution when nodes join or leave the system, thereby reducing the number of keys that need to be remapped.

## Future Improvements

To make this distributed caching system more production-ready and robust, several areas can be optimized:

1. **Service Discovery and Coordination with etcd/Consul**
   Currently, nodes are manually configured, and there is no dynamic mechanism for service discovery. Integrating etcd or Consul would allow nodes to automatically discover peers, making the system more fault-tolerant and easier to scale. Service discovery tools could help manage node registration, track node availability, and automatically adjust the consistent hashing ring when nodes are added or removed.

2. **Use RPC for Inter-Node Communication**
   The system currently uses HTTP for communication between nodes, which introduces additional overhead in terms of latency and serialization/deserialization of data. Replacing HTTP with a more efficient RPC (Remote Procedure Call) mechanism, such as gRPC, would provide lower latency, better performance, and strong data typing.
   This could improve the overall efficiency of inter-node communication, especially in high-throughput environments.

3. **Adding a Distributed Lock Mechanism**
   The cache currently uses a simple mutex for managing access to shared resources. Introducing a distributed locking mechanism, such as etcd's lease or a Redis-based lock, would make the system more robust in scenarios where multiple nodes could attempt to update the same resource concurrently, particularly in cases where nodes share responsibilities.

4. **Advanced Consistency Mechanism**
   Implementing cache consistency mechanisms to keep data up-to-date across distributed nodes would improve reliability. Strategies such as write-through, write-behind, or cache invalidation could be implemented to ensure the data in different caches remains consistent. Depending on the use case, this could be paired with eventual or strong consistency guarantees.

5. **Monitoring and Metrics Collection**
   Adding monitoring and metrics via Prometheus and Grafana would allow for performance tracking and system health monitoring. Observability is crucial in distributed systems, as it helps identify bottlenecks, node failures, and potential inconsistencies.

## How to Run

To set up the distributed cache system:

1. Clone the repository.
2. Start multiple instances of the distributed cache node (`HttpPool`) with different addresses.
3. Configure the nodes to be aware of each other using the `Set()` method, or use etcd/Consul for automatic service discovery.
4. Use a client to interact with the nodes by querying data using the HTTP endpoints provided by each node.

## Acknowledgments

This project was inspired by concepts from groupcache and articles from tutu. We appreciate their valuable insights and contributions.
--------------------------------------------------------------------------------
/singleflight/singleflight.go:
--------------------------------------------------------------------------------
package singleflight

import "sync"

// call represents an in-flight or completed request.
// wg: waits for the in-flight request to finish
// val: the result of the request
// err: the error returned by the request
type call struct {
	wg  sync.WaitGroup
	val any
	err error
}

// Group manages the set of in-flight requests.
// mu: protects m against concurrent access
// m: maps a key to its in-flight call
type Group struct {
	mu sync.Mutex
	m  map[string]*call
}

// Do executes fn for the given key, ensuring that only one execution is in
// flight for the same key at a time; duplicate callers wait for the original
// call to complete and receive the same result.
// key: the unique identifier of the request
// fn: the function that performs the actual work
// It returns fn's result value and error.
func (g *Group) Do(key string, fn func() (any, error)) (any, error) {
	g.mu.Lock() // lock to protect the map of in-flight calls
	if g.m == nil {
		g.m = make(map[string]*call) // lazily initialize the map
	}
	if c, ok := g.m[key]; ok {
		// A request for this key is already in flight: wait for it and reuse its result.
		g.mu.Unlock()
		c.wg.Wait()         // wait for the in-flight request to finish
		return c.val, c.err // return the shared result
	}
	// Otherwise register a new call for this key.
	c := new(call)
	c.wg.Add(1)  // mark the call as in flight
	g.m[key] = c // register it so duplicate callers can find it
	g.mu.Unlock()

	// Execute the function and record its result.
	c.val, c.err = fn()
	c.wg.Done() // signal waiting callers that the call has completed

	// Remove the completed call from the map.
	g.mu.Lock()
	delete(g.m, key)
	g.mu.Unlock()

	// Return the result.
	return c.val, c.err
}
--------------------------------------------------------------------------------
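A minimal standalone sketch (the "Tom" key, the sleep, and the loader counter are hypothetical) showing how `singleflight.Group.Do` collapses concurrent calls for the same key into a single execution:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/distributeCache/singleflight"
)

func main() {
	var g singleflight.Group
	var loads int64 // counts how many times the loader actually runs

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// All ten goroutines ask for the same key concurrently.
			v, err := g.Do("Tom", func() (any, error) {
				atomic.AddInt64(&loads, 1)
				time.Sleep(100 * time.Millisecond) // simulate a slow backend
				return "630", nil
			})
			fmt.Println(v, err)
		}()
	}
	wg.Wait()
	fmt.Println("loader executions:", loads) // typically 1 when the calls overlap
}
```

Within this project, `Group.load` wraps its peer/local lookup in exactly such a `Do` call, so a burst of misses for one key results in a single backend load.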