├── diagram.png ├── go.mod ├── cache_test.go ├── LICENCE ├── example ├── naive │ └── main.go └── zipcache │ └── main.go ├── go.sum ├── README.md └── cache.go /diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ostafen/zipcache/HEAD/diagram.png -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ostafen/zipcache 2 | 3 | go 1.19 4 | 5 | require github.com/stretchr/testify v1.8.1 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /cache_test.go: -------------------------------------------------------------------------------- 1 | package zipcache 2 | 3 | import ( 4 | "errors" 5 | "math/rand" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestSingleThread(t *testing.T) { 13 | cache := New(DefaultConfig().WithChunkSize(100)) 14 | 15 | keys := make([][]byte, 0) 16 | values := make([][]byte, 0) 17 | 18 | rand.Seed(time.Now().Unix()) 19 | 20 | n := 1000 21 | for i := 0; i < n; i++ { 22 | k := make([]byte, 8) 23 | v := make([]byte, 10+rand.Int()%256+1) 24 | 25 | rand.Read(k) 26 | rand.Read(v) 27 | 28 | for j := 0; j < len(v); j++ { 29 | v[j] %= 10 30 | } 31 | 32 | err := cache.Put(k, v) 33 | if errors.Is(err, ErrKeyBound) { 34 | continue 35 | } 36 | require.NoError(t, err) 37 | 38 | keys = append(keys, k) 39 | values = append(values, v) 40 | } 41 | 42 | for i := 0; i < len(keys); i++ { 43 | v, err := cache.Get(keys[i]) 44 | require.NoError(t, err) 45 | require.Equal(t, v, values[i]) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /LICENCE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Stefano Scafiti 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// fatalIfErr aborts the program when err is non-nil.
func fatalIfErr(err error) {
	if err != nil {
		log.Fatal(err)
	}
}

// compress gzip-compresses data and returns the compressed bytes.
func compress(data []byte) []byte {
	var buf bytes.Buffer

	w := gzip.NewWriter(&buf)
	_, err := w.Write(data)
	fatalIfErr(err)

	// Close flushes all pending data and writes the gzip footer; a
	// separate Flush beforehand only inflates the output with an extra
	// sync block.
	fatalIfErr(w.Close())

	return buf.Bytes()
}

// main loads a JSON array of entries, compresses each entry separately into
// a plain map, and reports total compression time and the size ratio of the
// compressed entries relative to the raw input.
func main() {
	f, err := os.Open("../airlines.json")
	fatalIfErr(err)
	defer f.Close()

	data, err := io.ReadAll(f)
	fatalIfErr(err)

	m := make(map[string]string)

	x := make([]map[string]any, 0)
	fatalIfErr(json.Unmarshal(data, &x))

	totalTime := time.Duration(0)
	for i, item := range x {
		entry, err := json.Marshal(item)
		fatalIfErr(err)

		// Only the compression itself is timed, not the JSON encoding.
		start := time.Now()
		m[strconv.Itoa(i)] = string(compress(entry))
		totalTime += time.Since(start)
	}

	size := 0
	for _, v := range m {
		size += len(v)
	}
	fmt.Printf("time (seconds):\t %f\n", totalTime.Seconds())
	fmt.Printf("ratio (%%):\t %.2f\n", float64(size)/float64(len(data)))
}
gzipReader(r io.Reader) (zipcache.Reader, error) { 23 | return gzip.NewReader(r) 24 | } 25 | 26 | func gzipWriter(w io.Writer) (zipcache.Writer, error) { 27 | return gzip.NewWriter(w), nil 28 | } 29 | 30 | func main() { 31 | f, err := os.Open("../airlines.json") 32 | fatalIfErr(err) 33 | 34 | data, err := io.ReadAll(f) 35 | fatalIfErr(err) 36 | 37 | cfg := zipcache.DefaultConfig().WithChunkSize(4096*4). // defines how many entries will be compressed together. Set this according to average entry size. 38 | WithReaderWriter(gzipReader, gzipWriter) // use gzip compression algorithm. Default is deflate. 39 | 40 | cache := zipcache.New(cfg) 41 | 42 | x := make([]map[string]any, 0) 43 | err = json.Unmarshal(data, &x) 44 | fatalIfErr(err) 45 | 46 | total := time.Duration(0) 47 | for i, item := range x { 48 | data, err := json.Marshal(item) 49 | fatalIfErr(err) 50 | 51 | start := time.Now() 52 | err = cache.Put([]byte(strconv.Itoa(i)), data) 53 | total += time.Since(start) 54 | fatalIfErr(err) 55 | } 56 | 57 | fmt.Printf("time (seconds):\t %f\n", total.Seconds()) 58 | fmt.Printf("ratio (%%):\t %.2f\n", float64(cache.Size())/float64(len(data))) 59 | } 60 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 | 
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 9 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 10 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 11 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 12 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 16 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 17 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # zipcache 2 | An in-memory compressed cache 3 | 4 | # Why? 5 | 6 | Caching many items in memory can considerably increase the space requirements of an application. A simple way to mitigate this problem is to compress each cache entry using some compression algorithm (deflate, gzip, etc...). Howewer this approach is not effective when the average size of each entry is relatively small. 7 | 8 | ## ZipCache Layout 9 | The goal of ZipCache is to achieve a better compression rate by arranging multiple values (represented as **byte slices**) within a single byte chunk, whose maximum size is fixed (**4096** by default). Values are appended in the current chunk in the same order are they are inserted. When a chunk is full, a new one is appended to the **chunk list** and the old one gets compressed in the background. 
Values whose size exceeds the chunk size are split across multiple blocks. A separate map is kept (which is a standard golang **map**) to easily locate values within blocks, which associates a key to a triplet ****. 10 | 11 |

12 | 13 |

14 | 15 | 16 | # Sample use case 17 | 18 | Suppose you want to cache a set of json encoded entries of relatively small size. 19 | 20 | Let's try to firstly follow the naive approach, where each entry is compressed separatedly and stored in a standard golang map. 21 | 22 | ```golang 23 | package main 24 | 25 | ... 26 | 27 | func main() { 28 | f, err := os.Open("../airlines.json") 29 | fatalIfErr(err) 30 | 31 | data, err := io.ReadAll(f) 32 | fatalIfErr(err) 33 | 34 | m := make(map[string]string) 35 | 36 | x := make([]map[string]any, 0) 37 | err = json.Unmarshal(data, &x) 38 | fatalIfErr(err) 39 | 40 | totalTime := time.Now() 41 | for i, item := range x { 42 | data, err := json.Marshal(item) 43 | fatalIfErr(err) 44 | 45 | start := time.Now() 46 | m[strconv.Itoa(i)] = string(compress(data)) 47 | totalTime += time.Since(start) 48 | fatalIfErr(err) 49 | } 50 | 51 | size := 0 52 | for _, v := range m { 53 | size += len(v) 54 | } 55 | fmt.Printf("time (seconds):\t %f\n", totalTime.Seconds()) 56 | fmt.Printf("ratio (%%):\t %.2f\n", float64(size)/float64(len(data))) 57 | } 58 | 59 | ``` 60 | The output is the following: 61 | 62 | ```bash 63 | time (seconds): 1.503385 64 | ratio (%): 0.34 65 | ``` 66 | 67 | Now, let's try to use ZipCache to accomplish the same task. 68 | 69 | ```golang 70 | 71 | import ( 72 | ... 73 | "github.com/ostafen/zipcache" 74 | ) 75 | 76 | ... 77 | 78 | func main() { 79 | f, err := os.Open("./airlines.json") 80 | fatalIfErr(err) 81 | 82 | data, err := io.ReadAll(f) 83 | fatalIfErr(err) 84 | 85 | cfg := zipcache.DefaultConfig(). 86 | WithChunkSize(4096*4). // defines how many entries will be compressed together. Set this according to average entry size. 87 | WithReaderWriter(gzipReader, gzipWriter) // use gzip compression algorithm. Default is deflate. 
88 | 89 | cache := zipcache.New(cfg) 90 | 91 | x := make([]map[string]any, 0) 92 | err = json.Unmarshal(data, &x) 93 | fatalIfErr(err) 94 | 95 | total := time.Duration(0) 96 | for i, item := range x { 97 | data, err := json.Marshal(item) 98 | fatalIfErr(err) 99 | 100 | start := time.Now() 101 | err = cache.Put([]byte(strconv.Itoa(i)), data) 102 | total += time.Since(start) 103 | fatalIfErr(err) 104 | } 105 | 106 | fmt.Printf("time (seconds):\t %f\n", total.Seconds()) 107 | fmt.Printf("ratio (%%):\t %.2f\n", float64(cache.Size())/float64(len(data))) 108 | } 109 | ``` 110 | 111 | This is what we get: 112 | 113 | ```bash 114 | time (seconds): 0.006377 115 | ratio (%): 0.11 116 | ``` 117 | 118 | Not only we save about 23% additional space, but code is ~200x faster, since compression is invoked less times. 119 | 120 | # Limitations 121 | 122 | ZipCache is a grow-only cache. Cache items are not evicted and mappings cannot be updated nor deleted. However, support for update/deletion is planned. 123 | 124 | # When to use ZipCache 125 | 126 | Use ZipCache when: 127 | 128 | - you are more concerned about RAM consumption rather then absolute lookup performance; 129 | - you never change your key-value mappings once you enter your items in the cache; 130 | - you don't need to expire your items. 
// chunk is one fixed-size block of the cache's storage. Values are appended
// back to back into data; once a chunk fills up it may be swapped for a
// compressed copy in the background, flagged by isCompressed.
type chunk struct {
	isCompressed bool
	data         []byte
}

var (
	// ErrKeyBound is returned by Put when the key already has a mapping;
	// ZipCache mappings cannot be updated or deleted.
	ErrKeyBound = errors.New("key already bound")
)

// Writer is the compressing half of a pluggable compression codec.
type Writer interface {
	io.WriteCloser
	Flush() error
}

// Reader is the decompressing half of a pluggable compression codec.
type Reader interface {
	io.ReadCloser
}

// WriterFactory creates a Writer that compresses into w.
type WriterFactory func(w io.Writer) (Writer, error)

// ReaderFactory creates a Reader that decompresses from r.
type ReaderFactory func(w io.Reader) (Reader, error)

// flateWriter is the default WriterFactory, using DEFLATE at best speed.
func flateWriter(w io.Writer) (Writer, error) {
	return flate.NewWriter(w, flate.BestSpeed)
}

// flateReader is the default ReaderFactory, using DEFLATE.
func flateReader(r io.Reader) (Reader, error) {
	return flate.NewReader(r), nil
}

// Config holds the tunable parameters of a ZipCache.
type Config struct {
	// ChunkSize is the size in bytes of each storage chunk.
	ChunkSize int
	// ChunkMinGain is the minimum fractional space saving (0..1) a chunk
	// must achieve for its compressed form to be kept.
	ChunkMinGain float64
	// NewWriter and NewReader select the compression codec.
	NewWriter WriterFactory
	NewReader ReaderFactory
}

const (
	chunkSizeDefault     = 4096
	chunkMinRatioDefault = 0.05
)

// DefaultConfig returns a Config with 4 KiB chunks, a 5% minimum
// compression gain and the DEFLATE codec.
func DefaultConfig() Config {
	return Config{
		ChunkSize:    chunkSizeDefault,
		ChunkMinGain: chunkMinRatioDefault,
		NewWriter:    flateWriter,
		NewReader:    flateReader,
	}
}

// WithChunkSize returns a copy of cfg with ChunkSize set to size.
func (cfg Config) WithChunkSize(size int) Config {
	cfg.ChunkSize = size
	return cfg
}

// WithChunkMinGain returns a copy of cfg with ChunkMinGain set to gain.
func (cfg Config) WithChunkMinGain(gain float64) Config {
	cfg.ChunkMinGain = gain
	return cfg
}

// WithReaderWriter returns a copy of cfg using the given codec factories.
func (cfg Config) WithReaderWriter(r ReaderFactory, w WriterFactory) Config {
	cfg.NewReader = r
	cfg.NewWriter = w
	return cfg
}

// ZipCache is a grow-only, in-memory compressed key-value cache. Values are
// packed back to back into fixed-size chunks; full chunks are compressed in
// the background. A map from key to (block, offset, size) locates values.
type ZipCache struct {
	cfg Config

	mtx sync.RWMutex
	m   map[string]pointer

	chunks          []*atomic.Pointer[chunk]
	nChunks         atomic.Int32
	currChunkOffset uint32
}

// pointer locates a value inside the chunk list.
type pointer struct {
	blockNumber uint32 // index of the chunk where the value starts
	offset      uint32 // byte offset of the value inside that chunk
	size        uint32 // value length in bytes (may span multiple chunks)
}

// Block returns the index of the chunk where the value starts.
func (p pointer) Block() int {
	return int(p.blockNumber)
}

// Offset returns the value's byte offset within its first chunk.
func (p pointer) Offset() int {
	return int(p.offset)
}

// Len returns the value's length in bytes.
func (p pointer) Len() int {
	return int(p.size)
}

// newChunk allocates an empty, uncompressed chunk of the configured size.
func (c *ZipCache) newChunk() *chunk {
	return &chunk{
		isCompressed: false,
		data:         make([]byte, c.cfg.ChunkSize),
	}
}

// New creates an empty ZipCache with the given configuration.
func New(cfg Config) *ZipCache {
	c := &ZipCache{
		cfg:    cfg,
		m:      map[string]pointer{},
		chunks: make([]*atomic.Pointer[chunk], 0),
	}
	c.chunks = append(c.chunks, &atomic.Pointer[chunk]{})
	c.chunks[0].Store(c.newChunk())
	c.nChunks.Store(1)
	return c
}

// compressChunks compresses each full chunk in ptrs and atomically swaps in
// the result. The compressed representation is kept only when the space
// saving reaches cfg.ChunkMinGain; otherwise the chunk is republished with
// its raw bytes.
func (c *ZipCache) compressChunks(ptrs []*atomic.Pointer[chunk]) error {
	for _, ptr := range ptrs {
		var buf bytes.Buffer

		w, err := c.cfg.NewWriter(&buf)
		if err != nil {
			return err
		}

		b := ptr.Load()

		if _, err := w.Write(b.data); err != nil {
			return err
		}

		if err := w.Flush(); err != nil {
			return err
		}

		if err := w.Close(); err != nil {
			return err
		}

		// isCompressed must mirror which representation is actually
		// stored: flagging a chunk compressed while keeping the raw bytes
		// (previously possible when 0 < gain < ChunkMinGain) corrupts
		// every read that touches it.
		newBlock := &chunk{data: b.data}

		gain := 1 - (float64(buf.Len()) / float64(len(b.data)))
		if gain >= c.cfg.ChunkMinGain {
			newBlock.data = buf.Bytes()
			newBlock.isCompressed = true
		}

		ptr.Store(newBlock)
	}

	return nil
}

// uncompress inflates src into dst, which must be sized to the chunk's
// uncompressed length. io.ReadFull is required because a single Read on a
// decompressor may legally return fewer bytes than requested.
// TODO: do not create reader multiple times
func (c *ZipCache) uncompress(src, dst []byte) (int, error) {
	r, err := c.cfg.NewReader(bytes.NewReader(src))
	if err != nil {
		return -1, err
	}
	defer r.Close()
	return io.ReadFull(r, dst)
}

// newPointer builds a pointer from a chunk index, a byte offset within that
// chunk, and the value size.
func newPointer(blockNumber, byteOffset, size uint64) pointer {
	return pointer{
		size:        uint32(size),
		offset:      uint32(byteOffset),
		blockNumber: uint32(blockNumber),
	}
}

// Put binds k to v, returning ErrKeyBound when k is already present. Any
// chunk filled while appending v is compressed in the background.
func (c *ZipCache) Put(k, v []byte) error {
	c.mtx.Lock()
	defer c.mtx.Unlock()

	if _, has := c.m[string(k)]; has {
		return ErrKeyBound
	}

	// The value starts in the current (last) chunk at the current offset.
	c.m[string(k)] = newPointer(uint64(len(c.chunks)-1), uint64(c.currChunkOffset), uint64(len(v)))

	compressChunks := make([]*atomic.Pointer[chunk], 0)

	size := len(v)
	for size > 0 {
		currChunkPtr := c.chunks[len(c.chunks)-1]
		currChunk := currChunkPtr.Load()

		n := copy(currChunk.data[c.currChunkOffset:], v[len(v)-size:])
		c.currChunkOffset += uint32(n)

		if c.currChunkOffset == uint32(c.cfg.ChunkSize) {
			// Chunk is full: queue it for background compression and
			// start a fresh one.
			compressChunks = append(compressChunks, currChunkPtr)

			var ptr atomic.Pointer[chunk]
			ptr.Store(c.newChunk())

			c.chunks = append(c.chunks, &ptr)
			c.currChunkOffset = 0

			c.nChunks.Add(1)
		}
		size -= n
	}

	if len(compressChunks) > 0 {
		// Best effort: if compression fails, chunks simply remain
		// uncompressed and readable, so the error is not propagated.
		go c.compressChunks(compressChunks)
	}
	return nil
}

// Get returns the value bound to k, or (nil, nil) when k is not present.
//
// When the value lies entirely within a single uncompressed chunk, the
// returned slice aliases the cache's internal storage and must not be
// modified by the caller.
func (c *ZipCache) Get(k []byte) ([]byte, error) {
	c.mtx.RLock()

	ptr, ok := c.m[string(k)]
	if !ok {
		c.mtx.RUnlock()
		return nil, nil
	}

	// Fast path: the value fits in one chunk that is still uncompressed.
	currChunk := c.chunks[ptr.Block()].Load()
	if !currChunk.isCompressed && (ptr.Offset()+ptr.Len() <= len(currChunk.data)) {
		c.mtx.RUnlock()
		return currChunk.data[ptr.Offset() : ptr.Offset()+ptr.Len()], nil
	}

	// Snapshot every chunk the value may span, then release the lock:
	// published chunk structs are immutable (the background compressor
	// swaps pointers, never mutates), so decompression needs no lock.
	nChunks := 1 + (ptr.Len()+(c.cfg.ChunkSize-1))/c.cfg.ChunkSize
	chunks := make([]*chunk, 0, nChunks)
	for i := 0; i < nChunks; i++ {
		if i+ptr.Block() < len(c.chunks) {
			chunks = append(chunks, c.chunks[i+ptr.Block()].Load())
		}
	}
	c.mtx.RUnlock()

	dst := make([]byte, ptr.Len())
	off := ptr.Offset()
	n := 0
	for _, chunk := range chunks {
		uncompressed := chunk.data

		if chunk.isCompressed {
			uncompressed = make([]byte, c.cfg.ChunkSize)
			if _, err := c.uncompress(chunk.data, uncompressed); err != nil {
				return nil, err
			}
		}

		// Copy the portion of this chunk that belongs to the value.
		endOff := off + ptr.Len() - n
		if endOff > len(uncompressed) {
			endOff = len(uncompressed)
		}

		n += copy(dst[n:], uncompressed[off:endOff])
		off = 0 // subsequent chunks are read from their beginning

		if n == ptr.Len() {
			break
		}
	}

	return dst, nil
}

// Size returns the current in-memory footprint of all chunks, in bytes.
func (c *ZipCache) Size() int64 {
	c.mtx.RLock()
	defer c.mtx.RUnlock()

	var size int64
	for _, ck := range c.chunks {
		size += int64(len(ck.Load().data))
	}
	return size
}