├── .github ├── FUNDING.yml ├── logo.png ├── docker │ └── Dockerfile └── workflows │ └── test.yml ├── fixtures ├── players.bin ├── 3million.bin.s2 └── players.go ├── .gitignore ├── examples ├── snapshot │ ├── README.md │ └── main.go ├── simple │ ├── README.md │ └── main.go ├── million │ ├── README.md │ └── main.go ├── cache │ ├── cache.go │ ├── README.md │ └── main.go ├── merge │ ├── README.md │ └── main.go └── bench │ ├── README.md │ └── bench.go ├── go.mod ├── codegen ├── main.go └── numbers.tpl ├── LICENSE ├── column_bool.go ├── txn_lock.go ├── column_expire.go ├── commit ├── buffer_codec.go ├── log_test.go ├── log.go ├── commit.go ├── commit_test.go ├── reader_test.go ├── buffer_test.go ├── buffer.go └── reader.go ├── go.sum ├── column_record.go ├── snapshot.go ├── column_numeric.go ├── column_index.go ├── column.go ├── txn_row.go ├── column_strings.go ├── snapshot_test.go └── column_numbers.go /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [kelindar] 2 | -------------------------------------------------------------------------------- /.github/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kelindar/column/HEAD/.github/logo.png -------------------------------------------------------------------------------- /fixtures/players.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kelindar/column/HEAD/fixtures/players.bin -------------------------------------------------------------------------------- /fixtures/3million.bin.s2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kelindar/column/HEAD/fixtures/3million.bin.s2 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 
1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | *.test 8 | *.out 9 | snapshot.bin 10 | 11 | # Vendor and workspace 12 | vendor/ 13 | go.work -------------------------------------------------------------------------------- /.github/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # docker build -t column_ubuntu_test . 2 | FROM golang:1.19-bullseye 3 | 4 | WORKDIR /usr/ 5 | 6 | COPY go.mod go.sum ./ 7 | RUN go mod download && go mod verify 8 | 9 | COPY . . 10 | RUN go test -race -covermode atomic -coverprofile=profile.cov . 11 | -------------------------------------------------------------------------------- /examples/snapshot/README.md: -------------------------------------------------------------------------------- 1 | # Snapshot Example 2 | 3 | This example reads a collection state from an existing snapshot `players.bin`, performs a simple query and saves a snapshot to a different file. 4 | 5 | ## Example output 6 | 7 | ``` 8 | snapshot: created an empty collection (0 rows) 9 | snapshot: restoring from '../../fixtures/players.bin' ... 10 | snapshot: restored 500 rows 11 | snapshot: saving state into 'snapshot.bin' ... 
12 | ``` 13 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kelindar/column 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/kelindar/bitmap v1.4.1 7 | github.com/kelindar/intmap v1.1.0 8 | github.com/kelindar/iostream v1.3.0 9 | github.com/kelindar/simd v1.1.2 10 | github.com/kelindar/smutex v1.0.0 11 | github.com/klauspost/compress v1.16.6 12 | github.com/stretchr/testify v1.8.4 13 | github.com/tidwall/btree v1.6.0 14 | github.com/zeebo/xxh3 v1.0.2 15 | 16 | ) 17 | 18 | require ( 19 | github.com/davecgh/go-spew v1.1.1 // indirect 20 | github.com/dustin/go-humanize v1.0.1 21 | github.com/kelindar/async v1.1.0 22 | github.com/kelindar/xxrand v1.0.2 23 | github.com/klauspost/cpuid/v2 v2.2.5 // indirect 24 | github.com/pmezard/go-difflib v1.0.0 // indirect 25 | golang.org/x/sys v0.9.0 // indirect 26 | golang.org/x/time v0.3.0 // indirect 27 | gopkg.in/yaml.v3 v3.0.1 // indirect 28 | ) 29 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: [push, pull_request] 3 | env: 4 | GITHUB_TOKEN: ${{ secrets.COVERALLS_TOKEN }} 5 | GO111MODULE: "on" 6 | jobs: 7 | test: 8 | name: Test with Coverage 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | go: ["1.19", "1.20"] 13 | steps: 14 | - name: Set up Go ${{ matrix.go }} 15 | uses: actions/setup-go@v3 16 | with: 17 | go-version: ${{ matrix.go }} 18 | - name: Check out code 19 | uses: actions/checkout@v3 20 | - name: Install dependencies 21 | run: | 22 | go mod download 23 | - name: Run Unit Tests 24 | run: | 25 | go test -race -covermode atomic -coverprofile=profile.cov ./... 
26 | - name: Upload Coverage 27 | uses: shogo82148/actions-goveralls@v1 28 | with: 29 | path-to-profile: profile.cov 30 | -------------------------------------------------------------------------------- /examples/simple/README.md: -------------------------------------------------------------------------------- 1 | # Simple Example 2 | 3 | This is a simple example that creates a collection, loads some data, inserts a few indexes and performs a query and iteration over the result set. 4 | 5 | ## Example output 6 | 7 | ``` 8 | human old mage Marsha Duffy 9 | human old mage Buckner Frazier 10 | human old mage Castillo Mcfadden 11 | human old mage Dillard Landry 12 | human old mage Boyle Garrison 13 | human old mage Angeline Robles 14 | human old mage Samantha Conley 15 | human old mage Wiley Nunez 16 | human old mage Sandoval Meyer 17 | human old mage Brandie Valentine 18 | human old mage Burns Dotson 19 | human old mage Marla Todd 20 | human old mage Salinas Hughes 21 | human old mage Orr Francis 22 | human old mage Shelly Kirk 23 | human old mage Regina Rollins 24 | human old mage Burnett Boyle 25 | human old mage Lora Mueller 26 | human old mage Rose Hull 27 | human old mage Mckenzie Best 28 | human old mage Mcguire Dale 29 | ``` -------------------------------------------------------------------------------- /codegen/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | _ "embed" 5 | "os" 6 | "text/template" 7 | ) 8 | 9 | //go:embed numbers.tpl 10 | var numbers string 11 | 12 | type Type struct { 13 | Name string 14 | Type string 15 | } 16 | 17 | func main() { 18 | t, err := template.New("numbers").Parse(numbers) 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | dst, err := os.OpenFile("column_numbers.go", os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm) 24 | defer dst.Close() 25 | if err != nil { 26 | panic(err) 27 | } 28 | 29 | if err := t.Execute(dst, []Type{ 30 | {Name: "Int", Type: 
"int"}, 31 | {Name: "Int16", Type: "int16"}, 32 | {Name: "Int32", Type: "int32"}, 33 | {Name: "Int64", Type: "int64"}, 34 | {Name: "Uint", Type: "uint"}, 35 | {Name: "Uint16", Type: "uint16"}, 36 | {Name: "Uint32", Type: "uint32"}, 37 | {Name: "Uint64", Type: "uint64"}, 38 | {Name: "Float32", Type: "float32"}, 39 | {Name: "Float64", Type: "float64"}, 40 | }); err != nil { 41 | panic(err) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /examples/million/README.md: -------------------------------------------------------------------------------- 1 | # Ten Million Rows 2 | 3 | This example adds 10 million rows to a collection, runs and measures a few different queries and transaction around it. 4 | 5 | ## Example output 6 | 7 | ``` 8 | running insert of 10000000 rows... 9 | -> insert took 7.6464726s 10 | 11 | running snapshot of 10000000 rows... 12 | -> snapshot took 1.35868707s 13 | 14 | running full scan of age >= 30... 15 | -> result = 5100000 16 | -> full scan took 27.011615ms 17 | 18 | running full scan of class == "rogue"... 19 | -> result = 3580000 20 | -> full scan took 45.053185ms 21 | 22 | running indexed query of human mages... 23 | -> result = 680000 24 | -> indexed query took 309.74µs 25 | 26 | running indexed query of human female mages... 27 | -> result = 320000 28 | -> indexed query took 385.606µs 29 | 30 | running update of balance of everyone... 31 | -> updated 10000000 rows 32 | -> update took 108.09756ms 33 | 34 | running update of age of mages... 
35 | -> updated 3020000 rows 36 | -> update took 41.731165ms 37 | ``` 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Roman Atachiants 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/cache/cache.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 
3 | 4 | package main 5 | 6 | import ( 7 | "github.com/kelindar/column" 8 | ) 9 | 10 | // Cache represents a key-value store 11 | type Cache struct { 12 | store *column.Collection 13 | } 14 | 15 | // New creates a new key-value cache 16 | func New() *Cache { 17 | db := column.NewCollection() 18 | db.CreateColumn("key", column.ForKey()) 19 | db.CreateColumn("val", column.ForString()) 20 | 21 | return &Cache{ 22 | store: db, 23 | } 24 | } 25 | 26 | // Get attempts to retrieve a value for a key 27 | func (c *Cache) Get(key string) (value string, found bool) { 28 | c.store.QueryKey(key, func(r column.Row) error { 29 | value, found = r.String("val") 30 | return nil 31 | }) 32 | return 33 | } 34 | 35 | // Set updates or inserts a new value 36 | func (c *Cache) Set(key, value string) { 37 | if err := c.store.UpsertKey(key, func(r column.Row) error { 38 | r.SetString("val", value) 39 | return nil 40 | }); err != nil { 41 | panic(err) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /fixtures/players.go: -------------------------------------------------------------------------------- 1 | package fixtures 2 | 3 | import ( 4 | _ "embed" 5 | "encoding/json" 6 | ) 7 | 8 | //go:embed players.json 9 | var playerData []byte 10 | 11 | // Players loads a set of players 12 | func Players() []Player { 13 | var data []Player 14 | if err := json.Unmarshal(playerData, &data); err != nil { 15 | panic(err) 16 | } 17 | 18 | return data 19 | } 20 | 21 | // --------------------------- Player ---------------------------- 22 | 23 | type Player struct { 24 | Serial string `json:"serial"` 25 | Name string `json:"name"` 26 | Active bool `json:"active"` 27 | Class string `json:"class"` 28 | Race string `json:"race"` 29 | Age int `json:"age"` 30 | Hp int `json:"hp"` 31 | Mp int `json:"mp"` 32 | Balance float64 `json:"balance"` 33 | Gender string `json:"gender"` 34 | Guild string `json:"guild"` 35 | Location Location `json:"location"` 36 | } 37 | 38 | 
type Location struct { 39 | X float64 `json:"x"` 40 | Y float64 `json:"y"` 41 | } 42 | 43 | func (l Location) MarshalBinary() ([]byte, error) { 44 | return json.Marshal(l) 45 | } 46 | 47 | func (l *Location) UnmarshalBinary(b []byte) error { 48 | return json.Unmarshal(b, l) 49 | } 50 | -------------------------------------------------------------------------------- /examples/merge/README.md: -------------------------------------------------------------------------------- 1 | # Custom merging strategy 2 | 3 | In this example we are creating a column called `location` that contains a JSON-encoded position and velocity. The position is updated by calling a `MergeString()` function with the velocity vector, the updates are then merged atomically using a merging function specified. 4 | 5 | The merge happens when transaction is committed to ensure consistency. Hence, this technique allows for two concurrent transactions to update the same position. 6 | 7 | ## Example output 8 | 9 | ```json 10 | 00: {"position":[1,2],"velocity":[1,2]} 11 | 01: {"position":[2,4],"velocity":[1,2]} 12 | 02: {"position":[3,6],"velocity":[1,2]} 13 | 03: {"position":[4,8],"velocity":[1,2]} 14 | 04: {"position":[5,10],"velocity":[1,2]} 15 | 05: {"position":[6,12],"velocity":[1,2]} 16 | 06: {"position":[7,14],"velocity":[1,2]} 17 | 07: {"position":[8,16],"velocity":[1,2]} 18 | 08: {"position":[9,18],"velocity":[1,2]} 19 | 09: {"position":[10,20],"velocity":[1,2]} 20 | 10: {"position":[11,22],"velocity":[1,2]} 21 | 11: {"position":[12,24],"velocity":[1,2]} 22 | 12: {"position":[13,26],"velocity":[1,2]} 23 | 13: {"position":[14,28],"velocity":[1,2]} 24 | 14: {"position":[15,30],"velocity":[1,2]} 25 | 15: {"position":[16,32],"velocity":[1,2]} 26 | 16: {"position":[17,34],"velocity":[1,2]} 27 | 17: {"position":[18,36],"velocity":[1,2]} 28 | 18: {"position":[19,38],"velocity":[1,2]} 29 | 19: {"position":[20,40],"velocity":[1,2]} 30 | ``` 31 | 
-------------------------------------------------------------------------------- /examples/cache/README.md: -------------------------------------------------------------------------------- 1 | # Example: Key/Value Cache 2 | 3 | This example demonstrates a `Key` column type that allows you to perform `O(1)` lookups over that. This can be used in the case where you do not have a specific offset for the entry. 4 | 5 | ```go 6 | // Cache represents a key-value store 7 | type Cache struct { 8 | store *column.Collection 9 | } 10 | 11 | // New creates a new key-value cache 12 | func New() *Cache { 13 | db := column.NewCollection() 14 | db.CreateColumn("key", column.ForKey()) 15 | db.CreateColumn("val", column.ForString()) 16 | 17 | return &Cache{ 18 | store: db, 19 | } 20 | } 21 | 22 | // Get attempts to retrieve a value for a key 23 | func (c *Cache) Get(key string) (value string, found bool) { 24 | c.store.QueryKey(key, func(r column.Row) error { 25 | value, found = r.String("val") 26 | return nil 27 | }) 28 | return 29 | } 30 | 31 | // Set updates or inserts a new value 32 | func (c *Cache) Set(key, value string) { 33 | if err := c.store.UpsertKey(key, func(r column.Row) error { 34 | r.SetString("val", value) 35 | return nil 36 | }); err != nil { 37 | panic(err) 38 | } 39 | } 40 | ``` 41 | 42 | ## Some Results 43 | 44 | ``` 45 | running insert of 50000 rows... 46 | -> inserted 10000 rows 47 | -> inserted 20000 rows 48 | -> inserted 30000 rows 49 | -> inserted 40000 rows 50 | -> inserted 50000 rows 51 | -> insert took 80.2478ms 52 | 53 | running query of user_11255... 54 | Hi, User 11255 true 55 | -> query took 1.271µs 56 | ``` 57 | -------------------------------------------------------------------------------- /examples/cache/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE file in the project root for details. 3 | 4 | package main 5 | 6 | import ( 7 | "fmt" 8 | "os" 9 | "time" 10 | 11 | "github.com/kelindar/xxrand" 12 | ) 13 | 14 | func main() { 15 | amount := 50000 16 | cache := New() 17 | 18 | measure("insert", fmt.Sprintf("%v rows", amount), func() { 19 | for i := 0; i < amount; i++ { 20 | key := fmt.Sprintf("user_%d", i) 21 | val := fmt.Sprintf("Hi, User %d", i) 22 | cache.Set(key, val) 23 | 24 | if (i+1)%10000 == 0 { 25 | fmt.Printf("-> inserted %v rows\n", i+1) 26 | } 27 | } 28 | }, 1) 29 | 30 | key := fmt.Sprintf("user_%d", xxrand.Intn(amount)) 31 | measure("query", key, func() { 32 | xxrand.Intn(amount) 33 | fmt.Println(cache.Get(key)) 34 | }, 100000) 35 | } 36 | 37 | func measure(action, name string, fn func(), iterations int) { 38 | defer func(start time.Time, stdout *os.File) { 39 | os.Stdout = stdout 40 | elapsed := time.Since(start) / time.Duration(iterations) 41 | fmt.Printf("-> %v took %v\n", action, elapsed.String()) 42 | }(time.Now(), os.Stdout) 43 | 44 | fmt.Println() 45 | fmt.Printf("running %v of %v...\n", action, name) 46 | 47 | // Run a few times so the results are more stable 48 | null, _ := os.Open(os.DevNull) 49 | for i := 0; i < iterations; i++ { 50 | if i > 0 { // Silence subsequent runs 51 | os.Stdout = null 52 | } 53 | 54 | fn() 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /examples/snapshot/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/kelindar/column" 8 | ) 9 | 10 | func main() { 11 | 12 | // Create a collection and a corresponding schema 13 | players := column.NewCollection() 14 | players.CreateColumn("serial", column.ForKey()) 15 | players.CreateColumn("name", column.ForEnum()) 16 | players.CreateColumn("active", column.ForBool()) 17 | players.CreateColumn("class", column.ForEnum()) 18 | players.CreateColumn("race", 
column.ForEnum()) 19 | players.CreateColumn("age", column.ForFloat64()) 20 | players.CreateColumn("hp", column.ForFloat64()) 21 | players.CreateColumn("mp", column.ForFloat64()) 22 | players.CreateColumn("balance", column.ForFloat64()) 23 | players.CreateColumn("gender", column.ForEnum()) 24 | players.CreateColumn("guild", column.ForEnum()) 25 | fmt.Printf("snapshot: created an empty collection (%v rows)\n", players.Count()) 26 | 27 | // Open the file containing a snapshot 28 | src, err := os.Open("../../fixtures/players.bin") 29 | if err != nil { 30 | panic(err) 31 | } 32 | 33 | // Restore from an existing snapshot 34 | fmt.Printf("snapshot: restoring from '%v' ...\n", src.Name()) 35 | if err := players.Restore(src); err != nil { 36 | panic(err) 37 | } 38 | 39 | fmt.Printf("snapshot: restored %v rows\n", players.Count()) 40 | dst, err := os.Create("snapshot.bin") 41 | if err != nil { 42 | panic(err) 43 | } 44 | 45 | fmt.Printf("snapshot: saving state into '%v' ...\n", dst.Name()) 46 | if err := players.Snapshot(dst); err != nil { 47 | panic(err) 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /codegen/numbers.tpl: -------------------------------------------------------------------------------- 1 | // This code was generated, DO NOT EDIT. 2 | // Any changes will be lost if this file is regenerated. 3 | 4 | package column 5 | 6 | import ( 7 | "github.com/kelindar/bitmap" 8 | "github.com/kelindar/column/commit" 9 | ) 10 | 11 | {{ range . 
}} 12 | // --------------------------- {{.Name}} ---------------------------- 13 | 14 | // make{{.Name}}s creates a new vector for {{.Type}}s 15 | func make{{.Name}}s(opts ...func(*option[{{.Type}}])) Column { 16 | return makeNumeric( 17 | func(buffer *commit.Buffer, idx uint32, value {{.Type}}) { buffer.Put{{.Name}}(commit.Put, idx, value) }, 18 | func(r *commit.Reader, fill bitmap.Bitmap, data []{{.Type}}, opts option[{{.Type}}]) { 19 | for r.Next() { 20 | offset := r.IndexAtChunk() 21 | switch r.Type { 22 | case commit.Put: 23 | fill[offset>>6] |= 1 << (offset & 0x3f) 24 | data[offset] = r.{{.Name}}() 25 | case commit.Merge: 26 | fill[offset>>6] |= 1 << (offset & 0x3f) 27 | data[offset] = r.Swap{{.Name}}(opts.Merge(data[offset], r.{{.Name}}())) 28 | case commit.Delete: 29 | fill.Remove(offset) 30 | } 31 | } 32 | }, opts, 33 | ) 34 | } 35 | 36 | // rw{{.Name}} represents a read-write cursor for {{.Type}} 37 | type rw{{.Name}} struct { 38 | rdNumber[{{.Type}}] 39 | writer *commit.Buffer 40 | } 41 | 42 | // Set sets the value at the current transaction cursor 43 | func (s rw{{.Name}}) Set(value {{.Type}}) { 44 | s.writer.Put{{.Name}}(commit.Put, s.txn.cursor, value) 45 | } 46 | 47 | // Merge atomically merges a delta to the value at the current transaction cursor 48 | func (s rw{{.Name}}) Merge(delta {{.Type}}) { 49 | s.writer.Put{{.Name}}(commit.Merge, s.txn.cursor, delta) 50 | } 51 | 52 | // {{.Name}} returns a read-write accessor for {{.Type}} column 53 | func (txn *Txn) {{.Name}}(columnName string) rw{{.Name}} { 54 | return rw{{.Name}}{ 55 | rdNumber: readNumberOf[{{.Type}}](txn, columnName), 56 | writer: txn.bufferFor(columnName), 57 | } 58 | } 59 | 60 | {{ end }} -------------------------------------------------------------------------------- /column_bool.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. 
See LICENSE file in the project root for details. 3 | 4 | package column 5 | 6 | import ( 7 | "github.com/kelindar/bitmap" 8 | "github.com/kelindar/column/commit" 9 | ) 10 | 11 | // columnBool represents a boolean column 12 | type columnBool struct { 13 | data bitmap.Bitmap 14 | } 15 | 16 | // makeBools creates a new boolean column 17 | func makeBools() Column { 18 | return &columnBool{ 19 | data: make(bitmap.Bitmap, 0, 4), 20 | } 21 | } 22 | 23 | // Grow grows the size of the column until we have enough to store 24 | func (c *columnBool) Grow(idx uint32) { 25 | c.data.Grow(idx) 26 | } 27 | 28 | // Apply applies a set of operations to the column. 29 | func (c *columnBool) Apply(chunk commit.Chunk, r *commit.Reader) { 30 | for r.Next() { 31 | v := uint64(1) << (r.Offset & 0x3f) 32 | switch r.Type { 33 | case commit.PutTrue: 34 | c.data[r.Offset>>6] |= v 35 | case commit.PutFalse: // also "delete" 36 | c.data[r.Offset>>6] &^= v 37 | } 38 | } 39 | } 40 | 41 | // Value retrieves a value at a specified index 42 | func (c *columnBool) Value(idx uint32) (interface{}, bool) { 43 | value := c.data.Contains(idx) 44 | return value, value 45 | } 46 | 47 | // Contains checks whether the column has a value at a specified index. 
48 | func (c *columnBool) Contains(idx uint32) bool { 49 | return c.data.Contains(idx) 50 | } 51 | 52 | // Index returns the fill list for the column 53 | func (c *columnBool) Index(chunk commit.Chunk) bitmap.Bitmap { 54 | return chunk.OfBitmap(c.data) 55 | } 56 | 57 | // Snapshot writes the entire column into the specified destination buffer 58 | func (c *columnBool) Snapshot(chunk commit.Chunk, dst *commit.Buffer) { 59 | dst.PutBitmap(commit.PutTrue, chunk, c.data) 60 | } 61 | 62 | // --------------------------- Writer ---------------------------- 63 | 64 | // rwBool represents read-write accessor for boolean values 65 | type rwBool struct { 66 | rdBool 67 | writer *commit.Buffer 68 | } 69 | 70 | // Set sets the value at the current transaction cursor 71 | func (s rwBool) Set(value bool) { 72 | s.writer.PutBool(*s.cursor, value) 73 | } 74 | 75 | // Bool returns a bool column accessor 76 | func (txn *Txn) Bool(columnName string) rwBool { 77 | return rwBool{ 78 | rdBool: readBoolOf(txn, columnName), 79 | writer: txn.bufferFor(columnName), 80 | } 81 | } 82 | 83 | // --------------------------- Reader ---------------------------- 84 | 85 | // rdBool represents a read-only accessor for boolean values 86 | type rdBool reader[Column] 87 | 88 | // Get loads the value at the current transaction cursor 89 | func (s rdBool) Get() bool { 90 | return s.reader.Contains(*s.cursor) 91 | } 92 | 93 | // readBoolOf creates a new boolean reader 94 | func readBoolOf(txn *Txn, columnName string) rdBool { 95 | return rdBool(readerFor[Column](txn, columnName)) 96 | } 97 | -------------------------------------------------------------------------------- /examples/merge/main.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 
3 | 4 | package main 5 | 6 | import ( 7 | "encoding/json" 8 | "fmt" 9 | 10 | "github.com/kelindar/column" 11 | ) 12 | 13 | // Movement represents a movement with a position and velocity 14 | type Movement struct { 15 | Position [2]float64 `json:"position,omitempty"` 16 | Velocity [2]float64 `json:"velocity,omitempty"` 17 | } 18 | 19 | func main() { 20 | 21 | // A merging function that accepts a velocity vector and updates 22 | // the movement structure accordingly. 23 | mergeVectors := func(value, delta string) string { 24 | movement, ok := parseMovement(value) 25 | if !ok { 26 | movement = Movement{ 27 | Position: [2]float64{0, 0}, 28 | } 29 | } 30 | 31 | // Parse the incoming delta value 32 | velocity, ok := parseVector(delta) 33 | if !ok { 34 | return value 35 | } 36 | 37 | // Update the current velocity and recalculate the position 38 | movement.Velocity = velocity 39 | movement.Position[0] += velocity[0] // Update X 40 | movement.Position[1] += velocity[1] // Update Y 41 | 42 | // Encode the movement as JSON and return the updated value 43 | return encode(movement) 44 | } 45 | 46 | // Create a column with a specified merge function 47 | db := column.NewCollection() 48 | db.CreateColumn("location", column.ForString( 49 | column.WithMerge(mergeVectors), // use our merging function 50 | )) 51 | 52 | // Insert an empty row 53 | id, _ := db.Insert(func(r column.Row) error { 54 | r.SetString("location", "{}") 55 | return nil 56 | }) 57 | 58 | // Update several times 59 | for i := 0; i < 20; i++ { 60 | 61 | // Move the location by applying a same velocity vector 62 | db.Query(func(txn *column.Txn) error { 63 | location := txn.String("location") 64 | return txn.QueryAt(id, func(r column.Row) error { 65 | location.Merge(encode([2]float64{1, 2})) 66 | return nil 67 | }) 68 | }) 69 | 70 | // Print out current location 71 | db.Query(func(txn *column.Txn) error { 72 | location := txn.String("location") 73 | return txn.QueryAt(id, func(r column.Row) error { 74 | value, _ := 
location.Get() 75 | fmt.Printf("%.2d: %v \n", i, value) 76 | return nil 77 | }) 78 | }) 79 | } 80 | 81 | } 82 | 83 | // parseMovement parses a value string into a Movement struct 84 | func parseMovement(value string) (out Movement, ok bool) { 85 | if err := json.Unmarshal([]byte(value), &out); err != nil { 86 | return Movement{}, false 87 | } 88 | return out, true 89 | } 90 | 91 | // parseVector parses a value string into 2 dimensional array 92 | func parseVector(value string) (out [2]float64, ok bool) { 93 | if err := json.Unmarshal([]byte(value), &out); err != nil { 94 | return [2]float64{}, false 95 | } 96 | return out, true 97 | } 98 | 99 | // encodes encodes the value as JSON 100 | func encode(value any) string { 101 | encoded, _ := json.Marshal(value) 102 | return string(encoded) 103 | } 104 | -------------------------------------------------------------------------------- /txn_lock.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package column 5 | 6 | import ( 7 | "github.com/kelindar/bitmap" 8 | "github.com/kelindar/column/commit" 9 | ) 10 | 11 | const ( 12 | bitmapShift = chunkShift - 6 13 | bitmapSize = 1 << bitmapShift 14 | chunkShift = 14 // 16K 15 | chunkSize = 1 << chunkShift 16 | ) 17 | 18 | // initialize ensures that the transaction is pre-initialized with the snapshot 19 | // of the owner's fill list. 
20 | func (txn *Txn) initialize() { 21 | if txn.setup { 22 | return 23 | } 24 | 25 | txn.owner.lock.RLock() 26 | txn.index.Grow(uint32(txn.owner.opts.Capacity)) 27 | txn.owner.fill.Clone(&txn.index) 28 | txn.owner.lock.RUnlock() 29 | txn.setup = true 30 | } 31 | 32 | // --------------------------- Locked Seek --------------------------- 33 | 34 | // QueryAt jumps at a particular offset in the collection, sets the cursor to the 35 | // provided position and executes given callback fn. 36 | func (txn *Txn) QueryAt(index uint32, f func(Row) error) (err error) { 37 | lock := txn.owner.slock 38 | txn.cursor = index 39 | 40 | chunk := commit.ChunkAt(index) 41 | lock.RLock(uint(chunk)) 42 | err = f(Row{txn}) 43 | lock.RUnlock(uint(chunk)) 44 | return err 45 | } 46 | 47 | // --------------------------- Locked Range --------------------------- 48 | 49 | // rangeRead iterates over index, chunk by chunk and ensures that each 50 | // chunk is protected by an appropriate read lock. 51 | func (txn *Txn) rangeRead(f func(chunk commit.Chunk, index bitmap.Bitmap)) { 52 | limit := commit.Chunk(len(txn.index) >> bitmapShift) 53 | lock := txn.owner.slock 54 | 55 | for chunk := commit.Chunk(0); chunk <= limit; chunk++ { 56 | lock.RLock(uint(chunk)) 57 | f(chunk, chunk.OfBitmap(txn.index)) 58 | lock.RUnlock(uint(chunk)) 59 | } 60 | } 61 | 62 | // rangeReadPair iterates over the index and another bitmap, chunk by chunk and 63 | // ensures that each chunk is protected by an appropriate read lock. 64 | func (txn *Txn) rangeReadPair(column *column, f func(a, b bitmap.Bitmap)) { 65 | limit := commit.Chunk(len(txn.index) >> bitmapShift) 66 | lock := txn.owner.slock 67 | 68 | // Iterate through all of the chunks and acquire appropriate shard locks. 
69 | for chunk := commit.Chunk(0); chunk <= limit; chunk++ { 70 | lock.RLock(uint(chunk)) 71 | f(chunk.OfBitmap(txn.index), column.Index(chunk)) 72 | lock.RUnlock(uint(chunk)) 73 | } 74 | } 75 | 76 | // rangeWrite ranges over the dirty chunks and acquires exclusive latches along 77 | // the way. This is used to commit a transaction. 78 | func (txn *Txn) rangeWrite(fn func(commitID uint64, chunk commit.Chunk, fill bitmap.Bitmap)) { 79 | lock := txn.owner.slock 80 | txn.dirty.Range(func(x uint32) { 81 | chunk := commit.Chunk(x) 82 | commitID := commit.Next() 83 | lock.Lock(uint(chunk)) 84 | 85 | // Compute the fill and set the last commit ID 86 | txn.owner.lock.RLock() 87 | fill := chunk.OfBitmap(txn.owner.fill) 88 | txn.owner.commits[chunk] = commitID // OK, since we have a shard lock 89 | txn.owner.lock.RUnlock() 90 | 91 | // Call the delegate 92 | fn(commitID, chunk, fill) 93 | lock.Unlock(uint(chunk)) 94 | }) 95 | } 96 | -------------------------------------------------------------------------------- /column_expire.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package column 5 | 6 | import ( 7 | "context" 8 | "time" 9 | ) 10 | 11 | // --------------------------- Expiration (Vacuum) ---------------------------- 12 | 13 | // vacuum cleans up the expired objects on a specified interval. 
14 | func (c *Collection) vacuum(ctx context.Context, interval time.Duration) { 15 | ticker := time.NewTicker(interval) 16 | for { 17 | select { 18 | case <-ctx.Done(): 19 | ticker.Stop() 20 | return 21 | case <-ticker.C: 22 | c.Query(func(txn *Txn) error { 23 | ttl, now := txn.TTL(), time.Now() 24 | return txn.With(expireColumn).Range(func(idx uint32) { 25 | if expiresAt, ok := ttl.ExpiresAt(); ok && now.After(expiresAt) { 26 | txn.DeleteAt(idx) 27 | } 28 | }) 29 | }) 30 | } 31 | } 32 | } 33 | 34 | // --------------------------- Expiration (Column) ---------------------------- 35 | 36 | // TTL returns a read-write accessor for the time-to-live column 37 | func (txn *Txn) TTL() rwTTL { 38 | return rwTTL{ 39 | rw: rwInt64{ 40 | rdNumber: readNumberOf[int64](txn, expireColumn), 41 | writer: txn.bufferFor(expireColumn), 42 | }, 43 | } 44 | } 45 | 46 | type rwTTL struct { 47 | rw rwInt64 48 | } 49 | 50 | // TTL returns the remaining time-to-live duration 51 | func (s rwTTL) TTL() (time.Duration, bool) { 52 | if expireAt, ok := s.rw.Get(); ok && expireAt != 0 { 53 | return readTTL(expireAt), true 54 | } 55 | return 0, false 56 | } 57 | 58 | // ExpiresAt returns the expiration time 59 | func (s rwTTL) ExpiresAt() (time.Time, bool) { 60 | if expireAt, ok := s.rw.Get(); ok && expireAt != 0 { 61 | return time.Unix(0, expireAt), true 62 | } 63 | return time.Time{}, false 64 | } 65 | 66 | // Set sets the time-to-live value at the current transaction cursor 67 | func (s rwTTL) Set(ttl time.Duration) { 68 | s.rw.Set(writeTTL(ttl)) 69 | } 70 | 71 | // Extend extends time-to-live of the row current transaction cursor by a specified amount 72 | func (s rwTTL) Extend(delta time.Duration) { 73 | s.rw.Merge(int64(delta.Nanoseconds())) 74 | } 75 | 76 | // readTTL converts expiration to a TTL 77 | func readTTL(expireAt int64) time.Duration { 78 | return time.Unix(0, expireAt).Sub(time.Now()) 79 | } 80 | 81 | // writeTTL converts ttl to expireAt 82 | func writeTTL(ttl time.Duration) 
int64 { 83 | if ttl > 0 { 84 | return time.Now().Add(ttl).UnixNano() 85 | } 86 | return 0 87 | } 88 | 89 | // --------------------------- Expiration (Row) ---------------------------- 90 | 91 | // TTL retrieves the time left before the row will be cleaned up 92 | func (r Row) TTL() (time.Duration, bool) { 93 | if expireAt, ok := r.Int64(expireColumn); ok { 94 | return readTTL(expireAt), true 95 | } 96 | return 0, false 97 | } 98 | 99 | // SetTTL sets a time-to-live for a row and returns the expiration time 100 | func (r Row) SetTTL(ttl time.Duration) (until time.Time) { 101 | var nanos int64 102 | if ttl > 0 { 103 | until = time.Now().Add(ttl) 104 | nanos = until.UnixNano() 105 | } 106 | 107 | // Otherwise, return zero time (never expires) 108 | r.SetInt64(expireColumn, nanos) 109 | return 110 | } 111 | -------------------------------------------------------------------------------- /commit/buffer_codec.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package commit 5 | 6 | import ( 7 | "encoding/binary" 8 | "io" 9 | "reflect" 10 | "unsafe" 11 | 12 | "github.com/kelindar/iostream" 13 | ) 14 | 15 | // --------------------------- WriteTo ---------------------------- 16 | 17 | // WriteTo writes data to w until there's no more data to write or when an error occurs. The return 18 | // value n is the number of bytes written. Any error encountered during the write is also returned. 
19 | func (b *Buffer) WriteTo(dst io.Writer) (int64, error) { 20 | w := iostream.NewWriter(dst) 21 | if err := w.WriteString(b.Column); err != nil { 22 | return w.Offset(), err 23 | } 24 | 25 | if err := w.WriteInt32(b.last); err != nil { 26 | return w.Offset(), err 27 | } 28 | 29 | var temp [12]byte 30 | if err := w.WriteRange(len(b.chunks), func(i int, w *iostream.Writer) error { 31 | v := b.chunks[i] 32 | binary.BigEndian.PutUint32(temp[0:4], uint32(v.Chunk)) 33 | binary.BigEndian.PutUint32(temp[4:8], v.Start) 34 | binary.BigEndian.PutUint32(temp[8:12], v.Value) 35 | _, err := w.Write(temp[:]) 36 | return err 37 | }); err != nil { 38 | return w.Offset(), err 39 | } 40 | 41 | err := w.WriteBytes(b.buffer) 42 | return w.Offset(), err 43 | } 44 | 45 | // --------------------------- ReadFrom ---------------------------- 46 | 47 | // ReadFrom reads data from r until EOF or error. The return value n is the number of 48 | // bytes read. Any error except EOF encountered during the read is also returned. 
func (b *Buffer) ReadFrom(src io.Reader) (int64, error) {
	r := iostream.NewReader(src)
	var err error

	// Fields are decoded in the exact order WriteTo produced them:
	// column name, last offset, chunk headers, raw buffer contents.
	if b.Column, err = r.ReadString(); err != nil {
		return r.Offset(), err
	}

	if b.last, err = r.ReadInt32(); err != nil {
		return r.Offset(), err
	}

	if b.chunks, err = readChunksFrom(r); err != nil {
		return r.Offset(), err
	}

	if b.buffer, err = r.ReadBytes(); err != nil {
		return r.Offset(), err
	}

	// Restore the current chunk from the last header so subsequent writes
	// continue where the serialized buffer left off.
	if len(b.chunks) > 0 {
		last := b.chunks[len(b.chunks)-1]
		b.chunk = last.Chunk
	}

	return r.Offset(), nil
}

// readChunksFrom reads the list of chunks from the reader. Each header is a
// fixed 12 bytes (big-endian), mirroring the encoding in WriteTo.
func readChunksFrom(r *iostream.Reader) ([]header, error) {
	size, err := r.ReadUvarint()
	if err != nil {
		return nil, err
	}

	v := make([]header, size)
	var temp [12]byte
	for i := 0; i < int(size); i++ {
		if _, err := io.ReadFull(r, temp[:]); err != nil {
			return nil, err
		}

		v[i].Chunk = Chunk(binary.BigEndian.Uint32(temp[0:4]))
		v[i].Start = binary.BigEndian.Uint32(temp[4:8])
		v[i].Value = binary.BigEndian.Uint32(temp[8:12])
	}
	return v, nil
}

// toBytes converts a string to a byte slice without allocating.
func toBytes(v string) (b []byte) {
	// Aliases the string's backing array: the result shares storage with v
	// and must never be mutated.
	// NOTE(review): reflect.StringHeader/SliceHeader are deprecated; prefer
	// unsafe.Slice(unsafe.StringData(v), len(v)) once go.mod is >= 1.20 — TODO confirm.
	strHeader := (*reflect.StringHeader)(unsafe.Pointer(&v))
	byteHeader := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	byteHeader.Data = strHeader.Data

	l := len(v)
	byteHeader.Len = l
	byteHeader.Cap = l
	return
}

package main

import (
	"encoding/json"
	"os"

	"github.com/kelindar/column"
)

func main() {

	// Create a new columnar collection
	players := column.NewCollection()
	players.CreateColumn("serial", column.ForKey())
	players.CreateColumn("name", column.ForString())
	players.CreateColumn("active", column.ForBool())
	players.CreateColumn("class", column.ForEnum())
	players.CreateColumn("race", column.ForEnum())
	players.CreateColumn("age", column.ForInt())
	players.CreateColumn("hp", column.ForInt())
	players.CreateColumn("mp", column.ForInt())
	players.CreateColumn("balance", column.ForFloat64())
	players.CreateColumn("gender", column.ForEnum())
	players.CreateColumn("guild", column.ForEnum())
	players.CreateColumn("location", column.ForRecord(func() *Location {
		return new(Location)
	}))

	// index on humans
	players.CreateIndex("human", "race", func(r column.Reader) bool {
		return r.String() == "human"
	})

	// index for mages
	players.CreateIndex("mage", "class", func(r column.Reader) bool {
		return r.String() == "mage"
	})

	// index for old
	players.CreateIndex("old", "age", func(r column.Reader) bool {
		return r.Int() >= 30
	})

	// Load the items into the collection
	loaded := loadFixture("players.json")
	players.Query(func(txn *column.Txn) error {
		for _, v := range loaded {
			txn.InsertKey(v.Serial, func(r column.Row) error {
				r.SetKey(v.Serial)
				r.SetString("name", v.Name)
				r.SetBool("active", v.Active)
				r.SetEnum("class", v.Class)
				r.SetEnum("race", v.Race)
				r.SetInt("age", v.Age)
				r.SetInt("hp", v.Hp)
				r.SetInt("mp", v.Mp)
				r.SetFloat64("balance", v.Balance)
				r.SetEnum("gender", v.Gender)
				r.SetEnum("guild", v.Guild)
				r.SetRecord("location", &v.Location)
				return nil
			})
		}
		return nil
	})

	// Run an indexed query
	players.Query(func(txn *column.Txn) error {
		name := txn.String("name")
		return txn.With("human", "mage", "old").Range(func(idx uint32) {
			value, _ := name.Get()
			println("old mage, human:", value)
		})
	})
}

// loadFixture loads a fixture by its name from the shared fixtures directory,
// panicking on any I/O or decoding error (acceptable for an example program).
func loadFixture(name string) []Player {
	b, err := os.ReadFile("../../fixtures/" + name)
	if err != nil {
		panic(err)
	}

	var data []Player
	if err := json.Unmarshal(b, &data); err != nil {
		panic(err)
	}

	return data
}

// --------------------------- Player ----------------------------

// Player mirrors one row of the players fixture file.
type Player struct {
	Serial  string   `json:"serial"`
	Name    string   `json:"name"`
	Active  bool     `json:"active"`
	Class   string   `json:"class"`
	Race    string   `json:"race"`
	Age     int      `json:"age"`
	Hp      int      `json:"hp"`
	Mp      int      `json:"mp"`
	Balance float64  `json:"balance"`
	Gender  string   `json:"gender"`
	Guild   string   `json:"guild"`
	Location Location `json:"location"`
}

// Location is a record-column payload, persisted through the binary
// marshaler pair below (encoded as JSON).
type Location struct {
	X float64 `json:"x"`
	Y float64 `json:"y"`
}

// MarshalBinary encodes the location as JSON for storage in a record column.
func (l Location) MarshalBinary() ([]byte, error) {
	return json.Marshal(l)
}

// UnmarshalBinary decodes a JSON-encoded location produced by MarshalBinary.
func (l *Location) UnmarshalBinary(b []byte) error {
	return json.Unmarshal(b, l)
}
/commit/log_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package commit 5 | 6 | import ( 7 | "bytes" 8 | "fmt" 9 | "io" 10 | "os" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | // --------------------------- Commit Encoding ---------------------------- 17 | 18 | func TestCommitWriteToFailures(t *testing.T) { 19 | for size := 0; size < 30; size++ { 20 | output := &limitWriter{Limit: size} 21 | commit := newCommit(1) 22 | _, err := commit.WriteTo(output) 23 | assert.Error(t, err) 24 | } 25 | } 26 | 27 | func TestCommitReadFromFailures(t *testing.T) { 28 | commit := newCommit(1) 29 | buffer := bytes.NewBuffer(nil) 30 | n, err := commit.WriteTo(buffer) 31 | assert.NoError(t, err) 32 | 33 | for size := 0; size < int(n)-1; size++ { 34 | output := new(Commit) 35 | _, err := output.ReadFrom(bytes.NewReader(buffer.Bytes()[:size])) 36 | assert.Error(t, err) 37 | } 38 | } 39 | 40 | func TestFileCopy(t *testing.T) { 41 | dst := bytes.NewBuffer(nil) 42 | log := Open(&limitWriter{}) 43 | assert.NoError(t, log.Copy(dst)) 44 | } 45 | 46 | func TestFileCopySeekErr(t *testing.T) { 47 | dst := bytes.NewBuffer(nil) 48 | log := Open(&limitWriter{ 49 | SeekErr: io.ErrShortBuffer, 50 | }) 51 | 52 | assert.Error(t, log.Copy(dst)) 53 | } 54 | 55 | func newCommit(id int) Commit { 56 | return Commit{ 57 | ID: uint64(id), 58 | Chunk: 0, 59 | Updates: []*Buffer{ 60 | newInterleaved("a"), 61 | newInterleaved("b"), 62 | }, 63 | } 64 | } 65 | 66 | // --------------------------- Log Operations ---------------------------- 67 | 68 | func TestLogAppendRange(t *testing.T) { 69 | buffer := bytes.NewBuffer(nil) 70 | logger := Open(buffer) 71 | 72 | assert.NoError(t, logger.Append(newCommit(1))) 73 | assert.NoError(t, logger.Append(newCommit(2))) 74 | 75 | 
var arr []uint64 76 | assert.NoError(t, logger.Range(func(commit Commit) error { 77 | arr = append(arr, commit.ID) 78 | return nil 79 | })) 80 | 81 | assert.Equal(t, []uint64{1, 2}, arr) 82 | } 83 | 84 | func TestLogRangeFailures(t *testing.T) { 85 | buffer := bytes.NewBuffer(nil) 86 | logger := Open(buffer) 87 | assert.NoError(t, logger.Append(newCommit(1))) 88 | assert.NoError(t, logger.Append(newCommit(2))) 89 | 90 | n := buffer.Len() 91 | for size := 0; size < int(n)-1; size++ { 92 | tmp := bytes.NewBuffer(buffer.Bytes()[:size]) 93 | out := Open(tmp) 94 | 95 | count := 0 96 | out.Range(func(commit Commit) error { 97 | count++ 98 | return nil 99 | }) 100 | assert.Less(t, count, 2, fmt.Sprintf("size=%v", size)) 101 | } 102 | } 103 | 104 | func TestLogRangeStopOnError(t *testing.T) { 105 | buffer := bytes.NewBuffer(nil) 106 | logger := Open(buffer) 107 | assert.NoError(t, logger.Append(newCommit(1))) 108 | assert.NoError(t, logger.Append(newCommit(2))) 109 | 110 | assert.Error(t, logger.Range(func(commit Commit) error { 111 | return io.ErrClosedPipe 112 | })) 113 | } 114 | 115 | func TestLogOpenFile(t *testing.T) { 116 | name := "commit.log" 117 | logger, err := OpenFile(name) 118 | defer os.Remove(name) 119 | defer logger.Close() 120 | 121 | assert.NoError(t, err) 122 | assert.NotNil(t, logger) 123 | assert.Equal(t, "commit.log", logger.Name()) 124 | } 125 | 126 | func TestLogOpenTemp(t *testing.T) { 127 | logger, err := OpenTemp() 128 | defer os.Remove(logger.Name()) 129 | defer logger.Close() 130 | 131 | assert.NoError(t, err) 132 | assert.NotNil(t, logger) 133 | assert.NotEmpty(t, logger.Name()) 134 | } 135 | 136 | func TestLogOpenFileInvalid(t *testing.T) { 137 | logger, err := OpenFile("") 138 | assert.Error(t, err) 139 | assert.Nil(t, logger) 140 | } 141 | -------------------------------------------------------------------------------- /examples/bench/README.md: -------------------------------------------------------------------------------- 1 | # 
Concurrency Benchmark 2 | 3 | This is an example benchmark with various workloads (90% read / 10% write, etc) on a collection of 1 million elements with different goroutine pools. In this example we're combining two types of transactions: 4 | 5 | - Read transactions that update a random element (point-read). 6 | - Write transactions that update a random element (point-write). 7 | 8 | Note that the goal of this benchmark is to validate concurrency, not throughput this represents the current "best" case scenario when the updates are random and do less likely to incur contention. Reads, however quite often would hit the same chunks as only the index itself is randomized. 9 | 10 | ## Results 11 | 12 | Below are some results from running on my 8-core machine (Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz). 13 | 14 | ``` 15 | WORK PROCS READ RATE WRITE RATE 16 | 100%-0% 1 6,080,402 txn/s 0 txn/s 17 | 100%-0% 2 11,280,415 txn/s 0 txn/s 18 | 100%-0% 4 23,909,267 txn/s 0 txn/s 19 | 100%-0% 8 44,142,401 txn/s 0 txn/s 20 | 100%-0% 16 43,839,560 txn/s 0 txn/s 21 | 100%-0% 32 45,981,323 txn/s 0 txn/s 22 | 100%-0% 64 42,550,034 txn/s 0 txn/s 23 | 100%-0% 128 41,748,237 txn/s 0 txn/s 24 | 100%-0% 256 42,838,515 txn/s 0 txn/s 25 | 100%-0% 512 44,023,907 txn/s 0 txn/s 26 | 90%-10% 1 5,275,465 txn/s 582,720 txn/s 27 | 90%-10% 2 7,739,053 txn/s 895,427 txn/s 28 | 90%-10% 4 9,355,436 txn/s 1,015,179 txn/s 29 | 90%-10% 8 8,605,764 txn/s 972,278 txn/s 30 | 90%-10% 16 10,254,677 txn/s 1,138,855 txn/s 31 | 90%-10% 32 10,231,753 txn/s 1,146,337 txn/s 32 | 90%-10% 64 10,708,470 txn/s 1,190,486 txn/s 33 | 90%-10% 128 9,863,114 txn/s 1,111,391 txn/s 34 | 90%-10% 256 9,149,044 txn/s 1,008,791 txn/s 35 | 90%-10% 512 9,131,921 txn/s 1,017,933 txn/s 36 | 50%-50% 1 2,308,520 txn/s 2,323,510 txn/s 37 | 50%-50% 2 2,387,979 txn/s 2,370,993 txn/s 38 | 50%-50% 4 2,381,743 txn/s 2,321,850 txn/s 39 | 50%-50% 8 2,250,533 txn/s 2,293,409 txn/s 40 | 50%-50% 16 2,272,368 txn/s 2,272,368 txn/s 41 | 50%-50% 32 
2,181,658 txn/s 2,268,687 txn/s 42 | 50%-50% 64 2,245,193 txn/s 2,228,612 txn/s 43 | 50%-50% 128 2,172,485 txn/s 2,124,144 txn/s 44 | 50%-50% 256 1,871,648 txn/s 1,830,572 txn/s 45 | 50%-50% 512 1,489,572 txn/s 1,525,730 txn/s 46 | 10%-90% 1 383,770 txn/s 3,350,996 txn/s 47 | 10%-90% 2 318,691 txn/s 2,969,129 txn/s 48 | 10%-90% 4 316,425 txn/s 2,826,869 txn/s 49 | 10%-90% 8 341,467 txn/s 2,751,654 txn/s 50 | 10%-90% 16 300,528 txn/s 2,861,470 txn/s 51 | 10%-90% 32 349,121 txn/s 2,932,224 txn/s 52 | 10%-90% 64 344,824 txn/s 2,869,017 txn/s 53 | 10%-90% 128 287,559 txn/s 2,718,741 txn/s 54 | 10%-90% 256 253,480 txn/s 2,366,967 txn/s 55 | 10%-90% 512 220,717 txn/s 2,102,277 txn/s 56 | 0%-100% 1 0 txn/s 3,601,751 txn/s 57 | 0%-100% 2 0 txn/s 3,054,833 txn/s 58 | 0%-100% 4 0 txn/s 3,171,539 txn/s 59 | 0%-100% 8 0 txn/s 2,962,326 txn/s 60 | 0%-100% 16 0 txn/s 2,986,498 txn/s 61 | 0%-100% 32 0 txn/s 3,068,877 txn/s 62 | 0%-100% 64 0 txn/s 2,994,055 txn/s 63 | 0%-100% 128 0 txn/s 2,802,362 txn/s 64 | 0%-100% 256 0 txn/s 2,444,133 txn/s 65 | 0%-100% 512 0 txn/s 2,180,372 txn/s 66 | ``` 67 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= 5 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= 6 | github.com/kelindar/async v1.1.0 h1:uCO6Wn7kuhmRoG9z26+onU2+MZ7vgscRMlFUIAjyPpo= 7 | github.com/kelindar/async v1.1.0/go.mod h1:bJRlwaRiqdHi+4dpVDNHdwgyRyk6TxpA21fByLf7hIY= 8 | github.com/kelindar/bitmap v1.4.1 h1:Ih0BWMYXkkZxPMU536DsQKRhdvqFl7tuNjImfLJWC6E= 9 | 
github.com/kelindar/bitmap v1.4.1/go.mod h1:4QyD+TDbfgy8oYB9oC4JzqfudYCYIjhbSP7iLraP+28= 10 | github.com/kelindar/intmap v1.1.0 h1:S+YEDvw5FQus5UJDEG+xsLp8il3BTYqBMkkuVVZPMH8= 11 | github.com/kelindar/intmap v1.1.0/go.mod h1:tDanawPWq1B0HC+X3W8Z6IKNrJqxjruy6CdyTlf6Nic= 12 | github.com/kelindar/iostream v1.3.0 h1:Bz2qQabipZlF1XCk64bnxsGLete+iHtayGPeWVpbwbo= 13 | github.com/kelindar/iostream v1.3.0/go.mod h1:MkjMuVb6zGdPQVdwLnFRO0xOTOdDvBWTztFmjRDQkXk= 14 | github.com/kelindar/simd v1.1.2 h1:KduKb+M9cMY2HIH8S/cdJyD+5n5EGgq+Aeeleos55To= 15 | github.com/kelindar/simd v1.1.2/go.mod h1:inq4DFudC7W8L5fhxoeZflLRNpWSs0GNx6MlWFvuvr0= 16 | github.com/kelindar/smutex v1.0.0 h1:+LIZYwPz+v3IWPOse764fNaVQGMVxKV6mbD6OWjQV3o= 17 | github.com/kelindar/smutex v1.0.0/go.mod h1:nMbCZeAHWCsY9Kt4JqX7ETd+NJeR6Swy9im+Th+qUZQ= 18 | github.com/kelindar/xxrand v1.0.2 h1:tODvTkfkYTPUE0W1Tslli7SWng8+Y1hiRI8upDUZIA0= 19 | github.com/kelindar/xxrand v1.0.2/go.mod h1:tb7XX0TvlKSIsCqkVUs7GAWdkeab3Ln2vWWxHEADDuA= 20 | github.com/klauspost/compress v1.16.6 h1:91SKEy4K37vkp255cJ8QesJhjyRO0hn9i9G0GoUwLsk= 21 | github.com/klauspost/compress v1.16.6/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= 22 | github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= 23 | github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 24 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 25 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 26 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 27 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 28 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 29 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 30 | github.com/tidwall/btree v1.6.0 
h1:LDZfKfQIBHGHWSwckhXI0RPSXzlo+KYdjK7FWSqOzzg= 31 | github.com/tidwall/btree v1.6.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= 32 | github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= 33 | github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= 34 | github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= 35 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 36 | golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= 37 | golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 38 | golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 39 | golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= 40 | golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 41 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 42 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 43 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 44 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 45 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 46 | -------------------------------------------------------------------------------- /commit/log.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 

package commit

import (
	"io"
	"os"
	"sync"

	"github.com/kelindar/iostream"
	"github.com/klauspost/compress/s2"
)

// Logger represents a contract that a commit logger must implement
type Logger interface {
	Append(commit Commit) error
}

// Compile-time interface conformance checks.
var _ Logger = new(Channel)
var _ Logger = new(Log)

// --------------------------- Channel ----------------------------

// Channel represents an implementation of a commit writer that simply sends each commit
// into the channel.
type Channel chan Commit

// Append clones the commit and writes it into the logger. The clone is sent,
// presumably so the receiver does not alias buffers the caller may reuse —
// verify against Commit.Clone.
func (w Channel) Append(commit Commit) error {
	w <- commit.Clone()
	return nil
}

// --------------------------- Log ----------------------------

// Log represents a commit log that can be used to write the changes to the collection
// during a snapshot. It also supports reading a commit log back.
type Log struct {
	lock   sync.Mutex       // guards all log operations
	source io.Reader        // underlying stream (file, buffer, ...)
	writer *iostream.Writer // s2-compressing writer; nil when source is read-only
	reader *iostream.Reader // s2-decompressing reader over source
}

// Open opens a commit log stream for both read and write. If the source does
// not implement io.Writer, the log is read-only and writer stays nil.
func Open(source io.Reader) *Log {
	log := &Log{
		source: source,
		reader: iostream.NewReader(s2.NewReader(source)),
	}

	if rw, ok := source.(io.Writer); ok {
		log.writer = iostream.NewWriter(s2.NewWriter(rw))
	}
	return log
}

// OpenFile opens a specified commit log file in a read/write mode. If
// the file does not exist, it will create it.
61 | func OpenFile(filename string) (*Log, error) { 62 | return openFile(os.OpenFile(filename, os.O_RDWR|os.O_CREATE, os.ModePerm)) 63 | } 64 | 65 | // OpenTemp opens a temporary commit log file with read/write permissions 66 | func OpenTemp() (*Log, error) { 67 | return openFile(os.CreateTemp("", "column_*.log")) 68 | } 69 | 70 | // openFile opens a file or returns the error provided 71 | func openFile(file *os.File, err error) (*Log, error) { 72 | if err != nil { 73 | return nil, err 74 | } 75 | 76 | return Open(file), nil 77 | } 78 | 79 | // Append writes the commit into the log destination 80 | func (l *Log) Append(commit Commit) (err error) { 81 | l.lock.Lock() 82 | defer l.lock.Unlock() 83 | 84 | // Write the commit into the stream 85 | if _, err = commit.WriteTo(l.writer); err == nil { 86 | err = l.writer.Flush() 87 | } 88 | return 89 | } 90 | 91 | // Range iterates over all the commits in the log and calls the provided 92 | // callback function on each of them. If the callback returns an error, the 93 | // iteration will stop. 94 | func (l *Log) Range(fn func(Commit) error) error { 95 | l.lock.Lock() 96 | defer l.lock.Unlock() 97 | 98 | for { 99 | var commit Commit 100 | _, err := commit.ReadFrom(l.reader) 101 | switch { 102 | case err == io.EOF: 103 | return nil 104 | case err != nil: 105 | return err 106 | } 107 | 108 | // Read the commit 109 | if err := fn(commit); err != nil { 110 | return err 111 | } 112 | } 113 | } 114 | 115 | // Name calls the corresponding Name() method on the underlying source 116 | func (l *Log) Name() (name string) { 117 | if file, ok := l.source.(interface { 118 | Name() string 119 | }); ok { 120 | name = file.Name() 121 | } 122 | return 123 | } 124 | 125 | // Copy copies the contents of the log into the destination writer. 
func (l *Log) Copy(dst io.Writer) error {
	l.lock.Lock()
	defer l.lock.Unlock()

	// Rewind to the beginning of the file, the underlying source must
	// implement io.Seeker for this to work; otherwise the copy starts from
	// the current read position.
	if seeker, ok := l.source.(io.Seeker); ok {
		if _, err := seeker.Seek(0, io.SeekStart); err != nil {
			return err
		}
	}

	// Append the pending commits to the destination (raw bytes of the
	// source, i.e. still s2-compressed).
	_, err := io.Copy(dst, l.source)
	return err
}

// Close closes the source log file. It is a no-op when the source does not
// implement io.Closer (e.g. a bytes.Buffer).
func (l *Log) Close() (err error) {
	l.lock.Lock()
	defer l.lock.Unlock()
	if closer, ok := l.source.(io.Closer); ok {
		err = closer.Close()
	}
	return
}

// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

package main

import (
	"context"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/kelindar/async"
	"github.com/kelindar/column"
	"github.com/kelindar/column/fixtures"
	"github.com/kelindar/xxrand"
)

// Classes and races used to build secondary indexes over the collection.
var (
	classes = []string{"fighter", "mage", "rogue"}
	races   = []string{"human", "elf", "dwarf", "orc"}
)

// main builds a 1-million row collection and runs the point read/write
// concurrency benchmark over it.
func main() {
	amount := 1000000
	players := column.NewCollection(column.Options{
		Capacity: amount,
	})
	createCollection(players, amount)

	// This runs point query benchmarks
	runBenchmark("Point Reads/Writes", func(writeTxn bool) (reads int, writes int) {

		// To avoid task granularity problem, load up a bit more work on each
		// of the goroutines, a few hundred reads should be enough to amortize
		// the cost of scheduling goroutines, so we can actually test our code.
		for i := 0; i < 1000; i++ {
			offset := xxrand.Uint32n(uint32(amount - 1))
			if writeTxn {
				players.QueryAt(offset, func(r column.Row) error {
					r.SetFloat64("balance", 0)
					return nil
				})
				writes++
			} else {
				players.QueryAt(offset, func(r column.Row) error {
					_, _ = r.Float64("balance")
					return nil
				})
				reads++
			}
		}
		return
	})
}

// runBenchmark runs a benchmark, iterating over write-percentage workloads
// and goroutine pool sizes, and prints the read/write transaction rates.
func runBenchmark(name string, fn func(bool) (int, int)) {
	fmt.Printf("Benchmarking %v ...\n", name)
	fmt.Printf("%7v\t%6v\t%17v\t%13v\n", "WORK", "PROCS", "READ RATE", "WRITE RATE")
	for _, workload := range []int{0, 10, 50, 90, 100} {

		// Iterate over various concurrency levels
		for _, n := range []int{1, 2, 4, 8, 16, 32, 64, 128, 256, 512} {
			work := make(chan async.Task, n)
			pool := async.Consume(context.Background(), n, work)

			// Keep feeding tasks into the pool for one second
			var reads, writes int64
			var wg sync.WaitGroup
			start := time.Now()
			for time.Since(start) < time.Second {
				wg.Add(1)
				work <- async.NewTask(func(ctx context.Context) (interface{}, error) {
					defer wg.Done()

					// Pick read or write randomly according to the workload mix
					r, w := fn(xxrand.Intn(100) < workload)
					atomic.AddInt64(&reads, int64(r))
					atomic.AddInt64(&writes, int64(w))
					return nil, nil
				})
			}

			wg.Wait()
			pool.Cancel()

			elapsed := time.Since(start)
			fmt.Printf("%v%%-%v%%\t%6v\t%17v\t%13v\n", 100-workload, workload, n,
				humanize.Comma(int64(float64(reads)/elapsed.Seconds()))+" txn/s",
				humanize.Comma(int64(float64(writes)/elapsed.Seconds()))+" txn/s",
			)
		}
	}
}

// createCollection loads a collection of players
func createCollection(out *column.Collection, amount int) *column.Collection {
	out.CreateColumn("serial", column.ForEnum())
	out.CreateColumn("name", column.ForEnum())
	out.CreateColumn("active", column.ForBool())
	out.CreateColumn("class", column.ForEnum())
	out.CreateColumn("race", column.ForEnum())
	out.CreateColumn("age", column.ForInt())
	out.CreateColumn("hp", column.ForInt())
	out.CreateColumn("mp", column.ForInt())
	out.CreateColumn("balance", column.ForFloat64())
	out.CreateColumn("gender", column.ForEnum())
	out.CreateColumn("guild", column.ForEnum())

	// One boolean index per class/race; the loop variable is copied so each
	// closure captures its own value (pre-Go 1.22 loop semantics).
	for _, v := range classes {
		class := v
		out.CreateIndex(class, "class", func(r column.Reader) bool {
			return r.String() == class
		})
	}

	for _, v := range races {
		race := v
		out.CreateIndex(race, "race", func(r column.Reader) bool {
			return r.String() == race
		})
	}

	// Load the data in, repeating the fixture until 'amount' rows are inserted
	data := fixtures.Players()
	for i := 0; i < amount/len(data); i++ {
		insertPlayers(out, data)
	}

	return out
}

// insertPlayers inserts players
func insertPlayers(dst *column.Collection, data []fixtures.Player) error {
	return dst.Query(func(txn *column.Txn) error {
		for _, v := range data {
			txn.Insert(func(r column.Row) error {
				r.SetEnum("serial", v.Serial)
				r.SetEnum("name", v.Name)
				r.SetBool("active", v.Active)
				r.SetEnum("class", v.Class)
				r.SetEnum("race", v.Race)
				r.SetInt("age", v.Age)
				r.SetInt("hp", v.Hp)
				r.SetInt("mp", v.Mp)
				r.SetFloat64("balance", v.Balance)
				r.SetEnum("gender", v.Gender)
				r.SetEnum("guild", v.Guild)
				return nil
			})
		}
		return nil
	})
}

// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

package column

import (
	"encoding"
	"reflect"
	"sync"
	"unsafe"

	"github.com/kelindar/column/commit"
)

// recordType is the contract a record payload must satisfy: it is persisted
// via encoding.BinaryMarshaler and restored via encoding.BinaryUnmarshaler.
type recordType interface {
	encoding.BinaryMarshaler
	encoding.BinaryUnmarshaler
}

// --------------------------- Record ----------------------------

// columnRecord represents a typed column that is persisted using binary marshaler
type columnRecord struct {
	columnString            // records are stored in binary form inside a string column
	pool         *sync.Pool // pool of decoded record instances, used for merges
}

// ForRecord creates a new column that contains a type marshaled into/from binary. It requires
// a constructor for the type as well as an optional merge function. If the merge function is
// set to nil, the "overwrite" strategy will be used.
31 | func ForRecord[T recordType](new func() T, opts ...func(*option[T])) Column { 32 | mergeFunc := configure(opts, option[T]{ 33 | Merge: func(value, delta T) T { return delta }, 34 | }).Merge 35 | 36 | pool := &sync.Pool{ 37 | New: func() any { return new() }, 38 | } 39 | 40 | // Merge function that decodes, merges and re-encodes records into their 41 | // respective binary representation. 42 | mergeRecord := func(v, d string) string { 43 | value := pool.Get().(T) 44 | delta := pool.Get().(T) 45 | defer pool.Put(value) 46 | defer pool.Put(delta) 47 | 48 | // Unmarshal the existing value 49 | err1 := value.UnmarshalBinary(s2b(v)) 50 | err2 := delta.UnmarshalBinary(s2b(d)) 51 | if err1 != nil || err2 != nil { 52 | return v 53 | } 54 | 55 | // Apply the user-defined merging strategy and marshal it back 56 | merged := mergeFunc(value, delta) 57 | if encoded, err := merged.MarshalBinary(); err == nil { 58 | return b2s(&encoded) 59 | } 60 | return v 61 | } 62 | 63 | return &columnRecord{ 64 | pool: pool, 65 | columnString: columnString{ 66 | chunks: make(chunks[string], 0, 4), 67 | option: option[string]{ 68 | Merge: mergeRecord, 69 | }, 70 | }, 71 | } 72 | } 73 | 74 | // Value returns the value at the given index 75 | // TODO: should probably get rid of this and use an `rdRecord` instead 76 | func (c *columnRecord) Value(idx uint32) (out any, has bool) { 77 | if v, ok := c.columnString.Value(idx); ok { 78 | out = c.pool.New() 79 | has = out.(encoding.BinaryUnmarshaler).UnmarshalBinary(s2b(v.(string))) == nil 80 | } 81 | return 82 | } 83 | 84 | // --------------------------- Writer ---------------------------- 85 | 86 | // rwRecord represents read-write accessor for primary keys. 
type rwRecord struct {
	rdRecord                // read side of the accessor
	writer   *commit.Buffer // write side: the commit buffer for the column
}

// Set sets the value at the current transaction index
func (s rwRecord) Set(value encoding.BinaryMarshaler) error {
	return s.write(commit.Put, value.MarshalBinary)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwRecord) Merge(delta encoding.BinaryMarshaler) error {
	return s.write(commit.Merge, delta.MarshalBinary)
}

// write encodes the record and, on success, queues the operation into the
// commit buffer; an encoding failure returns the error and writes nothing.
func (s rwRecord) write(op commit.OpType, encodeDelta func() ([]byte, error)) error {
	v, err := encodeDelta()
	if err == nil {
		s.writer.PutBytes(op, *s.cursor, v)
	}
	return err
}

// Record creates a read-write accessor for a specific record column.
func (txn *Txn) Record(columnName string) rwRecord {
	return rwRecord{
		rdRecord: readRecordOf(txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Reader ----------------------------

// rdRecord represents a read-only accessor for records
type rdRecord reader[*columnRecord]

// Get loads the value at the current transaction index. The returned record
// is freshly constructed (ownership passes to the caller; it is not pooled).
func (s rdRecord) Get() (any, bool) {
	value := s.reader.pool.New().(encoding.BinaryUnmarshaler)
	if s.Unmarshal(value.UnmarshalBinary) {
		return value, true
	}

	return nil, false
}

// Unmarshal loads the value at the current transaction index using a
// specified function to decode the value.
func (s rdRecord) Unmarshal(decode func(data []byte) error) bool {
	encoded, ok := s.reader.LoadString(*s.cursor)
	if !ok {
		return false
	}

	// NOTE: s2b aliases the string's backing memory; decode must not retain
	// or mutate the byte slice it receives.
	return decode(s2b(encoded)) == nil
}

// readRecordOf creates a read-only accessor for readers
func readRecordOf(txn *Txn, columnName string) rdRecord {
	return rdRecord(readerFor[*columnRecord](txn, columnName))
}

// --------------------------- Convert ----------------------------

// b2s converts byte slice to a string without allocating. The caller must
// guarantee the bytes are never mutated afterwards, since strings are
// assumed immutable.
func b2s(b *[]byte) string {
	return *(*string)(unsafe.Pointer(b))
}

// s2b converts a string to a byte slice without allocating. The returned
// slice aliases the string's memory and must be treated as read-only.
// NOTE(review): reflect.StringHeader/SliceHeader are deprecated since
// Go 1.20; consider unsafe.Slice(unsafe.StringData(v), len(v)) once the
// module's minimum Go version allows it — TODO confirm go.mod version.
func s2b(v string) (b []byte) {
	strHeader := (*reflect.StringHeader)(unsafe.Pointer(&v))
	byteHeader := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	byteHeader.Data = strHeader.Data

	l := len(v)
	byteHeader.Len = l
	byteHeader.Cap = l
	return
}
--------------------------------------------------------------------------------
/examples/million/main.go:
--------------------------------------------------------------------------------
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.
3 | 4 | package main 5 | 6 | import ( 7 | "bytes" 8 | "fmt" 9 | "os" 10 | "time" 11 | 12 | "github.com/kelindar/column" 13 | "github.com/kelindar/column/fixtures" 14 | ) 15 | 16 | func main() { 17 | amount, runs := 10000000, 20 18 | players := column.NewCollection(column.Options{ 19 | Capacity: amount, 20 | }) 21 | 22 | // insert the data first 23 | measure("insert", fmt.Sprintf("%v rows", amount), func() { 24 | createCollection(players, amount) 25 | }, 1) 26 | 27 | // snapshot the dataset 28 | measure("snapshot", fmt.Sprintf("%v rows", amount), func() { 29 | buffer := bytes.NewBuffer(nil) 30 | players.Snapshot(buffer) 31 | }, 10) 32 | 33 | // run a full scan 34 | measure("full scan", "age >= 30", func() { 35 | players.Query(func(txn *column.Txn) error { 36 | count := txn.WithFloat("age", func(v float64) bool { 37 | return v >= 30 38 | }).Count() 39 | fmt.Printf("-> result = %v\n", count) 40 | return nil 41 | }) 42 | }, runs) 43 | 44 | // run a full scan 45 | measure("full scan", `class == "rogue"`, func() { 46 | players.Query(func(txn *column.Txn) error { 47 | count := txn.WithString("class", func(v string) bool { 48 | return v == "rogue" 49 | }).Count() 50 | fmt.Printf("-> result = %v\n", count) 51 | return nil 52 | }) 53 | }, runs) 54 | 55 | // run a query over human mages 56 | measure("indexed query", "human mages", func() { 57 | players.Query(func(txn *column.Txn) error { 58 | fmt.Printf("-> result = %v\n", txn.With("human", "mage").Count()) 59 | return nil 60 | }) 61 | }, runs*1000) 62 | 63 | // run a query over human mages 64 | measure("indexed query", "human female mages", func() { 65 | players.Query(func(txn *column.Txn) error { 66 | fmt.Printf("-> result = %v\n", txn.With("human", "female", "mage").Count()) 67 | return nil 68 | }) 69 | }, runs*1000) 70 | 71 | // update everyone 72 | measure("update", "balance of everyone", func() { 73 | updates := 0 74 | players.Query(func(txn *column.Txn) error { 75 | balance := txn.Float64("balance") 76 | return 
txn.Range(func(idx uint32) { 77 | updates++ 78 | balance.Set(1000.0) 79 | }) 80 | }) 81 | fmt.Printf("-> updated %v rows\n", updates) 82 | }, runs) 83 | 84 | // update age of mages 85 | measure("update", "age of mages", func() { 86 | updates := 0 87 | players.Query(func(txn *column.Txn) error { 88 | age := txn.Int("age") 89 | return txn.With("mage").Range(func(idx uint32) { 90 | updates++ 91 | age.Set(99) 92 | }) 93 | }) 94 | fmt.Printf("-> updated %v rows\n", updates) 95 | }, runs) 96 | } 97 | 98 | // createCollection loads a collection of players 99 | func createCollection(out *column.Collection, amount int) *column.Collection { 100 | out.CreateColumn("serial", column.ForEnum()) 101 | out.CreateColumn("name", column.ForEnum()) 102 | out.CreateColumn("active", column.ForBool()) 103 | out.CreateColumn("class", column.ForEnum()) 104 | out.CreateColumn("race", column.ForEnum()) 105 | out.CreateColumn("age", column.ForInt()) 106 | out.CreateColumn("hp", column.ForInt()) 107 | out.CreateColumn("mp", column.ForInt()) 108 | out.CreateColumn("balance", column.ForFloat64()) 109 | out.CreateColumn("gender", column.ForEnum()) 110 | out.CreateColumn("guild", column.ForEnum()) 111 | 112 | // index for humans 113 | out.CreateIndex("human", "race", func(r column.Reader) bool { 114 | return r.String() == "human" 115 | }) 116 | 117 | // index for mages 118 | out.CreateIndex("mage", "class", func(r column.Reader) bool { 119 | return r.String() == "mage" 120 | }) 121 | 122 | // index for males 123 | out.CreateIndex("male", "gender", func(r column.Reader) bool { 124 | return r.String() == "male" 125 | }) 126 | 127 | // index for females 128 | out.CreateIndex("female", "gender", func(r column.Reader) bool { 129 | return r.String() == "female" 130 | }) 131 | 132 | // Load the data in 133 | data := fixtures.Players() 134 | for i := 0; i < amount/len(data); i++ { 135 | insertPlayers(out, data) 136 | } 137 | 138 | return out 139 | } 140 | 141 | // insertPlayers inserts players 142 | func 
insertPlayers(dst *column.Collection, data []fixtures.Player) error { 143 | return dst.Query(func(txn *column.Txn) error { 144 | for _, v := range data { 145 | txn.Insert(func(r column.Row) error { 146 | r.SetEnum("serial", v.Serial) 147 | r.SetEnum("name", v.Name) 148 | r.SetBool("active", v.Active) 149 | r.SetEnum("class", v.Class) 150 | r.SetEnum("race", v.Race) 151 | r.SetInt("age", v.Age) 152 | r.SetInt("hp", v.Hp) 153 | r.SetInt("mp", v.Mp) 154 | r.SetFloat64("balance", v.Balance) 155 | r.SetEnum("gender", v.Gender) 156 | r.SetEnum("guild", v.Guild) 157 | return nil 158 | }) 159 | } 160 | return nil 161 | }) 162 | } 163 | 164 | // measure runs a function and measures it 165 | func measure(action, name string, fn func(), iterations int) { 166 | defer func(start time.Time, stdout *os.File) { 167 | os.Stdout = stdout 168 | elapsed := time.Since(start) / time.Duration(iterations) 169 | fmt.Printf("-> %v took %v\n", action, elapsed.String()) 170 | }(time.Now(), os.Stdout) 171 | 172 | fmt.Println() 173 | fmt.Printf("running %v of %v...\n", action, name) 174 | 175 | // Run a few times so the results are more stable 176 | null, _ := os.Open(os.DevNull) 177 | for i := 0; i < iterations; i++ { 178 | if i > 0 { // Silence subsequent runs 179 | os.Stdout = null 180 | } 181 | 182 | fn() 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /commit/commit.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 
package commit

import (
	"io"
	"sync/atomic"
	"time"

	"github.com/kelindar/bitmap"
	"github.com/kelindar/iostream"
)

// --------------------------- ID ----------------------------

// id is the global commit ID counter, seeded with the current time so that
// IDs keep increasing across process restarts.
var id uint64 = uint64(time.Now().UnixNano())

// Next returns the next commit ID
func Next() uint64 {
	return atomic.AddUint64(&id, 1)
}

// --------------------------- Chunk ----------------------------

const (
	bitmapShift = chunkShift - 6 // 64 rows per bitmap word
	bitmapSize = 1 << bitmapShift
	chunkShift = 14 // 16K
	chunkSize = 1 << chunkShift
)

// Chunk represents a chunk number
type Chunk uint32

// ChunkAt returns the chunk number at a given index
func ChunkAt(index uint32) Chunk {
	return Chunk(index >> chunkShift)
}

// OfBitmap computes a chunk for a given bitmap
// NOTE(review): the bodies of OfBitmap/Min/Max/min below appear truncated
// in this view; left byte-for-byte untouched — verify against the original.
func (c Chunk) OfBitmap(v bitmap.Bitmap) bitmap.Bitmap {
	const shift = chunkShift - 6
	x1 := min(int32(c+1)<> 31))
}

// --------------------------- Commit ----------------------------

// Commit represents an individual transaction commit. If multiple chunks are committed
// in the same transaction, it would result in multiple commits per transaction.
type Commit struct {
	ID uint64 // The commit ID
	Chunk Chunk // The chunk number
	Updates []*Buffer // The update buffers
}

// Clone clones a commit into a new one, skipping empty update buffers.
// NOTE(review): the ID is not copied — presumably a clone is expected to
// receive a fresh ID; confirm with callers.
func (c *Commit) Clone() (clone Commit) {
	clone.Chunk = c.Chunk
	for _, u := range c.Updates {
		if len(u.buffer) > 0 {
			clone.Updates = append(clone.Updates, u.Clone())
		}
	}
	return
}

// WriteTo writes data to w until there's no more data to write or when an error occurs. The return
// value n is the number of bytes written. Any error encountered during the write is also returned.
func (c *Commit) WriteTo(dst io.Writer) (int64, error) {
	w := iostream.NewWriter(dst)

	// Write the chunk ID
	if err := w.WriteUvarint(uint64(c.Chunk)); err != nil {
		return w.Offset(), err
	}

	// Write the commit ID
	if err := w.WriteUvarint(c.ID); err != nil {
		return w.Offset(), err
	}

	// Write all of the columns for the current chunk
	reader := NewReader()
	if err := w.WriteRange(len(c.Updates), func(i int, w *iostream.Writer) error {
		buffer := c.Updates[i]

		// Write the column name for this buffer
		if err := w.WriteString(buffer.Column); err != nil {
			return err
		}

		// Write the number of shards in case of interleaved buffer; counted
		// by ranging over the buffer once without consuming it
		shards := uint64(0)
		reader.Range(buffer, c.Chunk, func(r *Reader) {
			shards++
		})
		if err := w.WriteUvarint(shards); err != nil {
			return err
		}

		// Write chunk information: for each shard, its value and its start
		// offset within the combined byte array written below
		offset := uint32(0)
		reader.Range(buffer, c.Chunk, func(r *Reader) {
			_ = w.WriteUint32(uint32(r.Offset)) // Value
			_ = w.WriteUint32(offset)           // Offset
			offset += uint32(len(r.buffer))
		})

		// Write buffer length (sum of all shard lengths accumulated above)
		if err := w.WriteUvarint(uint64(offset)); err != nil {
			return err
		}

		// Write all chunk bytes together; write errors here surface later
		// through the writer's state
		reader.Range(buffer, c.Chunk, func(r *Reader) {
			_, _ = w.Write(r.buffer)
		})
		return nil
	}); err != nil {
		return w.Offset(), err
	}

	return w.Offset(), nil
}

// ReadFrom reads data from r until EOF or error. The return value n is the number of
// bytes read. Any error except EOF encountered during the read is also returned.
155 | func (c *Commit) ReadFrom(src io.Reader) (int64, error) { 156 | r := iostream.NewReader(src) 157 | 158 | // Read chunk ID 159 | chunk, err := r.ReadUvarint() 160 | c.Chunk = Chunk(chunk) 161 | if err != nil { 162 | return r.Offset(), err 163 | } 164 | 165 | // Read commit ID 166 | if c.ID, err = r.ReadUvarint(); err != nil { 167 | return r.Offset(), err 168 | } 169 | 170 | // Read each update buffer in the commit 171 | if err := r.ReadRange(func(i int, r *iostream.Reader) error { 172 | buffer := NewBuffer(256) 173 | c.Updates = append(c.Updates, buffer) 174 | 175 | // Read the column name 176 | column, err := r.ReadString() 177 | if err != nil { 178 | return err 179 | } 180 | 181 | // Read the chunks array 182 | buffer.Reset(column) 183 | r.ReadRange(func(i int, r *iostream.Reader) error { 184 | header := header{ 185 | Chunk: Chunk(chunk), 186 | } 187 | 188 | // Previous offset and index in the byte array 189 | if header.Value, err = r.ReadUint32(); err != nil { 190 | return err 191 | } 192 | if header.Start, err = r.ReadUint32(); err != nil { 193 | return err 194 | } 195 | 196 | buffer.chunks = append(buffer.chunks, header) 197 | return nil 198 | }) 199 | 200 | // Read the combined buffer 201 | buffer.buffer, err = r.ReadBytes() 202 | return err 203 | }); err != nil { 204 | return r.Offset(), err 205 | } 206 | 207 | return r.Offset(), nil 208 | } 209 | -------------------------------------------------------------------------------- /commit/commit_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 
package commit

import (
	"bytes"
	"fmt"
	"io"
	"sync/atomic"
	"testing"

	"github.com/kelindar/bitmap"
	"github.com/stretchr/testify/assert"
)

/*
cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz
BenchmarkColumn/chunkOf-8 8466814 136.2 ns/op 0 B/op 0 allocs/op
*/
func BenchmarkColumn(b *testing.B) {
	b.Run("chunkOf", func(b *testing.B) {
		var temp bitmap.Bitmap
		temp.Grow(2 * chunkSize)

		b.ReportAllocs()
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			for i := 0; i < 100; i++ {
				Chunk(1).OfBitmap(temp)
			}
		}
	})
}

// TestCommitClone verifies that cloning copies non-empty update buffers.
func TestCommitClone(t *testing.T) {
	commit := Commit{
		Updates: []*Buffer{{
			buffer: []byte{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
			chunks: []header{{
				Chunk: 0,
			}},
		}},
	}

	clone := commit.Clone()
	assert.EqualValues(t, commit, clone)
}

func TestWriterChannel(t *testing.T) {
	w := make(Channel, 1)
	w.Append(Commit{
		Chunk: 123,
	})

	out := <-w
	assert.Equal(t, 123, int(out.Chunk))
}

// TestChunkMinMax verifies the inclusive row index boundaries of a chunk.
func TestChunkMinMax(t *testing.T) {
	tests := []struct {
		chunk Chunk
		min, max uint32
	}{
		{chunk: 0, min: 0, max: chunkSize - 1},
		{chunk: 1, min: chunkSize, max: 2*chunkSize - 1},
		{chunk: 2, min: 2 * chunkSize, max: 3*chunkSize - 1},
	}

	for _, tc := range tests {
		assert.Equal(t, tc.min, tc.chunk.Min())
		assert.Equal(t, tc.max, tc.chunk.Max())
	}
}

func TestChunkAt(t *testing.T) {
	tests := []struct {
		index uint32
		chunk Chunk
	}{
		{index: 0, chunk: 0},
		{index: chunkSize - 1, chunk: 0},
		{index: chunkSize, chunk: 1},
		{index: chunkSize + 1, chunk: 1},
	}

	for _, tc := range tests {
		assert.Equal(t, tc.chunk, ChunkAt(tc.index))
	}
}

// TestChunkOf verifies slicing a bitmap of various sizes down to one chunk;
// expectations are in bits (words * 64).
func TestChunkOf(t *testing.T) {
	tests := []struct {
		size uint32
		chunk Chunk
		expect int
	}{
		{size: 3 * chunkSize, expect: chunkSize, chunk: 0},
		{size: 3 * chunkSize, expect: chunkSize, chunk: 1},
		{size: 3 * chunkSize, expect: chunkSize, chunk: 2},
		{size: 3 * chunkSize, expect: 0, chunk: 3},
		{size: 2*chunkSize - 70, expect: chunkSize, chunk: 0},
		{size: 2*chunkSize - 70, expect: 16320, chunk: 1},
		{size: 2*chunkSize - 70, expect: 0, chunk: 2},
		{size: 2*chunkSize - 10, expect: chunkSize, chunk: 0},
		{size: 2*chunkSize - 10, expect: chunkSize, chunk: 1},
		{size: 2*chunkSize - 10, expect: 0, chunk: 2},
	}

	for _, tc := range tests {
		t.Run(fmt.Sprintf("%v-%v", tc.chunk, tc.size), func(t *testing.T) {
			var tmp bitmap.Bitmap
			tmp.Grow(tc.size - 1)
			assert.Equal(t, tc.expect, len(tc.chunk.OfBitmap(tmp))*64)
		})
	}
}

func TestMin(t *testing.T) {
	tests := []struct {
		v1, v2 int32
		expect int32
	}{
		{v1: 0, v2: 0, expect: 0},
		{v1: 10, v2: 0, expect: 0},
		{v1: 0, v2: 10, expect: 0},
		{v1: 10, v2: 20, expect: 10},
		{v1: 20, v2: 10, expect: 10},
		{v1: 20, v2: 20, expect: 20},
	}

	for _, tc := range tests {
		t.Run(fmt.Sprintf("%v,%v", tc.v1, tc.v2), func(t *testing.T) {
			assert.Equal(t, int(tc.expect), int(min(tc.v1, tc.v2)))
		})
	}
}

// --------------------------- Recorder ----------------------------

// TestCommitCodec round-trips a commit through WriteTo/ReadFrom and checks
// that the interleaved updates for chunk 0 survive intact.
func TestCommitCodec(t *testing.T) {
	buffer := bytes.NewBuffer(nil)
	input := Commit{
		ID: Next(),
		Chunk: 0,
		Updates: []*Buffer{
			newInterleaved("a"),
			newInterleaved("b"),
		},
	}

	// Write into the buffer
	n, err := input.WriteTo(buffer)
	assert.Equal(t, int64(197), n)
	assert.NoError(t, err)

	// Read the commit back
	output := Commit{}
	m, err := output.ReadFrom(buffer)
	assert.NoError(t, err)
	assert.Equal(t, n, m)

	// Make sure commit can be read back
	assert.Equal(t, input.ID, output.ID)
	assert.Equal(t, input.Chunk, output.Chunk)

	updates := make([]int64, 0, 64)
	reader := NewReader()
	reader.Range(output.Updates[0], 0, func(r *Reader) {
		for r.Next() {
			updates = append(updates, int64(r.Offset), r.Int64())
		}
	})
	assert.Equal(t, []int64{20, 1, 21, 2, 40, 4, 41, 5, 60, 7, 61, 8}, updates)
}

// newInterleaved creates a new interleaved buffer: updates at offsets 20000
// and 40000 fall outside chunk 0 and are excluded when ranging over it.
func newInterleaved(columnName string) *Buffer {
	buf := NewBuffer(10)
	buf.Reset(columnName)
	buf.PutInt64(Put, 20, 1)
	buf.PutInt64(Put, 21, 2)
	buf.PutInt64(Put, 20000, 3)
	buf.PutInt64(Put, 40, 4)
	buf.PutInt64(Put, 41, 5)
	buf.PutInt64(Put, 40000, 6)
	buf.PutInt64(Put, 60, 7)
	buf.PutInt64(Put, 61, 8)
	return buf
}

// updatesAt reads a set of int64 updates from a buffer at a given chunk
func updatesAt(buffer *Buffer, chunk Chunk) (updates []int64) {
	reader := NewReader()
	reader.Range(buffer, chunk, func(r *Reader) {
		for r.Next() {
			updates = append(updates, r.Int64())
		}
	})
	return
}

// --------------------------- Mocks ----------------------------

// limitWriter is a mock writer that fails once more than Limit bytes have
// been written, reads nothing, and returns SeekErr from Seek.
type limitWriter struct {
	value uint32
	Limit int
	SeekErr error
}

func (w *limitWriter) Write(p []byte) (int, error) {
	if n := atomic.AddUint32(&w.value, uint32(len(p))); int(n) > w.Limit {
		return 0, io.ErrShortBuffer
	}
	return len(p), nil
}

func (w *limitWriter) Read(p []byte) (int, error) {
	return 0, io.EOF
}

func (w *limitWriter) Seek(offset int64, whence int) (int64, error) {
	return 0, w.SeekErr
}
--------------------------------------------------------------------------------
/snapshot.go:
--------------------------------------------------------------------------------
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

package column

import (
	"errors"
	"fmt"
	"io"
	"os"
	"sync/atomic"
	"unsafe"

	"github.com/kelindar/bitmap"
	"github.com/kelindar/column/commit"
	"github.com/kelindar/iostream"
	"github.com/klauspost/compress/s2"
)

var (
	errUnexpectedEOF = errors.New("column: unable to restore, unexpected EOF")
)

// --------------------------- Commit Replay ---------------------------

// Replay replays a commit on a collection, applying the changes.
func (c *Collection) Replay(change commit.Commit) error {
	return c.Query(func(txn *Txn) error {
		// Mark the chunk dirty and queue the non-empty buffers so the
		// transaction applies them on commit.
		txn.dirty.Set(uint32(change.Chunk))
		for i := range change.Updates {
			if !change.Updates[i].IsEmpty() {
				txn.updates = append(txn.updates, change.Updates[i])
			}
		}
		return nil
	})
}

// --------------------------- Snapshotting ---------------------------

// Restore restores the collection from the underlying snapshot reader. This operation
// should be called before any of transactions, right after initialization.
func (c *Collection) Restore(snapshot io.Reader) error {
	// Restore the s2-compressed state first
	commits, err := c.readState(s2.NewReader(snapshot))
	if err != nil {
		return err
	}

	// Reconcile the pending commit log: replay only the commits that are
	// newer than what the restored state already contains for their chunk.
	return commit.Open(snapshot).Range(func(commit commit.Commit) error {
		lastCommit := commits[commit.Chunk]
		if commit.ID > lastCommit {
			return c.Replay(commit)
		}
		return nil
	})
}

// Snapshot writes a collection snapshot into the underlying writer.
60 | func (c *Collection) Snapshot(dst io.Writer) error { 61 | recorder, err := c.recorderOpen() 62 | if err != nil { 63 | return err 64 | } 65 | 66 | // Take a snapshot of the current state 67 | defer os.Remove(recorder.Name()) 68 | if _, err := c.writeState(s2.NewWriter(dst)); err != nil { 69 | return err 70 | } 71 | 72 | // Close the recorder 73 | c.recorderClose() 74 | return recorder.Copy(dst) 75 | } 76 | 77 | // recorderOpen opens a recorder for commits while the snapshot is in progress 78 | func (c *Collection) recorderOpen() (log *commit.Log, err error) { 79 | if log, err = commit.OpenTemp(); err == nil { 80 | dst := (*unsafe.Pointer)(unsafe.Pointer(&c.record)) 81 | ptr := unsafe.Pointer(log) 82 | if !atomic.CompareAndSwapPointer(dst, nil, ptr) { 83 | return nil, fmt.Errorf("column: unable to snapshot, another one might be in progress") 84 | } 85 | } 86 | return 87 | } 88 | 89 | // recorderClose closes the pending commit recorder and deletes the file 90 | func (c *Collection) recorderClose() { 91 | if _, ok := c.isSnapshotting(); ok { 92 | dst := (*unsafe.Pointer)(unsafe.Pointer(&c.record)) 93 | atomic.StorePointer(dst, nil) 94 | } 95 | } 96 | 97 | // isSnapshotting loads a currently used commit log for a pending snapshot 98 | func (c *Collection) isSnapshotting() (*commit.Log, bool) { 99 | dst := (*unsafe.Pointer)(unsafe.Pointer(&c.record)) 100 | ptr := atomic.LoadPointer(dst) 101 | if ptr == nil { 102 | return nil, false 103 | } 104 | 105 | return (*commit.Log)(ptr), true 106 | } 107 | 108 | // --------------------------- Collection Encoding --------------------------- 109 | 110 | // writeState writes collection state into the specified writer. 
// writeState writes collection state into the specified writer: a version
// marker, the column count, then per-chunk the last commit ID, an "insert"
// pseudo-column describing the fill list, and a snapshot of every column.
func (c *Collection) writeState(dst io.Writer) (int64, error) {
	writer := iostream.NewWriter(dst)
	buffer := c.txns.acquirePage(rowColumn)
	defer c.txns.releasePage(buffer)

	// Write the schema version
	if err := writer.WriteUvarint(0x1); err != nil {
		return writer.Offset(), err
	}

	// Load the number of columns and the max index
	chunks := c.chunks()
	columns := uint64(c.cols.Count()) + 1 // extra 'insert' column

	// Write the number of columns
	if err := writer.WriteUvarint(columns); err != nil {
		return writer.Offset(), err
	}

	// Write each chunk; readChunk takes the appropriate locks per chunk
	if err := writer.WriteRange(chunks, func(i int, w *iostream.Writer) error {
		return c.readChunk(commit.Chunk(i), func(lastCommit uint64, chunk commit.Chunk, fill bitmap.Bitmap) error {
			offset := chunk.Min()

			// Write the last written commit for this chunk
			if err := writer.WriteUvarint(lastCommit); err != nil {
				return err
			}

			// Write the inserts column: one insert operation per filled row,
			// re-based to absolute row indexes via the chunk offset
			buffer.Reset(rowColumn)
			fill.Range(func(idx uint32) {
				buffer.PutOperation(commit.Insert, offset+idx)
			})
			if err := writer.WriteSelf(buffer); err != nil {
				return err
			}

			// Snapshot each column and write the buffer
			return c.cols.RangeUntil(func(column *column) error {
				if !column.Snapshot(chunk, buffer) {
					return nil // Skip indexes
				}
				return writer.WriteSelf(buffer)
			})
		})
	}); err != nil {
		return writer.Offset(), err
	}

	return writer.Offset(), writer.Flush()
}

// readState reads a collection snapshotted state from the underlying reader. It
// returns the last commit IDs for each chunk.
// readState reads a collection snapshotted state from the underlying reader
// (mirror of writeState) and returns the last commit ID for each chunk.
func (c *Collection) readState(src io.Reader) (map[commit.Chunk]uint64, error) {
	r := iostream.NewReader(src)
	commits := make(map[commit.Chunk]uint64)

	// Read the version and make sure it matches
	version, err := r.ReadUvarint()
	if err != nil || version != 0x1 {
		return nil, fmt.Errorf("column: unable to restore (version %d) %v", version, err)
	}

	// Read the number of columns (includes the extra 'insert' column)
	columns, err := r.ReadUvarint()
	if err != nil {
		return nil, err
	}

	// Read each chunk, applying its buffers within a single transaction
	return commits, r.ReadRange(func(chunk int, r *iostream.Reader) error {
		return c.Query(func(txn *Txn) error {
			txn.dirty.Set(uint32(chunk))

			// Read the last written commit ID for the chunk
			if commits[commit.Chunk(chunk)], err = r.ReadUvarint(); err != nil {
				return err
			}

			for i := uint64(0); i < columns; i++ {
				buffer := txn.owner.txns.acquirePage("")
				_, err := buffer.ReadFrom(r)
				switch {
				// NOTE(review): `i < columns` is always true inside this
				// loop; the EOF case fires for any short read of a column
				case err == io.EOF && i < columns:
					return errUnexpectedEOF
				case err != nil:
					return err
				default:
					txn.updates = append(txn.updates, buffer)
				}
			}

			return nil
		})
	})
}

// chunks returns the number of chunks and columns
func (c *Collection) chunks() int {
	c.lock.Lock()
	defer c.lock.Unlock()
	if len(c.fill) == 0 {
		return 0
	}

	// The highest filled row index determines the last chunk in use
	max, _ := c.fill.Max()
	return int(commit.ChunkAt(max) + 1)
}

// readChunk acquires appropriate locks for a chunk and executes a read callback.
// This is used for snapshotting purposes only.
func (c *Collection) readChunk(chunk commit.Chunk, fn func(uint64, commit.Chunk, bitmap.Bitmap) error) error {

	// Lock both the chunk and the fill list
	c.slock.RLock(uint(chunk))
	c.lock.Lock()
	defer c.slock.RUnlock(uint(chunk))
	defer c.lock.Unlock()
	return fn(c.commits[chunk], chunk, chunk.OfBitmap(c.fill))
}
--------------------------------------------------------------------------------
/column_numeric.go:
--------------------------------------------------------------------------------
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

package column

import (
	"fmt"

	"github.com/kelindar/bitmap"
	"github.com/kelindar/column/commit"
	"github.com/kelindar/simd"
)

//go:generate go run ./codegen/main.go

// readNumber is a helper function for point reads
func readNumber[T simd.Number](txn *Txn, columnName string) (value T, found bool) {
	if column, ok := txn.columnAt(columnName); ok {
		if rdr, ok := column.Column.(*numericColumn[T]); ok {
			value, found = rdr.load(txn.cursor)
		}
	}
	return
}

// --------------------------- Generic Column ----------------------------

// numericColumn represents a numeric column
type numericColumn[T simd.Number] struct {
	chunks[T]           // Chunked data storage with fill bitmaps
	option[T]           // Column options (e.g. merge strategy)
	write func(*commit.Buffer, uint32, T)                       // Encodes one value into a commit buffer
	apply func(*commit.Reader, bitmap.Bitmap, []T, option[T])   // Applies commit operations to a chunk
}

// makeNumeric creates a new vector for simd.Numbers
func makeNumeric[T simd.Number](
	write func(*commit.Buffer, uint32, T),
	apply func(*commit.Reader, bitmap.Bitmap, []T, option[T]),
	opts []func(*option[T]),
) *numericColumn[T] {
	return &numericColumn[T]{
		chunks: make(chunks[T], 0, 4),
		write: write,
		apply: apply,
		// Default merge for numbers is addition
		option: configure(opts, option[T]{
			Merge: func(value, delta T) T { return value + delta },
		}),
	}
}

// --------------------------- Accessors ----------------------------

// Contains checks whether the column has a value at a specified index.
// NOTE(review): unlike load, this does not bounds-check the chunk index —
// confirm callers never pass an index beyond the grown range.
func (c *numericColumn[T]) Contains(idx uint32) bool {
	chunk := commit.ChunkAt(idx)
	return c.chunks[chunk].fill.Contains(idx - chunk.Min())
}

// load retrieves a float64 value at a specified index
func (c *numericColumn[T]) load(idx uint32) (v T, ok bool) {
	chunk := commit.ChunkAt(idx)
	index := idx - chunk.Min()
	if int(chunk) < len(c.chunks) && c.chunks[chunk].fill.Contains(index) {
		v, ok = c.chunks[chunk].data[index], true
	}
	return
}

// Value retrieves a value at a specified index
func (c *numericColumn[T]) Value(idx uint32) (any, bool) {
	return c.load(idx)
}

// LoadFloat64 retrieves a float64 value at a specified index
func (c *numericColumn[T]) LoadFloat64(idx uint32) (float64, bool) {
	v, ok := c.load(idx)
	return float64(v), ok
}

// LoadInt64 retrieves an int64 value at a specified index
func (c *numericColumn[T]) LoadInt64(idx uint32) (int64, bool) {
	v, ok := c.load(idx)
	return int64(v), ok
}

// LoadUint64 retrieves an uint64 value at a specified index
func (c *numericColumn[T]) LoadUint64(idx uint32) (uint64, bool) {
	v, ok := c.load(idx)
	return uint64(v), ok
}

// --------------------------- Filtering ----------------------------

// filterNumbers filters down the values based on the specified predicate.
96 | func filterNumbers[T, C simd.Number](column *numericColumn[T], chunk commit.Chunk, index bitmap.Bitmap, predicate func(C) bool) { 97 | if int(chunk) < len(column.chunks) { 98 | fill, data := column.chunkAt(chunk) 99 | index.And(fill) 100 | index.Filter(func(idx uint32) bool { 101 | return predicate(C(data[idx])) 102 | }) 103 | } 104 | } 105 | 106 | // FilterFloat64 filters down the values based on the specified predicate. 107 | func (c *numericColumn[T]) FilterFloat64(chunk commit.Chunk, index bitmap.Bitmap, predicate func(float64) bool) { 108 | filterNumbers(c, chunk, index, predicate) 109 | } 110 | 111 | // FilterInt64 filters down the values based on the specified predicate. 112 | func (c *numericColumn[T]) FilterInt64(chunk commit.Chunk, index bitmap.Bitmap, predicate func(int64) bool) { 113 | filterNumbers(c, chunk, index, predicate) 114 | } 115 | 116 | // FilterUint64 filters down the values based on the specified predicate. 117 | func (c *numericColumn[T]) FilterUint64(chunk commit.Chunk, index bitmap.Bitmap, predicate func(uint64) bool) { 118 | filterNumbers(c, chunk, index, predicate) 119 | } 120 | 121 | // --------------------------- Apply & Snapshot ---------------------------- 122 | 123 | // Apply applies a set of operations to the column. 
func (c *numericColumn[T]) Apply(chunk commit.Chunk, r *commit.Reader) {
	fill, data := c.chunkAt(chunk)
	c.apply(r, fill, data, c.option)
}

// Snapshot writes the entire column into the specified destination buffer
func (c *numericColumn[T]) Snapshot(chunk commit.Chunk, dst *commit.Buffer) {
	fill, data := c.chunkAt(chunk)
	fill.Range(func(x uint32) {
		// Re-base the chunk-local index to an absolute row index
		c.write(dst, chunk.Min()+x, data[x])
	})
}

// --------------------------- Reader/Writer ----------------------------

// rdNumber represents a read-only accessor for simd.Numbers
type rdNumber[T simd.Number] struct {
	reader *numericColumn[T] // The column being read
	txn *Txn                 // The transaction providing cursor and selection
}

// Get loads the value at the current transaction cursor
func (s rdNumber[T]) Get() (T, bool) {
	return s.reader.load(s.txn.cursor)
}

// Sum computes a sum of the column values selected by this transaction
func (s rdNumber[T]) Sum() (sum T) {
	s.txn.initialize()
	s.txn.rangeRead(func(chunk commit.Chunk, index bitmap.Bitmap) {
		if int(chunk) < len(s.reader.chunks) {
			sum += bitmap.Sum(s.reader.chunks[chunk].data, index)
		}
	})
	return sum
}

// Avg computes an arithmetic mean of the column values selected by this transaction.
// NOTE(review): returns NaN when the selection is empty (division by zero count).
func (s rdNumber[T]) Avg() float64 {
	sum, ct := T(0), 0
	s.txn.initialize()
	s.txn.rangeRead(func(chunk commit.Chunk, index bitmap.Bitmap) {
		if int(chunk) < len(s.reader.chunks) {
			sum += bitmap.Sum(s.reader.chunks[chunk].data, index)
			ct += index.Count()
		}
	})
	return float64(sum) / float64(ct)
}

// Min finds the smallest value from the column values selected by this transaction;
// ok is false when the selection is empty.
func (s rdNumber[T]) Min() (min T, ok bool) {
	s.txn.initialize()
	s.txn.rangeRead(func(chunk commit.Chunk, index bitmap.Bitmap) {
		if int(chunk) < len(s.reader.chunks) {
			// !ok accepts the first candidate regardless of its value
			if v, hit := bitmap.Min(s.reader.chunks[chunk].data, index); hit && (v < min || !ok) {
				min = v
				ok = true
			}
		}
	})
	return
}

// Max finds the largest value from the column values selected by this transaction;
// ok is false when the selection is empty.
func (s rdNumber[T]) Max() (max T, ok bool) {
	s.txn.initialize()
	s.txn.rangeRead(func(chunk commit.Chunk, index bitmap.Bitmap) {
		if int(chunk) < len(s.reader.chunks) {
			if v, hit := bitmap.Max(s.reader.chunks[chunk].data, index); hit && (v > max || !ok) {
				max = v
				ok = true
			}
		}
	})
	return
}

// readNumberOf creates a new numeric reader; panics if the column does not
// exist or is not a numeric column of type T.
func readNumberOf[T simd.Number](txn *Txn, columnName string) rdNumber[T] {
	column, ok := txn.columnAt(columnName)
	if !ok {
		panic(fmt.Errorf("column: column '%s' does not exist", columnName))
	}

	reader, ok := column.Column.(*numericColumn[T])
	if !ok {
		panic(fmt.Errorf("column: column '%s' is not of type %T", columnName, T(0)))
	}

	return rdNumber[T]{
		reader: reader,
		txn: txn,
	}
}
--------------------------------------------------------------------------------
/column_index.go:
--------------------------------------------------------------------------------
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.

package column

import (
	"strings"
	"sync"

	"github.com/kelindar/bitmap"
	"github.com/kelindar/column/commit"

	"github.com/tidwall/btree"
)

// --------------------------- Reader ---------------------------

// Reader represents a reader cursor for a specific row/column combination.
19 | type Reader interface { 20 | IsUpsert() bool 21 | IsDelete() bool 22 | Index() uint32 23 | String() string 24 | Bytes() []byte 25 | Float() float64 26 | Int() int 27 | Uint() uint 28 | Bool() bool 29 | } 30 | 31 | // Assert reader implementations. Both our cursor and commit reader need to implement 32 | // this so that we can feed it to the index transparently. 33 | var _ Reader = new(commit.Reader) 34 | 35 | // computed represents a computed column 36 | type computed interface { 37 | Column() string 38 | } 39 | 40 | // --------------------------- Index ---------------------------- 41 | 42 | // columnIndex represents the index implementation 43 | type columnIndex struct { 44 | fill bitmap.Bitmap // The fill list for the column 45 | name string // The name of the target column 46 | rule func(Reader) bool // The rule to apply when building the index 47 | } 48 | 49 | // newIndex creates a new bitmap index column. 50 | func newIndex(indexName, columnName string, rule func(Reader) bool) *column { 51 | return columnFor(indexName, &columnIndex{ 52 | fill: make(bitmap.Bitmap, 0, 4), 53 | name: columnName, 54 | rule: rule, 55 | }) 56 | } 57 | 58 | // Grow grows the size of the column until we have enough to store 59 | func (c *columnIndex) Grow(idx uint32) { 60 | c.fill.Grow(idx) 61 | } 62 | 63 | // Column returns the target name of the column on which this index should apply. 64 | func (c *columnIndex) Column() string { 65 | return c.name 66 | } 67 | 68 | // Apply applies a set of operations to the column. 69 | func (c *columnIndex) Apply(chunk commit.Chunk, r *commit.Reader) { 70 | 71 | // Index can only be updated based on the final stored value, so we can only work 72 | // with put operations here. The trick is to update the final value after applying 73 | // on the actual column. 
74 | for r.Next() { 75 | switch r.Type { 76 | case commit.Put: 77 | if c.rule(r) { 78 | c.fill.Set(uint32(r.Offset)) 79 | } else { 80 | c.fill.Remove(uint32(r.Offset)) 81 | } 82 | case commit.Delete: 83 | c.fill.Remove(uint32(r.Offset)) 84 | } 85 | } 86 | } 87 | 88 | // Value retrieves a value at a specified index. 89 | func (c *columnIndex) Value(idx uint32) (v interface{}, ok bool) { 90 | if idx < uint32(len(c.fill))<<6 { 91 | v, ok = c.fill.Contains(idx), true 92 | } 93 | return 94 | } 95 | 96 | // Contains checks whether the column has a value at a specified index. 97 | func (c *columnIndex) Contains(idx uint32) bool { 98 | return c.fill.Contains(idx) 99 | } 100 | 101 | // Index returns the fill list for the column 102 | func (c *columnIndex) Index(chunk commit.Chunk) bitmap.Bitmap { 103 | return chunk.OfBitmap(c.fill) 104 | } 105 | 106 | // Snapshot writes the entire column into the specified destination buffer 107 | func (c *columnIndex) Snapshot(chunk commit.Chunk, dst *commit.Buffer) { 108 | dst.PutBitmap(commit.PutTrue, chunk, c.fill) 109 | } 110 | 111 | // --------------------------- Trigger ---------------------------- 112 | 113 | // columnTrigger represents the trigger implementation 114 | type columnTrigger struct { 115 | name string // The name of the target column 116 | clbk func(Reader) // The trigger callback 117 | } 118 | 119 | // newTrigger creates a new trigger column. 120 | func newTrigger(indexName, columnName string, callback func(r Reader)) *column { 121 | return columnFor(indexName, &columnTrigger{ 122 | name: columnName, 123 | clbk: callback, 124 | }) 125 | } 126 | 127 | // Column returns the target name of the column on which this index should apply. 128 | func (c *columnTrigger) Column() string { 129 | return c.name 130 | } 131 | 132 | // Grow grows the size of the column until we have enough to store 133 | func (c *columnTrigger) Grow(idx uint32) { 134 | // Noop 135 | } 136 | 137 | // Apply applies a set of operations to the column. 
138 | func (c *columnTrigger) Apply(chunk commit.Chunk, r *commit.Reader) { 139 | for r.Next() { 140 | if r.Type == commit.Put || r.Type == commit.Delete { 141 | c.clbk(r) 142 | } 143 | } 144 | } 145 | 146 | // Value retrieves a value at a specified index. 147 | func (c *columnTrigger) Value(idx uint32) (v any, ok bool) { 148 | return nil, false 149 | } 150 | 151 | // Contains checks whether the column has a value at a specified index. 152 | func (c *columnTrigger) Contains(idx uint32) bool { 153 | return false 154 | } 155 | 156 | // Index returns the fill list for the column 157 | func (c *columnTrigger) Index(chunk commit.Chunk) bitmap.Bitmap { 158 | return nil 159 | } 160 | 161 | // Snapshot writes the entire column into the specified destination buffer 162 | func (c *columnTrigger) Snapshot(chunk commit.Chunk, dst *commit.Buffer) { 163 | // Noop 164 | } 165 | 166 | // ----------------------- Sorted Index -------------------------- 167 | 168 | type sortIndexItem struct { 169 | Key string 170 | Value uint32 171 | } 172 | 173 | // columnSortIndex implements a constantly sorted column via BTree 174 | type columnSortIndex struct { 175 | btree *btree.BTreeG[sortIndexItem] // 1 constantly sorted data structure 176 | backMap map[uint32]string // for constant key lookups 177 | backLock sync.Mutex // protect backMap access 178 | name string // The name of the target column 179 | } 180 | 181 | // newSortIndex creates a new bitmap index column. 
182 | func newSortIndex(indexName, columnName string) *column { 183 | byKeys := func(a, b sortIndexItem) bool { 184 | return a.Key < b.Key 185 | } 186 | return columnFor(indexName, &columnSortIndex{ 187 | btree: btree.NewBTreeG(byKeys), 188 | backMap: make(map[uint32]string), 189 | name: columnName, 190 | }) 191 | } 192 | 193 | // Grow grows the size of the column until we have enough to store 194 | func (c *columnSortIndex) Grow(idx uint32) { 195 | return 196 | } 197 | 198 | // Column returns the target name of the column on which this index should apply. 199 | func (c *columnSortIndex) Column() string { 200 | return c.name 201 | } 202 | 203 | // Apply applies a set of operations to the column. 204 | func (c *columnSortIndex) Apply(chunk commit.Chunk, r *commit.Reader) { 205 | 206 | // Index can only be updated based on the final stored value, so we can only work 207 | // with put, merge, & delete operations here. 208 | for r.Next() { 209 | c.backLock.Lock() 210 | switch r.Type { 211 | case commit.Put: 212 | if delKey, exists := c.backMap[r.Index()]; exists { 213 | c.btree.Delete(sortIndexItem{ 214 | Key: delKey, 215 | Value: r.Index(), 216 | }) 217 | } 218 | upsertKey := strings.Clone(r.String()) // alloc required 219 | c.backMap[r.Index()] = upsertKey 220 | c.btree.Set(sortIndexItem{ 221 | Key: upsertKey, 222 | Value: r.Index(), 223 | }) 224 | case commit.Delete: 225 | delKey, _ := c.backMap[r.Index()] 226 | c.btree.Delete(sortIndexItem{ 227 | Key: delKey, 228 | Value: r.Index(), 229 | }) 230 | } 231 | c.backLock.Unlock() 232 | } 233 | } 234 | 235 | // Value retrieves a value at a specified index. 236 | func (c *columnSortIndex) Value(idx uint32) (v interface{}, ok bool) { 237 | return nil, false 238 | } 239 | 240 | // Contains checks whether the column has a value at a specified index. 
241 | func (c *columnSortIndex) Contains(idx uint32) bool { 242 | return false 243 | } 244 | 245 | // Index returns the fill list for the column 246 | func (c *columnSortIndex) Index(chunk commit.Chunk) bitmap.Bitmap { 247 | return nil 248 | } 249 | 250 | // Snapshot writes the entire column into the specified destination buffer 251 | func (c *columnSortIndex) Snapshot(chunk commit.Chunk, dst *commit.Buffer) { 252 | // No-op 253 | } 254 | -------------------------------------------------------------------------------- /column.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package column 5 | 6 | import ( 7 | "fmt" 8 | "reflect" 9 | "sync" 10 | 11 | "github.com/kelindar/bitmap" 12 | "github.com/kelindar/column/commit" 13 | ) 14 | 15 | // columnType represents a type of a column. 16 | type columnType uint8 17 | 18 | const ( 19 | typeGeneric = columnType(0) // Generic column, every column should support this 20 | typeNumeric = columnType(1 << 0) // Numeric column supporting float64, int64 or uint64 21 | typeTextual = columnType(1 << 1) // Textual column supporting strings 22 | ) 23 | 24 | // typeOf resolves all supported types of the column 25 | func typeOf(column Column) (typ columnType) { 26 | if _, ok := column.(Numeric); ok { 27 | typ = typ | typeNumeric 28 | } 29 | if _, ok := column.(Textual); ok { 30 | typ = typ | typeTextual 31 | } 32 | return 33 | } 34 | 35 | // --------------------------- Contracts ---------------------------- 36 | 37 | // Column represents a column implementation 38 | type Column interface { 39 | Grow(idx uint32) 40 | Apply(commit.Chunk, *commit.Reader) 41 | Value(idx uint32) (interface{}, bool) 42 | Contains(idx uint32) bool 43 | Index(commit.Chunk) bitmap.Bitmap 44 | Snapshot(chunk commit.Chunk, dst *commit.Buffer) 45 | } 46 | 47 | 
// Numeric represents a column that stores numbers. 48 | type Numeric interface { 49 | Column 50 | LoadFloat64(uint32) (float64, bool) 51 | LoadUint64(uint32) (uint64, bool) 52 | LoadInt64(uint32) (int64, bool) 53 | FilterFloat64(commit.Chunk, bitmap.Bitmap, func(v float64) bool) 54 | FilterUint64(commit.Chunk, bitmap.Bitmap, func(v uint64) bool) 55 | FilterInt64(commit.Chunk, bitmap.Bitmap, func(v int64) bool) 56 | } 57 | 58 | // Textual represents a column that stores strings. 59 | type Textual interface { 60 | Column 61 | LoadString(uint32) (string, bool) 62 | FilterString(commit.Chunk, bitmap.Bitmap, func(v string) bool) 63 | } 64 | 65 | // --------------------------- Constructors ---------------------------- 66 | 67 | // Various column constructor functions for a specific types. 68 | var ( 69 | ForString = makeStrings 70 | ForFloat32 = makeFloat32s 71 | ForFloat64 = makeFloat64s 72 | ForInt = makeInts 73 | ForInt16 = makeInt16s 74 | ForInt32 = makeInt32s 75 | ForInt64 = makeInt64s 76 | ForUint = makeUints 77 | ForUint16 = makeUint16s 78 | ForUint32 = makeUint32s 79 | ForUint64 = makeUint64s 80 | ForBool = makeBools 81 | ForEnum = makeEnum 82 | ForKey = makeKey 83 | ) 84 | 85 | // ForKind creates a new column instance for a specified reflect.Kind 86 | func ForKind(kind reflect.Kind) (Column, error) { 87 | switch kind { 88 | case reflect.Float32: 89 | return makeFloat32s(), nil 90 | case reflect.Float64: 91 | return makeFloat64s(), nil 92 | case reflect.Int: 93 | return makeInts(), nil 94 | case reflect.Int16: 95 | return makeInt16s(), nil 96 | case reflect.Int32: 97 | return makeInt32s(), nil 98 | case reflect.Int64: 99 | return makeInt64s(), nil 100 | case reflect.Uint: 101 | return makeUints(), nil 102 | case reflect.Uint16: 103 | return makeUint16s(), nil 104 | case reflect.Uint32: 105 | return makeUint32s(), nil 106 | case reflect.Uint64: 107 | return makeUint64s(), nil 108 | case reflect.Bool: 109 | return makeBools(), nil 110 | case reflect.String: 111 | 
return makeStrings(), nil 112 | default: 113 | return nil, fmt.Errorf("column: unsupported column kind (%v)", kind) 114 | } 115 | } 116 | 117 | // --------------------------- Generic Options ---------------------------- 118 | 119 | // option represents options for variouos columns. 120 | type option[T any] struct { 121 | Merge func(value, delta T) T 122 | } 123 | 124 | // configure applies options 125 | func configure[T any](opts []func(*option[T]), dst option[T]) option[T] { 126 | for _, fn := range opts { 127 | fn(&dst) 128 | } 129 | return dst 130 | } 131 | 132 | // WithMerge sets an optional merge function that allows you to merge a delta value to 133 | // an existing value, atomically. The operation is performed transactionally. 134 | func WithMerge[T any](fn func(value, delta T) T) func(*option[T]) { 135 | return func(v *option[T]) { 136 | v.Merge = fn 137 | } 138 | } 139 | 140 | // --------------------------- Column ---------------------------- 141 | 142 | // column represents a column wrapper that synchronizes operations 143 | type column struct { 144 | Column 145 | lock sync.RWMutex // The lock to protect the entire column 146 | kind columnType // The type of the colum 147 | name string // The name of the column 148 | } 149 | 150 | // columnFor creates a synchronized column for a column implementation 151 | func columnFor(name string, v Column) *column { 152 | return &column{ 153 | kind: typeOf(v), 154 | name: name, 155 | Column: v, 156 | } 157 | } 158 | 159 | // IsIndex returns whether the column is an index 160 | func (c *column) IsIndex() bool { 161 | _, ok := c.Column.(*columnIndex) 162 | return ok 163 | } 164 | 165 | // IsNumeric checks whether a column type supports certain numerical operations. 166 | func (c *column) IsNumeric() bool { 167 | return (c.kind & typeNumeric) == typeNumeric 168 | } 169 | 170 | // IsTextual checks whether a column type supports certain string operations. 
171 | func (c *column) IsTextual() bool { 172 | return (c.kind & typeTextual) == typeTextual 173 | } 174 | 175 | // Grow grows the size of the column 176 | func (c *column) Grow(idx uint32) { 177 | c.lock.Lock() 178 | defer c.lock.Unlock() 179 | 180 | c.Column.Grow(idx) 181 | } 182 | 183 | // Apply performs a series of operations on a column. 184 | func (c *column) Apply(chunk commit.Chunk, r *commit.Reader) { 185 | c.lock.RLock() 186 | defer c.lock.RUnlock() 187 | 188 | r.Rewind() 189 | c.Column.Apply(chunk, r) 190 | } 191 | 192 | // Index loads the appropriate column index for a given chunk 193 | func (c *column) Index(chunk commit.Chunk) bitmap.Bitmap { 194 | c.lock.RLock() 195 | defer c.lock.RUnlock() 196 | return c.Column.Index(chunk) 197 | } 198 | 199 | // Snapshot takes a snapshot of a column, skipping indexes 200 | func (c *column) Snapshot(chunk commit.Chunk, buffer *commit.Buffer) bool { 201 | if c.IsIndex() { 202 | return false 203 | } 204 | 205 | buffer.Reset(c.name) 206 | c.Column.Snapshot(chunk, buffer) 207 | return true 208 | } 209 | 210 | // Value retrieves a value at a specified index 211 | func (c *column) Value(idx uint32) (v interface{}, ok bool) { 212 | v, ok = c.Column.Value(idx) 213 | return 214 | } 215 | 216 | // --------------------------- Accessor ---------------------------- 217 | 218 | // Reader represents a generic reader 219 | type reader[T any] struct { 220 | cursor *uint32 221 | reader T 222 | } 223 | 224 | // readerFor creates a read-only accessor 225 | func readerFor[T any](txn *Txn, columnName string) reader[T] { 226 | column, ok := txn.columnAt(columnName) 227 | if !ok { 228 | panic(fmt.Errorf("column: column '%s' does not exist", columnName)) 229 | } 230 | 231 | target, ok := column.Column.(T) 232 | if !ok { 233 | var want T 234 | panic(fmt.Errorf("column: column '%s' is not of specified type (has=%T, want=%T)", 235 | columnName, column.Column, want)) 236 | } 237 | 238 | return reader[T]{ 239 | cursor: &txn.cursor, 240 | reader: 
target, 241 | } 242 | } 243 | 244 | // --------------------------- Any Writer ---------------------------- 245 | 246 | // rwAny represents read-write accessor for any column type 247 | type rwAny struct { 248 | rdAny 249 | writer *commit.Buffer 250 | } 251 | 252 | // Set sets the value at the current transaction cursor 253 | func (s rwAny) Set(value any) error { 254 | return s.writer.PutAny(commit.Put, *s.cursor, value) 255 | } 256 | 257 | // --------------------------- Any Reader ---------------------------- 258 | 259 | // rdAny represents a read-only accessor for any value 260 | type rdAny reader[Column] 261 | 262 | // Get loads the value at the current transaction cursor 263 | func (s rdAny) Get() (any, bool) { 264 | return s.reader.Value(*s.cursor) 265 | } 266 | 267 | // readAnyOf creates a new any reader 268 | func readAnyOf(txn *Txn, columnName string) rdAny { 269 | return rdAny(readerFor[Column](txn, columnName)) 270 | } 271 | 272 | // Any returns a column accessor 273 | func (txn *Txn) Any(columnName string) rwAny { 274 | return rwAny{ 275 | rdAny: readAnyOf(txn, columnName), 276 | writer: txn.bufferFor(columnName), 277 | } 278 | } 279 | 280 | // --------------------------- segment list ---------------------------- 281 | 282 | // Chunks represents a chunked array storage 283 | type chunks[T any] []struct { 284 | fill bitmap.Bitmap // The fill-list 285 | data []T // The actual values 286 | } 287 | 288 | // chunkAt loads the fill and data list at a particular chunk 289 | func (s chunks[T]) chunkAt(chunk commit.Chunk) (bitmap.Bitmap, []T) { 290 | fill := s[chunk].fill 291 | data := s[chunk].data 292 | return fill, data 293 | } 294 | 295 | // Grow grows a segment list 296 | func (s *chunks[T]) Grow(idx uint32) { 297 | chunk := int(commit.ChunkAt(idx)) 298 | for i := len(*s); i <= chunk; i++ { 299 | *s = append(*s, struct { 300 | fill bitmap.Bitmap 301 | data []T 302 | }{ 303 | fill: make(bitmap.Bitmap, chunkSize/64), 304 | data: make([]T, chunkSize), 305 | }) 
306 | } 307 | } 308 | 309 | // Index returns the fill list for the segment 310 | func (s chunks[T]) Index(chunk commit.Chunk) (fill bitmap.Bitmap) { 311 | if int(chunk) < len(s) { 312 | fill = s[chunk].fill 313 | } 314 | return 315 | } 316 | -------------------------------------------------------------------------------- /txn_row.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package column 5 | 6 | import ( 7 | "encoding" 8 | "fmt" 9 | 10 | "github.com/kelindar/column/commit" 11 | ) 12 | 13 | // Row represents a cursor at a particular row offest in the transaction. 14 | type Row struct { 15 | txn *Txn 16 | } 17 | 18 | // Index returns the index of the row 19 | func (r Row) Index() uint32 { 20 | return r.txn.Index() 21 | } 22 | 23 | // --------------------------- Numbers ---------------------------- 24 | 25 | // Int loads a int value at a particular column 26 | func (r Row) Int(columnName string) (v int, ok bool) { 27 | return readNumber[int](r.txn, columnName) 28 | } 29 | 30 | // SetInt stores a int value at a particular column 31 | func (r Row) SetInt(columnName string, value int) { 32 | r.txn.Int(columnName).Set(value) 33 | } 34 | 35 | // MergeInt atomically merges a delta into int value at a particular column 36 | func (r Row) MergeInt(columnName string, value int) { 37 | r.txn.Int(columnName).Merge(value) 38 | } 39 | 40 | // Int16 loads a int16 value at a particular column 41 | func (r Row) Int16(columnName string) (v int16, ok bool) { 42 | return readNumber[int16](r.txn, columnName) 43 | } 44 | 45 | // SetInt16 stores a int16 value at a particular column 46 | func (r Row) SetInt16(columnName string, value int16) { 47 | r.txn.Int16(columnName).Set(value) 48 | } 49 | 50 | // MergeInt16 atomically merges a delta into int16 value at a particular column 51 
| func (r Row) MergeInt16(columnName string, value int16) { 52 | r.txn.Int16(columnName).Merge(value) 53 | } 54 | 55 | // Int32 loads a int32 value at a particular column 56 | func (r Row) Int32(columnName string) (v int32, ok bool) { 57 | return readNumber[int32](r.txn, columnName) 58 | } 59 | 60 | // SetInt32 stores a int32 value at a particular column 61 | func (r Row) SetInt32(columnName string, value int32) { 62 | r.txn.Int32(columnName).Set(value) 63 | } 64 | 65 | // MergeInt32 atomically merges a delta into int32 value at a particular column 66 | func (r Row) MergeInt32(columnName string, value int32) { 67 | r.txn.Int32(columnName).Merge(value) 68 | } 69 | 70 | // Int64 loads a int64 value at a particular column 71 | func (r Row) Int64(columnName string) (v int64, ok bool) { 72 | return readNumber[int64](r.txn, columnName) 73 | } 74 | 75 | // SetInt64 stores a int64 value at a particular column 76 | func (r Row) SetInt64(columnName string, value int64) { 77 | r.txn.Int64(columnName).Set(value) 78 | } 79 | 80 | // MergeInt64 atomically merges a delta into int64 value at a particular column 81 | func (r Row) MergeInt64(columnName string, value int64) { 82 | r.txn.Int64(columnName).Merge(value) 83 | } 84 | 85 | // Uint loads a uint value at a particular column 86 | func (r Row) Uint(columnName string) (v uint, ok bool) { 87 | return readNumber[uint](r.txn, columnName) 88 | } 89 | 90 | // SetUint stores a uint value at a particular column 91 | func (r Row) SetUint(columnName string, value uint) { 92 | r.txn.Uint(columnName).Set(value) 93 | } 94 | 95 | // MergeUint atomically merges a delta into uint value at a particular column 96 | func (r Row) MergeUint(columnName string, value uint) { 97 | r.txn.Uint(columnName).Merge(value) 98 | } 99 | 100 | // Uint16 loads a uint16 value at a particular column 101 | func (r Row) Uint16(columnName string) (v uint16, ok bool) { 102 | return readNumber[uint16](r.txn, columnName) 103 | } 104 | 105 | // SetUint16 stores a uint16 
value at a particular column 106 | func (r Row) SetUint16(columnName string, value uint16) { 107 | r.txn.Uint16(columnName).Set(value) 108 | } 109 | 110 | // MergeUint16 atomically merges a delta into uint16 value at a particular column 111 | func (r Row) MergeUint16(columnName string, value uint16) { 112 | r.txn.Uint16(columnName).Merge(value) 113 | } 114 | 115 | // Uint32 loads a uint32 value at a particular column 116 | func (r Row) Uint32(columnName string) (v uint32, ok bool) { 117 | return readNumber[uint32](r.txn, columnName) 118 | } 119 | 120 | // SetUint32 stores a uint32 value at a particular column 121 | func (r Row) SetUint32(columnName string, value uint32) { 122 | r.txn.Uint32(columnName).Set(value) 123 | } 124 | 125 | // MergeUint32 atomically merges a delta into uint32 value at a particular column 126 | func (r Row) MergeUint32(columnName string, value uint32) { 127 | r.txn.Uint32(columnName).Merge(value) 128 | } 129 | 130 | // Uint64 loads a uint64 value at a particular column 131 | func (r Row) Uint64(columnName string) (v uint64, ok bool) { 132 | return readNumber[uint64](r.txn, columnName) 133 | } 134 | 135 | // SetUint64 stores a uint64 value at a particular column 136 | func (r Row) SetUint64(columnName string, value uint64) { 137 | r.txn.Uint64(columnName).Set(value) 138 | } 139 | 140 | // MergeUint64 atomically merges a delta into uint64 value at a particular column 141 | func (r Row) MergeUint64(columnName string, value uint64) { 142 | r.txn.Uint64(columnName).Merge(value) 143 | } 144 | 145 | // Float32 loads a float32 value at a particular column 146 | func (r Row) Float32(columnName string) (v float32, ok bool) { 147 | return readNumber[float32](r.txn, columnName) 148 | } 149 | 150 | // SetFloat32 stores a float32 value at a particular column 151 | func (r Row) SetFloat32(columnName string, value float32) { 152 | r.txn.Float32(columnName).Set(value) 153 | } 154 | 155 | // MergeFloat32 atomically merges a delta into float32 value at a 
particular column 156 | func (r Row) MergeFloat32(columnName string, value float32) { 157 | r.txn.Float32(columnName).Merge(value) 158 | } 159 | 160 | // Float64 loads a float64 value at a particular column 161 | func (r Row) Float64(columnName string) (float64, bool) { 162 | return readNumber[float64](r.txn, columnName) 163 | } 164 | 165 | // SetFloat64 stores a float64 value at a particular column 166 | func (r Row) SetFloat64(columnName string, value float64) { 167 | r.txn.Float64(columnName).Set(value) 168 | } 169 | 170 | // MergeFloat64 atomically merges a delta into float64 value at a particular column 171 | func (r Row) MergeFloat64(columnName string, value float64) { 172 | r.txn.Float64(columnName).Merge(value) 173 | } 174 | 175 | // --------------------------- Strings ---------------------------- 176 | 177 | // Key loads a primary key value at a particular column 178 | func (r Row) Key() (v string, ok bool) { 179 | if pk := r.txn.owner.pk; pk != nil { 180 | v, ok = pk.LoadString(r.txn.cursor) 181 | } 182 | return 183 | } 184 | 185 | // SetKey stores a primary key value at a particular column 186 | func (r Row) SetKey(key string) { 187 | r.txn.Key().Set(key) 188 | } 189 | 190 | // String loads a string value at a particular column 191 | func (r Row) String(columnName string) (v string, ok bool) { 192 | return readStringOf[*columnString](r.txn, columnName).Get() 193 | } 194 | 195 | // SetString stores a string value at a particular column 196 | func (r Row) SetString(columnName string, value string) { 197 | r.txn.String(columnName).Set(value) 198 | } 199 | 200 | // MergeString merges a string value at a particular column 201 | func (r Row) MergeString(columnName string, value string) { 202 | r.txn.String(columnName).Merge(value) 203 | } 204 | 205 | // Enum loads a string value at a particular column 206 | func (r Row) Enum(columnName string) (v string, ok bool) { 207 | return readStringOf[*columnEnum](r.txn, columnName).Get() 208 | } 209 | 210 | // SetEnum 
stores a string value at a particular column 211 | func (r Row) SetEnum(columnName string, value string) { 212 | r.txn.Enum(columnName).Set(value) 213 | } 214 | 215 | // --------------------------- Records ---------------------------- 216 | 217 | // Record loads a record value at a particular column 218 | func (r Row) Record(columnName string) (any, bool) { 219 | return readRecordOf(r.txn, columnName).Get() 220 | } 221 | 222 | // SetRecord stores a record value at a particular column 223 | func (r Row) SetRecord(columnName string, value encoding.BinaryMarshaler) error { 224 | return r.txn.Record(columnName).Set(value) 225 | } 226 | 227 | // MergeRecord merges a record value at a particular column 228 | func (r Row) MergeRecord(columnName string, delta encoding.BinaryMarshaler) error { 229 | return r.txn.Record(columnName).Merge(delta) 230 | } 231 | 232 | // --------------------------- Map ---------------------------- 233 | 234 | // SetMany stores a set of columns for a given map 235 | func (r Row) SetMany(value map[string]any) error { 236 | for k, v := range value { 237 | if _, ok := r.txn.columnAt(k); !ok { 238 | return fmt.Errorf("unable to set '%s', no such column", k) 239 | } 240 | 241 | if err := r.txn.bufferFor(k).PutAny(commit.Put, r.txn.cursor, v); err != nil { 242 | return err 243 | } 244 | } 245 | return nil 246 | } 247 | 248 | // --------------------------- Others ---------------------------- 249 | 250 | // Bool loads a bool value at a particular column 251 | func (r Row) Bool(columnName string) bool { 252 | return readBoolOf(r.txn, columnName).Get() 253 | } 254 | 255 | // SetBool stores a bool value at a particular column 256 | func (r Row) SetBool(columnName string, value bool) { 257 | r.txn.Bool(columnName).Set(value) 258 | } 259 | 260 | // Any loads a bool value at a particular column 261 | func (r Row) Any(columnName string) (any, bool) { 262 | return readAnyOf(r.txn, columnName).Get() 263 | } 264 | 265 | // SetAny stores a bool value at a 
particular column 266 | func (r Row) SetAny(columnName string, value interface{}) { 267 | r.txn.Any(columnName).Set(value) 268 | } 269 | -------------------------------------------------------------------------------- /commit/reader_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package commit 5 | 6 | import ( 7 | "fmt" 8 | "math/rand" 9 | "strconv" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestQueue(t *testing.T) { 16 | buf := NewBuffer(0) 17 | buf.Reset("test") 18 | for i := uint32(0); i < 10; i++ { 19 | buf.PutUint64(Put, i, 2*uint64(i)) 20 | } 21 | 22 | i := 0 23 | assert.Equal(t, 91, len(buf.buffer)) 24 | 25 | r := NewReader() 26 | for r.Seek(buf); r.Next(); { 27 | assert.Equal(t, Put, r.Type) 28 | assert.Equal(t, i, int(r.Offset)) 29 | assert.Equal(t, int(i*2), int(r.Uint64())) 30 | i++ 31 | } 32 | } 33 | 34 | func TestRandom(t *testing.T) { 35 | seq := make([]uint32, 1024) 36 | for i := 0; i < len(seq); i++ { 37 | seq[i] = uint32(rand.Int31n(10000000)) 38 | } 39 | 40 | buf := NewBuffer(0) 41 | for i := uint32(0); i < 1000; i++ { 42 | buf.PutUint32(Put, seq[i], uint32(rand.Int31())) 43 | } 44 | 45 | i := 0 46 | r := NewReader() 47 | for r.Seek(buf); r.Next(); { 48 | assert.Equal(t, Put, r.Type) 49 | assert.Equal(t, int(seq[i]), int(r.Offset)) 50 | i++ 51 | } 52 | } 53 | 54 | func TestRange(t *testing.T) { 55 | const count = 10000 56 | 57 | seq := make([]uint32, count) 58 | for i := 0; i < len(seq); i++ { 59 | seq[i] = uint32(rand.Int31n(1000000)) 60 | } 61 | 62 | buf := NewBuffer(0) 63 | for i := uint32(0); i < count; i++ { 64 | buf.PutUint32(Put, seq[i], uint32(rand.Int31())) 65 | } 66 | 67 | r := NewReader() 68 | for i := 0; i < 100; i++ { 69 | r.Range(buf, Chunk(i), func(r *Reader) { 70 | for r.Next() 
{ 71 | assert.Equal(t, Put, r.Type) 72 | assert.Equal(t, i, int(r.Offset>>chunkShift)) 73 | } 74 | }) 75 | } 76 | } 77 | 78 | func TestReadSwap(t *testing.T) { 79 | buf := NewBuffer(0) 80 | buf.PutAny(Put, 10, int16(100)) 81 | buf.PutAny(Put, 20, int32(200)) 82 | buf.PutAny(Put, 30, int64(300)) 83 | buf.PutAny(Put, 40, uint16(400)) 84 | buf.PutAny(Put, 50, uint32(500)) 85 | buf.PutAny(Put, 60, uint64(600)) 86 | buf.PutAny(Put, 70, float32(700)) 87 | buf.PutAny(Put, 80, float64(800)) 88 | buf.PutAny(Put, 90, "900") 89 | buf.PutAny(Put, 100, []byte("binary")) 90 | buf.PutAny(Put, 110, true) 91 | buf.PutAny(Put, 120, int8(100)) 92 | buf.PutAny(Put, 130, uint8(100)) 93 | buf.PutAny(Put, 140, int(100)) 94 | buf.PutAny(Put, 150, uint(100)) 95 | buf.PutAny(Put, 160, float64(100)) 96 | 97 | // Should only have 1 chunk 98 | assert.False(t, buf.IsEmpty()) 99 | assert.Equal(t, 1, len(buf.chunks)) 100 | buf.RangeChunks(func(chunk Chunk) { 101 | assert.Equal(t, Chunk(0), chunk) 102 | }) 103 | 104 | r := NewReader() 105 | r.Seek(buf) 106 | assert.True(t, r.Next()) 107 | assert.Equal(t, int16(100), r.Int16()) 108 | assert.True(t, r.Next()) 109 | assert.Equal(t, int32(200), r.Int32()) 110 | assert.True(t, r.Next()) 111 | assert.Equal(t, int64(300), r.Int64()) 112 | assert.True(t, r.Next()) 113 | assert.Equal(t, uint16(400), r.Uint16()) 114 | assert.True(t, r.Next()) 115 | assert.Equal(t, uint32(500), r.Uint32()) 116 | assert.True(t, r.Next()) 117 | assert.Equal(t, uint64(600), r.Uint64()) 118 | assert.True(t, r.Next()) 119 | assert.Equal(t, float32(700), r.Float32()) 120 | assert.True(t, r.Next()) 121 | assert.Equal(t, float64(800), r.Float64()) 122 | assert.True(t, r.Next()) 123 | assert.Equal(t, "900", r.String()) 124 | assert.True(t, r.Next()) 125 | assert.Equal(t, "binary", string(r.Bytes())) 126 | assert.True(t, r.Next()) 127 | assert.Equal(t, true, r.Bool()) 128 | assert.True(t, r.Next()) 129 | assert.Equal(t, int16(100), r.Int16()) 130 | assert.True(t, r.Next()) 131 | 
assert.Equal(t, uint16(100), r.Uint16()) 132 | assert.True(t, r.Next()) 133 | assert.Equal(t, int(100), r.Int()) 134 | assert.True(t, r.Next()) 135 | assert.Equal(t, uint(100), r.Uint()) 136 | 137 | // Rewind back and swap values 138 | r.Rewind() 139 | assert.True(t, r.Next()) 140 | r.SwapInt16(99) 141 | assert.Equal(t, int16(99), r.Int16()) 142 | assert.True(t, r.Next()) 143 | r.SwapInt32(199) 144 | assert.Equal(t, int32(199), r.Int32()) 145 | assert.True(t, r.Next()) 146 | r.SwapInt64(299) 147 | assert.Equal(t, int64(299), r.Int64()) 148 | assert.True(t, r.Next()) 149 | r.SwapUint16(399) 150 | assert.Equal(t, uint16(399), r.Uint16()) 151 | assert.True(t, r.Next()) 152 | r.SwapUint32(499) 153 | assert.Equal(t, uint32(499), r.Uint32()) 154 | assert.True(t, r.Next()) 155 | r.SwapUint64(599) 156 | assert.Equal(t, uint64(599), r.Uint64()) 157 | assert.True(t, r.Next()) 158 | r.SwapFloat32(699) 159 | assert.Equal(t, float32(699), r.Float32()) 160 | assert.True(t, r.Next()) 161 | r.SwapFloat64(799) 162 | assert.Equal(t, float64(799), r.Float64()) 163 | assert.True(t, r.Next()) 164 | assert.True(t, r.Next()) 165 | assert.True(t, r.Next()) 166 | r.SwapBool(true) 167 | assert.Equal(t, true, r.Bool()) 168 | assert.True(t, r.Next()) 169 | assert.True(t, r.Next()) 170 | assert.True(t, r.Next()) 171 | r.SwapInt(300) 172 | assert.Equal(t, int(300), r.Int()) 173 | assert.True(t, r.Next()) 174 | r.SwapUint(400) 175 | assert.Equal(t, uint(400), r.Uint()) 176 | assert.True(t, r.Next()) 177 | } 178 | 179 | func TestWriteUnsupported(t *testing.T) { 180 | buf := NewBuffer(0) 181 | assert.Error(t, buf.PutAny(Put, 10, complex64(1))) 182 | } 183 | 184 | func TestReaderIface(t *testing.T) { 185 | buf := NewBuffer(0) 186 | buf.PutFloat64(Put, 777, float64(1)) 187 | 188 | r := NewReader() 189 | r.Seek(buf) 190 | assert.True(t, r.Next()) 191 | assert.Equal(t, float64(1), r.Float()) 192 | assert.Equal(t, uint32(777), r.Index()) 193 | } 194 | 195 | func TestReadIntMixedSize(t *testing.T) { 196 
| buf := NewBuffer(0) 197 | buf.PutInt16(Put, 0, 10) 198 | buf.PutInt32(Put, 1, 20) 199 | buf.PutInt64(Put, 2, 30) 200 | buf.PutString(Put, 3, "hello") 201 | 202 | r := NewReader() 203 | r.Seek(buf) 204 | assert.True(t, r.Next()) 205 | assert.Equal(t, 10, r.Int()) 206 | assert.True(t, r.Next()) 207 | assert.Equal(t, 20, r.Int()) 208 | assert.True(t, r.Next()) 209 | assert.Equal(t, 30, r.Int()) 210 | assert.True(t, r.Next()) 211 | assert.Panics(t, func() { 212 | r.Int() 213 | }) 214 | } 215 | 216 | func TestReadFloatMixedSize(t *testing.T) { 217 | buf := NewBuffer(0) 218 | buf.PutFloat32(Put, 0, 10) 219 | buf.PutFloat64(Put, 1, 20) 220 | buf.PutString(Put, 3, "hello") 221 | 222 | r := NewReader() 223 | r.Seek(buf) 224 | assert.True(t, r.Next()) 225 | assert.Equal(t, 10.0, r.Float()) 226 | assert.True(t, r.Next()) 227 | assert.Equal(t, 20.0, r.Float()) 228 | assert.True(t, r.Next()) 229 | assert.Panics(t, func() { 230 | r.Float() 231 | }) 232 | } 233 | 234 | func TestReadSize(t *testing.T) { 235 | buf := NewBuffer(0) 236 | buf.Reset("test") 237 | buf.PutBool(123, true) 238 | 239 | r := NewReader() 240 | r.readFixed(buf.buffer[0]) 241 | assert.Equal(t, 0, r.i1-r.i0) 242 | } 243 | 244 | func TestIndexAtChunk(t *testing.T) { 245 | buf := NewBuffer(0) 246 | buf.PutFloat32(Put, 10000, 10) 247 | buf.PutFloat32(Put, 20000, 10) 248 | buf.PutFloat32(Put, 30000, 10) 249 | 250 | r := NewReader() 251 | r.Seek(buf) 252 | assert.True(t, r.Next()) 253 | assert.Equal(t, uint32(10000), r.IndexAtChunk()) 254 | assert.True(t, r.Next()) 255 | assert.Equal(t, uint32(3616), r.IndexAtChunk()) 256 | } 257 | 258 | func TestSwapOpChange(t *testing.T) { 259 | buf := NewBuffer(0) 260 | buf.PutInt32(Merge, 10, int32(1)) 261 | assert.Equal(t, []byte{0x23, 0x0, 0x0, 0x0, 0x1, 0xa}, buf.buffer) 262 | 263 | // Swap the value, this should also change the type 264 | r := NewReader() 265 | r.Seek(buf) 266 | assert.True(t, r.Next()) 267 | assert.Equal(t, Merge, r.Type) 268 | r.SwapInt32(int32(2)) 269 | 
assert.Equal(t, int32(2), r.Int32()) 270 | 271 | // Once swapped, op type should be changed to "Put" 272 | r.Seek(buf) 273 | assert.Equal(t, []byte{0x22, 0x0, 0x0, 0x0, 0x2, 0xa}, buf.buffer) 274 | assert.True(t, r.Next()) 275 | assert.Equal(t, Put, r.Type) 276 | } 277 | 278 | func TestMergeBytes(t *testing.T) { 279 | buf := NewBuffer(0) 280 | buf.PutBytes(Merge, 10, []byte("A")) 281 | assert.Equal(t, []byte{0x53, 0x0, 0x1, 0x41, 0xa}, buf.buffer) 282 | 283 | // Swap the value, this should also change the type 284 | r := NewReader() 285 | r.Seek(buf) 286 | assert.True(t, r.Next()) 287 | assert.Equal(t, Merge, r.Type) 288 | r.SwapBytes([]byte("B")) 289 | 290 | // Once swapped, op type should be changed to "Put" 291 | r.Seek(buf) 292 | assert.Equal(t, []byte{0x52, 0x0, 0x1, 0x42, 0xa}, buf.buffer) 293 | assert.True(t, r.Next()) 294 | assert.Equal(t, Put, r.Type) 295 | } 296 | 297 | func TestMergeStrings(t *testing.T) { 298 | buf := NewBuffer(0) 299 | buf.PutBytes(Merge, 30, []byte("5")) 300 | buf.PutBytes(Merge, 40, []byte("6")) 301 | buf.PutBytes(Merge, 10, []byte("2")) 302 | buf.PutBytes(Merge, 20, []byte("3")) 303 | 304 | var scanned []string 305 | 306 | // Swap the value, this should also change the type 307 | r := NewReader() 308 | r.Range(buf, 0, func(r *Reader) { 309 | for r.Rewind(); r.Next(); { 310 | i, _ := strconv.Atoi(r.String()) 311 | r.SwapString(strconv.Itoa(i * i)) 312 | } 313 | }) 314 | 315 | r.Range(buf, 0, func(r *Reader) { 316 | for r.Rewind(); r.Next(); { 317 | scanned = append(scanned, fmt.Sprintf("(%s) %v", r.Type, r.String())) 318 | } 319 | }) 320 | 321 | assert.Equal(t, []string{ 322 | "(skip) 5", 323 | "(skip) 6", 324 | "(put) 4", 325 | "(put) 9", 326 | "(put) 25", 327 | "(put) 36", 328 | }, scanned) 329 | } 330 | 331 | func TestReaderIsUpsert(t *testing.T) { 332 | buf := NewBuffer(0) 333 | buf.PutFloat32(Put, 0, 10) 334 | buf.PutFloat32(Delete, 0, 0) 335 | 336 | r := NewReader() 337 | r.Seek(buf) 338 | assert.True(t, r.Next()) 339 | 
assert.True(t, r.IsUpsert()) 340 | assert.True(t, r.Next()) 341 | assert.True(t, r.IsDelete()) 342 | } 343 | -------------------------------------------------------------------------------- /commit/buffer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package commit 5 | 6 | import ( 7 | "bytes" 8 | "testing" 9 | "time" 10 | "unsafe" 11 | 12 | "github.com/kelindar/bitmap" 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | /* 17 | cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz 18 | BenchmarkQueue/u16-rw-8 154 7691836 ns/op 19 B/op 0 allocs/op 19 | BenchmarkQueue/u16-next-8 214 5542922 ns/op 7 B/op 0 allocs/op 20 | BenchmarkQueue/u32-rw-8 152 7743216 ns/op 20 B/op 0 allocs/op 21 | BenchmarkQueue/u32-next-8 212 5616605 ns/op 7 B/op 0 allocs/op 22 | BenchmarkQueue/u64-rw-8 148 8000536 ns/op 20 B/op 0 allocs/op 23 | BenchmarkQueue/u64-next-8 194 6126377 ns/op 7 B/op 0 allocs/op 24 | BenchmarkQueue/str-rw-8 91 12935521 ns/op 33 B/op 0 allocs/op 25 | BenchmarkQueue/str-next-8 98 10901156 ns/op 15 B/op 0 allocs/op 26 | BenchmarkQueue/bool-rw-8 169 6950441 ns/op 18 B/op 0 allocs/op 27 | BenchmarkQueue/bool-next-8 228 5195821 ns/op 6 B/op 0 allocs/op 28 | */ 29 | func BenchmarkQueue(b *testing.B) { 30 | const count = 1000000 31 | 32 | run("u16-rw", b, count, func(buf *Buffer, r *Reader) { 33 | for i := uint32(0); i < count*2; i += 2 { 34 | buf.PutUint16(Put, i, uint16(i)) 35 | } 36 | for r.Seek(buf); r.Next(); { 37 | _ = r.Uint16() 38 | } 39 | }) 40 | 41 | run("u16-next", b, count, func(buf *Buffer, r *Reader) { 42 | for i := uint32(0); i < count; i++ { 43 | buf.PutUint16(Put, i, uint16(i)) 44 | } 45 | for r.Seek(buf); r.Next(); { 46 | _ = r.Uint16() 47 | } 48 | }) 49 | 50 | run("u32-rw", b, count, func(buf *Buffer, r *Reader) { 51 | for i := uint32(0); i 
< count*2; i += 2 { 52 | buf.PutUint32(Put, i, i) 53 | } 54 | for r.Seek(buf); r.Next(); { 55 | _ = r.Uint32() 56 | } 57 | }) 58 | 59 | run("u32-next", b, count, func(buf *Buffer, r *Reader) { 60 | for i := uint32(0); i < count; i++ { 61 | buf.PutUint32(Put, i, i) 62 | } 63 | for r.Seek(buf); r.Next(); { 64 | _ = r.Uint32() 65 | } 66 | }) 67 | 68 | run("u64-rw", b, count, func(buf *Buffer, r *Reader) { 69 | for i := uint32(0); i < count*2; i += 2 { 70 | buf.PutUint64(Put, i, uint64(i)) 71 | } 72 | for r.Seek(buf); r.Next(); { 73 | _ = r.Uint64() 74 | } 75 | }) 76 | 77 | run("u64-next", b, count, func(buf *Buffer, r *Reader) { 78 | for i := uint32(0); i < count; i++ { 79 | buf.PutUint64(Put, i, uint64(i)) 80 | } 81 | for r.Seek(buf); r.Next(); { 82 | _ = r.Uint64() 83 | } 84 | }) 85 | 86 | run("str-rw", b, count, func(buf *Buffer, r *Reader) { 87 | for i := uint32(0); i < count*2; i += 2 { 88 | buf.PutString(Put, i, "hello world") 89 | } 90 | for r.Seek(buf); r.Next(); { 91 | _ = r.String() 92 | } 93 | }) 94 | 95 | run("str-next", b, count, func(buf *Buffer, r *Reader) { 96 | for i := uint32(0); i < count; i++ { 97 | buf.PutString(Put, i, "hello world") 98 | } 99 | for r.Seek(buf); r.Next(); { 100 | _ = r.String() 101 | } 102 | }) 103 | 104 | run("bool-rw", b, count, func(buf *Buffer, r *Reader) { 105 | for i := uint32(0); i < count*2; i += 2 { 106 | buf.PutBool(i, true) 107 | } 108 | for r.Seek(buf); r.Next(); { 109 | _ = r.Bool() 110 | } 111 | }) 112 | 113 | run("bool-next", b, count, func(buf *Buffer, r *Reader) { 114 | for i := uint32(0); i < count; i++ { 115 | buf.PutBool(i, true) 116 | } 117 | for r.Seek(buf); r.Next(); { 118 | _ = r.Bool() 119 | } 120 | }) 121 | } 122 | 123 | // Run runs a single benchmark 124 | func run(name string, b *testing.B, count int, fn func(buf *Buffer, r *Reader)) { 125 | b.Run(name, func(b *testing.B) { 126 | buf := NewBuffer(count * 20) 127 | r := NewReader() 128 | b.ReportAllocs() 129 | b.ResetTimer() 130 | for n := 0; n < b.N; 
n++ { 131 | buf.Reset("test") 132 | fn(buf, r) 133 | } 134 | }) 135 | } 136 | 137 | func TestSizeof(t *testing.T) { 138 | assert.Equal(t, 96, int(unsafe.Sizeof(Reader{}))) 139 | assert.Equal(t, 80, int(unsafe.Sizeof(Buffer{}))) 140 | } 141 | 142 | func TestReadWrite(t *testing.T) { 143 | buf := NewBuffer(0) 144 | buf.PutInt16(Put, 10, 100) 145 | buf.PutInt16(Put, 11, 100) 146 | buf.PutInt32(Put, 20, 200) 147 | buf.PutInt32(Put, 21, 200) 148 | buf.PutInt64(Put, 30, 300) 149 | buf.PutInt64(Put, 31, 300) 150 | buf.PutUint16(Put, 40, 400) 151 | buf.PutUint16(Put, 41, 400) 152 | buf.PutUint32(Put, 50, 500) 153 | buf.PutUint32(Put, 51, 500) 154 | buf.PutUint64(Put, 60, 600) 155 | buf.PutUint64(Put, 61, 600) 156 | buf.PutFloat32(Put, 70, 700) 157 | buf.PutFloat32(Put, 71, 700) 158 | buf.PutFloat64(Put, 80, 800) 159 | buf.PutFloat64(Put, 81, 800) 160 | buf.PutString(Put, 90, "900") 161 | buf.PutString(Put, 91, "hello world") 162 | buf.PutBytes(Put, 100, []byte("binary")) 163 | buf.PutBool(110, true) 164 | buf.PutBool(111, false) 165 | buf.PutInt(Put, 120, 1000) 166 | buf.PutUint(Put, 130, 1100) 167 | buf.PutNumber(Put, 140, 12.34) 168 | 169 | // Read values back 170 | r := NewReader() 171 | r.Seek(buf) 172 | assert.True(t, r.Next()) 173 | assert.Equal(t, int16(100), r.Int16()) 174 | assert.True(t, r.Next()) 175 | assert.Equal(t, int16(100), r.Int16()) 176 | assert.True(t, r.Next()) 177 | assert.Equal(t, int32(200), r.Int32()) 178 | assert.True(t, r.Next()) 179 | assert.Equal(t, int32(200), r.Int32()) 180 | assert.True(t, r.Next()) 181 | assert.Equal(t, int64(300), r.Int64()) 182 | assert.True(t, r.Next()) 183 | assert.Equal(t, int64(300), r.Int64()) 184 | assert.True(t, r.Next()) 185 | assert.Equal(t, uint16(400), r.Uint16()) 186 | assert.True(t, r.Next()) 187 | assert.Equal(t, uint16(400), r.Uint16()) 188 | assert.True(t, r.Next()) 189 | assert.Equal(t, uint32(500), r.Uint32()) 190 | assert.True(t, r.Next()) 191 | assert.Equal(t, uint32(500), r.Uint32()) 192 | 
assert.True(t, r.Next()) 193 | assert.Equal(t, uint64(600), r.Uint64()) 194 | assert.True(t, r.Next()) 195 | assert.Equal(t, uint64(600), r.Uint64()) 196 | assert.True(t, r.Next()) 197 | assert.Equal(t, float32(700), r.Float32()) 198 | assert.True(t, r.Next()) 199 | assert.Equal(t, float32(700), r.Float32()) 200 | assert.True(t, r.Next()) 201 | assert.Equal(t, float64(800), r.Float64()) 202 | assert.True(t, r.Next()) 203 | assert.Equal(t, float64(800), r.Float64()) 204 | assert.True(t, r.Next()) 205 | assert.Equal(t, "900", r.String()) 206 | assert.True(t, r.Next()) 207 | assert.Equal(t, "hello world", r.String()) 208 | assert.True(t, r.Next()) 209 | assert.Equal(t, "binary", string(r.Bytes())) 210 | assert.True(t, r.Next()) 211 | assert.Equal(t, true, r.Bool()) 212 | assert.True(t, r.Next()) 213 | assert.Equal(t, false, r.Bool()) 214 | assert.True(t, r.Next()) 215 | assert.Equal(t, int(1000), r.Int()) 216 | assert.True(t, r.Next()) 217 | assert.Equal(t, uint(1100), r.Uint()) 218 | assert.True(t, r.Next()) 219 | assert.Equal(t, 12.34, r.Number()) 220 | assert.False(t, r.Next()) 221 | } 222 | 223 | func TestBufferClone(t *testing.T) { 224 | buf := NewBuffer(0) 225 | buf.PutInt16(Put, 10, 100) 226 | buf.PutString(Put, 20, "hello") 227 | 228 | cloned := buf.Clone() 229 | assert.EqualValues(t, buf, cloned) 230 | } 231 | 232 | func TestPutNil(t *testing.T) { 233 | buf := NewBuffer(0) 234 | buf.PutAny(PutTrue, 0, nil) 235 | 236 | r := NewReader() 237 | r.Seek(buf) 238 | assert.True(t, r.Next()) 239 | assert.True(t, r.Bool()) 240 | } 241 | 242 | func TestPutTime(t *testing.T) { 243 | buf := NewBuffer(0) 244 | buf.PutAny(Put, 0, time.Unix(0, 0)) 245 | 246 | r := NewReader() 247 | r.Seek(buf) 248 | assert.True(t, r.Next()) 249 | assert.NotEmpty(t, r.Bytes()) 250 | } 251 | 252 | func TestPutBitmap(t *testing.T) { 253 | buf := NewBuffer(0) 254 | buf.PutBitmap(Insert, 0, bitmap.Bitmap{0xff}) 255 | 256 | r := NewReader() 257 | r.Seek(buf) 258 | assert.True(t, r.Next()) 259 | 
assert.Equal(t, Insert, r.Type) 260 | } 261 | 262 | func TestBufferWriteTo(t *testing.T) { 263 | input := NewBuffer(0) 264 | input.Column = "test" 265 | input.PutInt16(Put, 10, 100) 266 | input.PutString(Put, 20, "hello") 267 | 268 | buffer := bytes.NewBuffer(nil) 269 | n, err := input.WriteTo(buffer) 270 | assert.NoError(t, err) 271 | assert.Equal(t, int64(buffer.Len()), n) 272 | assert.Equal(t, int64(36), n) 273 | 274 | output := NewBuffer(0) 275 | m, err := output.ReadFrom(buffer) 276 | assert.Equal(t, int64(buffer.Len()), m) 277 | assert.Equal(t, input, output) 278 | } 279 | 280 | func TestBufferWriteToFailures(t *testing.T) { 281 | buf := NewBuffer(0) 282 | buf.Column = "test" 283 | buf.PutInt16(Put, 10, 100) 284 | buf.PutString(Put, 20, "hello") 285 | 286 | for size := 0; size < 30; size++ { 287 | output := &limitWriter{Limit: size} 288 | _, err := buf.WriteTo(output) 289 | assert.Error(t, err) 290 | } 291 | } 292 | 293 | func TestBufferReadFromFailures(t *testing.T) { 294 | input := NewBuffer(0) 295 | input.Column = "test" 296 | input.PutInt16(Put, 10, 100) 297 | input.PutString(Put, 20, "hello") 298 | 299 | buffer := bytes.NewBuffer(nil) 300 | n, err := input.WriteTo(buffer) 301 | assert.NoError(t, err) 302 | 303 | for size := 0; size < int(n)-1; size++ { 304 | output := NewBuffer(0) 305 | _, err := output.ReadFrom(bytes.NewReader(buffer.Bytes()[:size])) 306 | assert.Error(t, err) 307 | } 308 | } 309 | 310 | func FuzzBufferString(f *testing.F) { 311 | f.Add(uint32(1), "test") 312 | 313 | f.Fuzz(func(t *testing.T, i uint32, v string) { 314 | buf := NewBuffer(0) 315 | buf.PutString(Put, i, v) 316 | 317 | r := NewReader() 318 | r.Seek(buf) 319 | assert.True(t, r.Next()) 320 | assert.Equal(t, v, r.String()) 321 | }) 322 | } 323 | 324 | func TestOpString(t *testing.T) { 325 | for i := 0; i < 255; i++ { 326 | assert.NotEmpty(t, OpType(i).String()) 327 | } 328 | } 329 | -------------------------------------------------------------------------------- 
/commit/buffer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 3 | 4 | package commit 5 | 6 | import ( 7 | "encoding" 8 | "fmt" 9 | "math" 10 | 11 | "github.com/kelindar/bitmap" 12 | ) 13 | 14 | const ( 15 | size0 = 0 // 0 byte in size 16 | size2 = 1 << 4 // 2 bytes in size 17 | size4 = 2 << 4 // 4 bytes in size 18 | size8 = 3 << 4 // 8 bytes in size 19 | isNext = 1 << 7 // is immediate next 20 | isString = 1 << 6 // is variable-size string 21 | ) 22 | 23 | // --------------------------- Operation Type ---------------------------- 24 | 25 | // OpType represents a type of an operation. 26 | type OpType uint8 27 | 28 | // Various update operations supported. 29 | const ( 30 | Delete OpType = 0 // Delete deletes an entire row or a set of rows 31 | Insert OpType = 1 // Insert inserts a new row or a set of rows 32 | PutFalse OpType = 0 // PutFalse is a combination of Put+False for boolean values 33 | PutTrue OpType = 2 // PutTrue is a combination of Put+True for boolean values 34 | Put OpType = 2 // Put stores a value regardless of a previous value 35 | Merge OpType = 3 // Applies a merge function 36 | Skip OpType = 4 // Skips the value 37 | ) 38 | 39 | // String returns a string representation 40 | func (o OpType) String() string { 41 | switch o { 42 | case Delete: 43 | return "delete" 44 | case Insert: 45 | return "insert" 46 | case Put: 47 | return "put" 48 | case Merge: 49 | return "merge" 50 | case Skip: 51 | return "skip" 52 | default: 53 | return "unknown" 54 | } 55 | } 56 | 57 | // --------------------------- Delta log ---------------------------- 58 | 59 | // Buffer represents a buffer of delta operations. 
60 | type Buffer struct { 61 | last int32 // The last offset written 62 | chunk Chunk // The current chunk 63 | buffer []byte // The destination buffer 64 | chunks []header // The offsets of chunks 65 | _ [8]byte // padding 66 | Column string // The column for the queue 67 | } 68 | 69 | // header represents a chunk metadata header. 70 | type header struct { 71 | Chunk Chunk // The chunk number 72 | Start uint32 // The offset at which the chunk starts in the buffer 73 | Value uint32 // The previous offset value for delta 74 | } 75 | 76 | // NewBuffer creates a new queue to store individual operations. 77 | func NewBuffer(capacity int) *Buffer { 78 | return &Buffer{ 79 | chunk: math.MaxUint32, 80 | buffer: make([]byte, 0, capacity), 81 | } 82 | } 83 | 84 | // Clone clones the buffer 85 | func (b *Buffer) Clone() *Buffer { 86 | buffer := make([]byte, len(b.buffer)) 87 | copy(buffer, b.buffer) 88 | 89 | chunks := make([]header, 0, len(b.chunks)) 90 | chunks = append(chunks, b.chunks...) 91 | return &Buffer{ 92 | Column: b.Column, 93 | buffer: buffer, 94 | chunks: chunks, 95 | last: b.last, 96 | chunk: b.chunk, 97 | } 98 | } 99 | 100 | // Reset resets the queue so it can be reused. 101 | func (b *Buffer) Reset(column string) { 102 | b.last = 0 103 | b.chunk = math.MaxUint32 104 | b.buffer = b.buffer[:0] 105 | b.chunks = b.chunks[:0] 106 | b.Column = column 107 | } 108 | 109 | // IsEmpty returns whether the buffer is empty or not. 110 | func (b *Buffer) IsEmpty() bool { 111 | return len(b.buffer) == 0 112 | } 113 | 114 | // Range iterates over the chunks present in the buffer 115 | func (b *Buffer) RangeChunks(fn func(chunk Chunk)) { 116 | for _, c := range b.chunks { 117 | fn(c.Chunk) 118 | } 119 | } 120 | 121 | // PutAny appends a supported value onto the buffer. 
122 | func (b *Buffer) PutAny(op OpType, idx uint32, value any) error { 123 | switch v := value.(type) { 124 | case uint64: 125 | b.PutUint64(op, idx, v) 126 | case uint32: 127 | b.PutUint32(op, idx, v) 128 | case uint16: 129 | b.PutUint16(op, idx, v) 130 | case uint8: 131 | b.PutUint16(op, idx, uint16(v)) 132 | case int64: 133 | b.PutInt64(op, idx, v) 134 | case int32: 135 | b.PutInt32(op, idx, v) 136 | case int16: 137 | b.PutInt16(op, idx, v) 138 | case int8: 139 | b.PutInt16(op, idx, int16(v)) 140 | case string: 141 | b.PutString(op, idx, v) 142 | case []byte: 143 | b.PutBytes(op, idx, v) 144 | case float32: 145 | b.PutFloat32(op, idx, v) 146 | case float64: 147 | b.PutFloat64(op, idx, v) 148 | case int: 149 | b.PutInt64(op, idx, int64(v)) 150 | case uint: 151 | b.PutUint64(op, idx, uint64(v)) 152 | case bool: 153 | b.PutBool(idx, v) 154 | case nil: 155 | b.PutOperation(op, idx) 156 | case encoding.BinaryMarshaler: 157 | data, err := v.MarshalBinary() 158 | if err == nil { 159 | b.PutBytes(op, idx, data) 160 | } 161 | return err 162 | default: 163 | return fmt.Errorf("column: unsupported type (%T)", value) 164 | } 165 | return nil 166 | } 167 | 168 | // --------------------------- Numbers ---------------------------- 169 | 170 | // PutUint64 appends an uint64 value. 171 | func (b *Buffer) PutUint64(op OpType, idx uint32, value uint64) { 172 | b.writeUint64(op, idx, value) 173 | } 174 | 175 | // PutUint32 appends an uint32 value. 176 | func (b *Buffer) PutUint32(op OpType, idx uint32, value uint32) { 177 | b.writeUint32(op, idx, value) 178 | } 179 | 180 | // PutUint16 appends an uint16 value. 181 | func (b *Buffer) PutUint16(op OpType, idx uint32, value uint16) { 182 | b.writeUint16(op, idx, value) 183 | } 184 | 185 | // PutUint appends a uint64 value. 186 | func (b *Buffer) PutUint(op OpType, idx uint32, value uint) { 187 | b.writeUint64(op, idx, uint64(value)) 188 | } 189 | 190 | // PutInt64 appends an int64 value. 
191 | func (b *Buffer) PutInt64(op OpType, idx uint32, value int64) { 192 | b.writeUint64(op, idx, uint64(value)) 193 | } 194 | 195 | // PutInt32 appends an int32 value. 196 | func (b *Buffer) PutInt32(op OpType, idx uint32, value int32) { 197 | b.writeUint32(op, idx, uint32(value)) 198 | } 199 | 200 | // PutInt16 appends an int16 value. 201 | func (b *Buffer) PutInt16(op OpType, idx uint32, value int16) { 202 | b.writeUint16(op, idx, uint16(value)) 203 | } 204 | 205 | // PutInt appends a int64 value. 206 | func (b *Buffer) PutInt(op OpType, idx uint32, value int) { 207 | b.writeUint64(op, idx, uint64(value)) 208 | } 209 | 210 | // PutFloat64 appends a float64 value. 211 | func (b *Buffer) PutFloat64(op OpType, idx uint32, value float64) { 212 | b.writeUint64(op, idx, math.Float64bits(value)) 213 | } 214 | 215 | // PutFloat32 appends an int32 value. 216 | func (b *Buffer) PutFloat32(op OpType, idx uint32, value float32) { 217 | b.writeUint32(op, idx, math.Float32bits(value)) 218 | } 219 | 220 | // PutNumber appends a float64 value. 221 | func (b *Buffer) PutNumber(op OpType, idx uint32, value float64) { 222 | b.writeUint64(op, idx, math.Float64bits(value)) 223 | } 224 | 225 | // --------------------------- Others ---------------------------- 226 | 227 | // PutOperation appends an operation type without a value. 228 | func (b *Buffer) PutOperation(op OpType, idx uint32) { 229 | delta := b.writeChunk(idx) 230 | switch delta { 231 | case 1: 232 | b.buffer = append(b.buffer, byte(op)|size0|isNext) 233 | default: 234 | b.buffer = append(b.buffer, byte(op)|size0) 235 | b.writeOffset(uint32(delta)) 236 | } 237 | } 238 | 239 | // PutBool appends a boolean value. 240 | func (b *Buffer) PutBool(idx uint32, value bool) { 241 | 242 | // let the compiler do its magic: https://github.com/golang/go/issues/6011 243 | op := PutFalse 244 | if value { 245 | op = PutTrue 246 | } 247 | 248 | b.PutOperation(op, idx) 249 | } 250 | 251 | // PutBytes appends a binary value. 
252 | func (b *Buffer) PutBytes(op OpType, idx uint32, value []byte) { 253 | delta := b.writeChunk(idx) 254 | length := len(value) // max 65K slices 255 | switch delta { 256 | case 1: 257 | b.buffer = append(b.buffer, 258 | byte(op)|size2|isString|isNext, 259 | byte(length>>8), byte(length), 260 | ) 261 | b.buffer = append(b.buffer, value...) 262 | default: 263 | b.buffer = append(b.buffer, 264 | byte(op)|size2|isString, 265 | byte(length>>8), byte(length), 266 | ) 267 | 268 | // Write the the data itself and the offset 269 | b.buffer = append(b.buffer, value...) 270 | b.writeOffset(uint32(delta)) 271 | } 272 | } 273 | 274 | // PutString appends a string value. 275 | func (b *Buffer) PutString(op OpType, idx uint32, value string) { 276 | b.PutBytes(op, idx, toBytes(value)) 277 | } 278 | 279 | // PutBitmap iterates over the bitmap values and appends an operation for each bit set to one 280 | func (b *Buffer) PutBitmap(op OpType, chunk Chunk, value bitmap.Bitmap) { 281 | chunk.Range(value, func(idx uint32) { 282 | b.PutOperation(op, idx) 283 | }) 284 | } 285 | 286 | // writeUint64 appends a uint64 value. 287 | func (b *Buffer) writeUint64(op OpType, idx uint32, value uint64) { 288 | delta := b.writeChunk(idx) 289 | switch delta { 290 | case 1: 291 | b.buffer = append(b.buffer, 292 | byte(op)|size8|isNext, 293 | byte(value>>56), byte(value>>48), byte(value>>40), byte(value>>32), 294 | byte(value>>24), byte(value>>16), byte(value>>8), byte(value), 295 | ) 296 | default: 297 | b.buffer = append(b.buffer, 298 | byte(op)|size8, 299 | byte(value>>56), byte(value>>48), byte(value>>40), byte(value>>32), 300 | byte(value>>24), byte(value>>16), byte(value>>8), byte(value), 301 | ) 302 | b.writeOffset(uint32(delta)) 303 | } 304 | } 305 | 306 | // writeUint32 appends a uint32 value. 
307 | func (b *Buffer) writeUint32(op OpType, idx uint32, value uint32) { 308 | delta := b.writeChunk(idx) 309 | switch delta { 310 | case 1: 311 | b.buffer = append(b.buffer, 312 | byte(op)|size4|isNext, 313 | byte(value>>24), byte(value>>16), byte(value>>8), byte(value), 314 | ) 315 | default: 316 | b.buffer = append(b.buffer, 317 | byte(op)|size4, 318 | byte(value>>24), byte(value>>16), byte(value>>8), byte(value), 319 | ) 320 | b.writeOffset(uint32(delta)) 321 | } 322 | } 323 | 324 | // writeUint16 appends a uint16 value. 325 | func (b *Buffer) writeUint16(op OpType, idx uint32, value uint16) { 326 | delta := b.writeChunk(idx) 327 | switch delta { 328 | case 1: 329 | b.buffer = append(b.buffer, byte(op)|size2|isNext, byte(value>>8), byte(value)) 330 | default: 331 | b.buffer = append(b.buffer, byte(op)|size2, byte(value>>8), byte(value)) 332 | b.writeOffset(uint32(delta)) 333 | } 334 | } 335 | 336 | // writeOffset writes the offset at the current head. 337 | func (b *Buffer) writeOffset(delta uint32) { 338 | for delta >= 0x80 { 339 | b.buffer = append(b.buffer, byte(delta)|0x80) 340 | delta >>= 7 341 | } 342 | 343 | b.buffer = append(b.buffer, byte(delta)) 344 | } 345 | 346 | // writeChunk writes a chunk if changed and returns the delta 347 | func (b *Buffer) writeChunk(idx uint32) int32 { 348 | if chunk := Chunk(idx >> chunkShift); b.chunk != chunk { 349 | b.chunk = chunk 350 | b.chunks = append(b.chunks, header{ 351 | Chunk: Chunk(chunk), 352 | Start: uint32(len(b.buffer)), 353 | Value: uint32(b.last), 354 | }) 355 | } 356 | 357 | delta := int32(idx) - b.last 358 | b.last = int32(idx) 359 | return delta 360 | } 361 | -------------------------------------------------------------------------------- /column_strings.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 
3 | 4 | package column 5 | 6 | import ( 7 | "fmt" 8 | "math" 9 | "sync" 10 | 11 | "github.com/kelindar/bitmap" 12 | "github.com/kelindar/column/commit" 13 | "github.com/kelindar/intmap" 14 | "github.com/zeebo/xxh3" 15 | ) 16 | 17 | // --------------------------- Enum ---------------------------- 18 | 19 | var _ Textual = new(columnEnum) 20 | 21 | // columnEnum represents a string column 22 | type columnEnum struct { 23 | chunks[uint32] 24 | seek *intmap.Sync // The hash->location table 25 | data []string // The string data 26 | } 27 | 28 | // makeEnum creates a new column 29 | func makeEnum() Column { 30 | return &columnEnum{ 31 | chunks: make(chunks[uint32], 0, 4), 32 | seek: intmap.NewSync(64, .95), 33 | data: make([]string, 0, 64), 34 | } 35 | } 36 | 37 | // Apply applies a set of operations to the column. 38 | func (c *columnEnum) Apply(chunk commit.Chunk, r *commit.Reader) { 39 | fill, locs := c.chunkAt(chunk) 40 | for r.Next() { 41 | offset := r.IndexAtChunk() 42 | switch r.Type { 43 | case commit.Put: 44 | fill[offset>>6] |= 1 << (offset & 0x3f) 45 | locs[offset] = c.findOrAdd(r.Bytes()) 46 | case commit.Delete: 47 | fill.Remove(offset) 48 | // TODO: remove unused strings, need some reference counting for that 49 | // and can proably be done during vacuum() instead 50 | } 51 | } 52 | } 53 | 54 | // Search for the string or adds it and returns the offset 55 | func (c *columnEnum) findOrAdd(v []byte) uint32 { 56 | target := uint32(xxh3.Hash(v)) 57 | at, _ := c.seek.LoadOrStore(target, func() uint32 { 58 | c.data = append(c.data, string(v)) 59 | return uint32(len(c.data)) - 1 60 | }) 61 | return at 62 | } 63 | 64 | // readAt reads a string at a location 65 | func (c *columnEnum) readAt(at uint32) string { 66 | return c.data[at] 67 | } 68 | 69 | // Value retrieves a value at a specified index 70 | func (c *columnEnum) Value(idx uint32) (v interface{}, ok bool) { 71 | return c.LoadString(idx) 72 | } 73 | 74 | // LoadString retrieves a value at a specified index 
75 | func (c *columnEnum) LoadString(idx uint32) (v string, ok bool) { 76 | chunk := commit.ChunkAt(idx) 77 | index := idx - chunk.Min() 78 | if int(chunk) < len(c.chunks) && c.chunks[chunk].fill.Contains(index) { 79 | v, ok = c.readAt(c.chunks[chunk].data[index]), true 80 | } 81 | return 82 | } 83 | 84 | // FilterString filters down the values based on the specified predicate. The column for 85 | // this filter must be a string. 86 | func (c *columnEnum) FilterString(chunk commit.Chunk, index bitmap.Bitmap, predicate func(v string) bool) { 87 | if int(chunk) >= len(c.chunks) { 88 | return 89 | } 90 | 91 | fill, locs := c.chunkAt(chunk) 92 | cache := struct { 93 | index uint32 // Last seen offset 94 | value bool // Last evaluated predicate 95 | }{ 96 | index: math.MaxUint32, 97 | value: false, 98 | } 99 | 100 | // Do a quick ellimination of elements which are NOT contained in this column, this 101 | // allows us not to check contains during the filter itself 102 | index.And(fill) 103 | 104 | // Filters down the strings, if strings repeat we avoid reading every time by 105 | // caching the last seen index/value combination. 106 | index.Filter(func(idx uint32) bool { 107 | if at := locs[idx]; at != cache.index { 108 | cache.index = at 109 | cache.value = predicate(c.readAt(at)) 110 | return cache.value 111 | } 112 | 113 | // The value is cached, avoid evaluating it 114 | return cache.value 115 | }) 116 | } 117 | 118 | // Contains checks whether the column has a value at a specified index. 
119 | func (c *columnEnum) Contains(idx uint32) bool { 120 | chunk := commit.ChunkAt(idx) 121 | return c.chunks[chunk].fill.Contains(idx - chunk.Min()) 122 | } 123 | 124 | // Snapshot writes the entire column into the specified destination buffer 125 | func (c *columnEnum) Snapshot(chunk commit.Chunk, dst *commit.Buffer) { 126 | fill, locs := c.chunkAt(chunk) 127 | fill.Range(func(idx uint32) { 128 | dst.PutString(commit.Put, idx, c.readAt(locs[idx])) 129 | }) 130 | } 131 | 132 | // rwEnum represents read-write accessor for enum 133 | type rwEnum struct { 134 | rdString[*columnEnum] 135 | writer *commit.Buffer 136 | } 137 | 138 | // Set sets the value at the current transaction cursor 139 | func (s rwEnum) Set(value string) { 140 | s.writer.PutString(commit.Put, *s.cursor, value) 141 | } 142 | 143 | // Enum returns a enumerable column accessor 144 | func (txn *Txn) Enum(columnName string) rwEnum { 145 | return rwEnum{ 146 | rdString: readStringOf[*columnEnum](txn, columnName), 147 | writer: txn.bufferFor(columnName), 148 | } 149 | } 150 | 151 | // --------------------------- String ---------------------------- 152 | 153 | var _ Textual = new(columnString) 154 | 155 | // columnString represents a string column 156 | type columnString struct { 157 | chunks[string] 158 | option[string] 159 | } 160 | 161 | // makeString creates a new string column 162 | func makeStrings(opts ...func(*option[string])) Column { 163 | return &columnString{ 164 | chunks: make(chunks[string], 0, 4), 165 | option: configure(opts, option[string]{ 166 | Merge: func(_, delta string) string { return delta }, 167 | }), 168 | } 169 | } 170 | 171 | // Apply applies a set of operations to the column. 
172 | func (c *columnString) Apply(chunk commit.Chunk, r *commit.Reader) { 173 | fill, data := c.chunkAt(chunk) 174 | from := chunk.Min() 175 | 176 | // Update the values of the column, for this one we can only process stores 177 | for r.Next() { 178 | offset := r.Offset - int32(from) 179 | switch r.Type { 180 | case commit.Put: 181 | fill[offset>>6] |= 1 << (offset & 0x3f) 182 | data[offset] = string(r.Bytes()) 183 | case commit.Merge: 184 | fill[offset>>6] |= 1 << (offset & 0x3f) 185 | data[offset] = r.SwapString(c.Merge(data[offset], r.String())) 186 | case commit.Delete: 187 | fill.Remove(uint32(offset)) 188 | } 189 | } 190 | } 191 | 192 | // Value retrieves a value at a specified index 193 | func (c *columnString) Value(idx uint32) (v interface{}, ok bool) { 194 | return c.LoadString(idx) 195 | } 196 | 197 | // Contains checks whether the column has a value at a specified index. 198 | func (c *columnString) Contains(idx uint32) bool { 199 | chunk := commit.ChunkAt(idx) 200 | index := idx - chunk.Min() 201 | return c.chunks[chunk].fill.Contains(index) 202 | } 203 | 204 | // LoadString retrieves a value at a specified index 205 | func (c *columnString) LoadString(idx uint32) (v string, ok bool) { 206 | chunk := commit.ChunkAt(idx) 207 | index := idx - chunk.Min() 208 | 209 | if int(chunk) < len(c.chunks) && c.chunks[chunk].fill.Contains(index) { 210 | v, ok = c.chunks[chunk].data[index], true 211 | } 212 | return 213 | } 214 | 215 | // FilterString filters down the values based on the specified predicate. The column for 216 | // this filter must be a string. 
217 | func (c *columnString) FilterString(chunk commit.Chunk, index bitmap.Bitmap, predicate func(v string) bool) { 218 | if int(chunk) < len(c.chunks) { 219 | fill, data := c.chunkAt(chunk) 220 | index.And(fill) 221 | index.Filter(func(idx uint32) bool { 222 | return predicate(data[idx]) 223 | }) 224 | } 225 | } 226 | 227 | // Snapshot writes the entire column into the specified destination buffer 228 | func (c *columnString) Snapshot(chunk commit.Chunk, dst *commit.Buffer) { 229 | fill, data := c.chunkAt(chunk) 230 | fill.Range(func(x uint32) { 231 | dst.PutString(commit.Put, chunk.Min()+x, data[x]) 232 | }) 233 | } 234 | 235 | // rwString represents read-write accessor for strings 236 | type rwString struct { 237 | rdString[*columnString] 238 | writer *commit.Buffer 239 | } 240 | 241 | // Set sets the value at the current transaction cursor 242 | func (s rwString) Set(value string) { 243 | s.writer.PutString(commit.Put, *s.cursor, value) 244 | } 245 | 246 | // Merge merges the value at the current transaction cursor 247 | func (s rwString) Merge(value string) { 248 | s.writer.PutString(commit.Merge, *s.cursor, value) 249 | } 250 | 251 | // String returns a string column accessor 252 | func (txn *Txn) String(columnName string) rwString { 253 | return rwString{ 254 | rdString: readStringOf[*columnString](txn, columnName), 255 | writer: txn.bufferFor(columnName), 256 | } 257 | } 258 | 259 | // --------------------------- Key ---------------------------- 260 | 261 | // columnKey represents the primary key column implementation 262 | type columnKey struct { 263 | columnString 264 | name string // Name of the column 265 | lock sync.RWMutex // Lock to protect the lookup table 266 | seek map[string]uint32 // Lookup table for O(1) index seek 267 | } 268 | 269 | // makeKey creates a new primary key column 270 | func makeKey() Column { 271 | return &columnKey{ 272 | seek: make(map[string]uint32, 64), 273 | columnString: columnString{ 274 | chunks: make(chunks[string], 0, 
4), 275 | }, 276 | } 277 | } 278 | 279 | // Apply applies a set of operations to the column. 280 | func (c *columnKey) Apply(chunk commit.Chunk, r *commit.Reader) { 281 | fill, data := c.chunkAt(chunk) 282 | from := chunk.Min() 283 | 284 | for r.Next() { 285 | offset := r.Offset - int32(from) 286 | switch r.Type { 287 | case commit.Put: 288 | value := string(r.Bytes()) 289 | 290 | fill[offset>>6] |= 1 << (offset & 0x3f) 291 | data[offset] = value 292 | c.lock.Lock() 293 | c.seek[value] = uint32(r.Offset) 294 | c.lock.Unlock() 295 | 296 | case commit.Delete: 297 | fill.Remove(uint32(offset)) 298 | c.lock.Lock() 299 | delete(c.seek, string(data[offset])) 300 | c.lock.Unlock() 301 | } 302 | } 303 | } 304 | 305 | // OffsetOf returns the offset for a particular value 306 | func (c *columnKey) OffsetOf(v string) (uint32, bool) { 307 | c.lock.RLock() 308 | idx, ok := c.seek[v] 309 | c.lock.RUnlock() 310 | return idx, ok 311 | } 312 | 313 | // rwKey represents read-write accessor for primary keys. 
314 | type rwKey struct { 315 | cursor *uint32 316 | writer *commit.Buffer 317 | reader *columnKey 318 | } 319 | 320 | // Set sets the value at the current transaction index 321 | func (s rwKey) Set(value string) error { 322 | if _, ok := s.reader.OffsetOf(value); !ok { 323 | s.writer.PutString(commit.Put, *s.cursor, value) 324 | return nil 325 | } 326 | 327 | return fmt.Errorf("column: unable to set duplicate key '%s'", value) 328 | } 329 | 330 | // Get loads the value at the current transaction index 331 | func (s rwKey) Get() (string, bool) { 332 | return s.reader.LoadString(*s.cursor) 333 | } 334 | 335 | // Enum returns a enumerable column accessor 336 | func (txn *Txn) Key() rwKey { 337 | if txn.owner.pk == nil { 338 | panic(fmt.Errorf("column: primary key column does not exist")) 339 | } 340 | 341 | return rwKey{ 342 | cursor: &txn.cursor, 343 | writer: txn.bufferFor(txn.owner.pk.name), 344 | reader: txn.owner.pk, 345 | } 346 | } 347 | 348 | // --------------------------- Reader ---------------------------- 349 | 350 | // rdString represents a read-only accessor for strings 351 | type rdString[T Textual] reader[T] 352 | 353 | // Get loads the value at the current transaction cursor 354 | func (s rdString[T]) Get() (string, bool) { 355 | return s.reader.LoadString(*s.cursor) 356 | } 357 | 358 | // readStringOf creates a new string reader 359 | func readStringOf[T Textual](txn *Txn, columnName string) rdString[T] { 360 | return rdString[T](readerFor[T](txn, columnName)) 361 | } 362 | -------------------------------------------------------------------------------- /commit/reader.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) Roman Atachiants and contributors. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for details. 
package commit

import (
    "encoding/binary"
    "math"
    "unsafe"
)

// Reader represents a commit log reader (iterator).
type Reader struct {
    Type       OpType  // The current operation type
    i0, i1     int     // The value start and end
    buffer     []byte  // The log slice
    Offset     int32   // The current offset
    last       int     // The read position
    start      int32   // The start offset
    x0, x1     uint32  // The lower and upper bounds of the underlying buffer
    headString int     // The starting position of a string value
    parent     *Buffer // The parent buffer
}

// NewReader creates a new reader for a commit log.
func NewReader() *Reader {
    return &Reader{}
}

// Seek resets the reader so it can be reused on the given buffer.
func (r *Reader) Seek(b *Buffer) {
    r.parent = b
    r.use(b.buffer)
}

// Rewind rewinds the reader back to zero, restoring the start offset.
func (r *Reader) Rewind() {
    r.use(r.buffer)
    r.Offset = r.start
}

// use sets the buffer and resets the reader state.
func (r *Reader) use(buffer []byte) {
    r.buffer = buffer
    r.headString = 0
    r.last = 0
    r.i0 = 0
    r.i1 = 0
    r.Offset = 0
    r.Type = Put
}

// --------------------------- Value Read ----------------------------

// Int16 reads an int16 value.
func (r *Reader) Int16() int16 {
    return int16(binary.BigEndian.Uint16(r.buffer[r.i0:r.i1]))
}

// Int32 reads an int32 value.
func (r *Reader) Int32() int32 {
    return int32(binary.BigEndian.Uint32(r.buffer[r.i0:r.i1]))
}

// Int64 reads an int64 value.
func (r *Reader) Int64() int64 {
    return int64(binary.BigEndian.Uint64(r.buffer[r.i0:r.i1]))
}

// Uint16 reads a uint16 value.
func (r *Reader) Uint16() uint16 {
    return binary.BigEndian.Uint16(r.buffer[r.i0:r.i1])
}

// Uint32 reads a uint32 value.
func (r *Reader) Uint32() uint32 {
    return binary.BigEndian.Uint32(r.buffer[r.i0:r.i1])
}

// Uint64 reads a uint64 value.
func (r *Reader) Uint64() uint64 {
    return binary.BigEndian.Uint64(r.buffer[r.i0:r.i1])
}

// Float32 reads a float32 value.
func (r *Reader) Float32() float32 {
    return math.Float32frombits(binary.BigEndian.Uint32(r.buffer[r.i0:r.i1]))
}

// Float64 reads a float64 value.
func (r *Reader) Float64() float64 {
    return math.Float64frombits(binary.BigEndian.Uint64(r.buffer[r.i0:r.i1]))
}

// Number reads a float64 value. This is used for codegen, equivalent to Float64().
func (r *Reader) Number() float64 {
    return r.Float64()
}

// Bytes reads a binary value. The returned slice aliases the underlying log
// buffer; callers must copy it if they retain it beyond the current entry.
func (r *Reader) Bytes() []byte {
    return r.buffer[r.i0:r.i1]
}

// --------------------------- Reader Interface ----------------------------

// Index returns the current index of the reader.
func (r *Reader) Index() uint32 {
    return uint32(r.Offset)
}

// IndexAtChunk returns the current index assuming chunk starts at 0
// (i.e. the offset with the chunk base stripped off).
func (r *Reader) IndexAtChunk() uint32 {
    return uint32(r.Offset) - ((uint32(r.Offset) >> chunkShift) << chunkShift)
}

// Int reads an int value of any size.
func (r *Reader) Int() int {
    return int(r.Uint())
}

// Uint reads a uint value of any size (2, 4 or 8 bytes, big-endian),
// determined by the width of the current value slice.
func (r *Reader) Uint() uint {
    switch r.i1 - r.i0 {
    case 2:
        return uint(binary.BigEndian.Uint16(r.buffer[r.i0:r.i1]))
    case 4:
        return uint(binary.BigEndian.Uint32(r.buffer[r.i0:r.i1]))
    case 8:
        return uint(binary.BigEndian.Uint64(r.buffer[r.i0:r.i1]))
    default:
        panic("column: unable to read, unsupported integer size")
    }
}

// Float reads a floating-point value of any size.
func (r *Reader) Float() float64 {
    switch r.i1 - r.i0 {
    case 4:
        return float64(r.Float32())
    case 8:
        return r.Float64()
    default:
        panic("column: unable to read, unsupported float size")
    }
}

// String reads a string value. The string aliases the underlying buffer via an
// unsafe cast to avoid a copy; it is only valid until the buffer is mutated.
func (r *Reader) String() string {
    b := r.buffer[r.i0:r.i1]
    return *(*string)(unsafe.Pointer(&b))
}

// Bool reads a boolean value.
func (r *Reader) Bool() bool {
    return r.Type == PutTrue
}

// IsUpsert returns true if the current operation is an insert or update
func (r *Reader) IsUpsert() bool {
    return r.Type == Put
}

// IsDelete returns true if the current operation is a deletion
func (r *Reader) IsDelete() bool {
    return r.Type == Delete
}

// --------------------------- Value Swap ----------------------------

// SwapInt16 swaps an int16 value with a new one and marks the operation as a store.
func (r *Reader) SwapInt16(v int16) int16 {
    binary.BigEndian.PutUint16(r.buffer[r.i0:r.i1], uint16(v))
    r.writeSwap()
    return v
}

// SwapInt32 swaps an int32 value with a new one and marks the operation as a store.
func (r *Reader) SwapInt32(v int32) int32 {
    binary.BigEndian.PutUint32(r.buffer[r.i0:r.i1], uint32(v))
    r.writeSwap()
    return v
}

// SwapInt64 swaps an int64 value with a new one and marks the operation as a store.
func (r *Reader) SwapInt64(v int64) int64 {
    binary.BigEndian.PutUint64(r.buffer[r.i0:r.i1], uint64(v))
    r.writeSwap()
    return v
}

// SwapInt swaps an int value (stored as 8 bytes) with a new one.
func (r *Reader) SwapInt(v int) int {
    binary.BigEndian.PutUint64(r.buffer[r.i0:r.i1], uint64(v))
    r.writeSwap()
    return v
}

// SwapUint16 swaps a uint16 value with a new one.
func (r *Reader) SwapUint16(v uint16) uint16 {
    binary.BigEndian.PutUint16(r.buffer[r.i0:r.i1], v)
    r.writeSwap()
    return v
}

// SwapUint32 swaps a uint32 value with a new one.
func (r *Reader) SwapUint32(v uint32) uint32 {
    binary.BigEndian.PutUint32(r.buffer[r.i0:r.i1], v)
    r.writeSwap()
    return v
}

// SwapUint64 swaps a uint64 value with a new one.
func (r *Reader) SwapUint64(v uint64) uint64 {
    binary.BigEndian.PutUint64(r.buffer[r.i0:r.i1], v)
    r.writeSwap()
    return v
}

// SwapUint swaps a uint value (stored as 8 bytes) with a new one.
func (r *Reader) SwapUint(v uint) uint {
    binary.BigEndian.PutUint64(r.buffer[r.i0:r.i1], uint64(v))
    r.writeSwap()
    return v
}

// SwapFloat32 swaps a float32 value with a new one.
func (r *Reader) SwapFloat32(v float32) float32 {
    binary.BigEndian.PutUint32(r.buffer[r.i0:r.i1], math.Float32bits(v))
    r.writeSwap()
    return v
}

// SwapFloat64 swaps a float64 value with a new one.
func (r *Reader) SwapFloat64(v float64) float64 {
    binary.BigEndian.PutUint64(r.buffer[r.i0:r.i1], math.Float64bits(v))
    r.writeSwap()
    return v
}

// SwapBool swaps a boolean value with a new one (encoded as a single byte).
func (r *Reader) SwapBool(b bool) bool {
    r.buffer[r.i0] = 0
    if b {
        r.buffer[r.i0] = 1
    }
    r.writeSwap()
    return b
}

// SwapString swaps a string value with a new one.
func (r *Reader) SwapString(v string) string {
    r.SwapBytes(toBytes(v))
    return v
}

// SwapBytes swaps a binary value with a new one.
func (r *Reader) SwapBytes(v []byte) []byte {
    // Fast path: same size, overwrite in place and mark the operation as a store.
    if (r.i1 - r.i0) == len(v) {
        copy(r.buffer[r.i0:r.i1], v)
        r.buffer[r.headString] &= 0xf0      // Clear the operation nibble of the header
        r.buffer[r.headString] |= byte(Put) // Mark as store
        return v
    }

    // If the value we write is of different size, we append a new value
    // to the end of the underlying buffer. In doing so, we may lose our
    // existing slice due to re-allocation. Hence, we reslice.
    r.parent.PutBytes(Put, r.Index(), v)
    r.buffer = r.parent.buffer[r.x0:r.x1]
    r.buffer[r.headString] &= 0xf0       // Clear the operation nibble of the header
    r.buffer[r.headString] |= byte(Skip) // Old copy is superseded, mark it skipped
    return v
}

// writeSwap marks the current value to be a store (only for fixed length)
func (r *Reader) writeSwap() {
    r.buffer[r.i0-1] &= 0xf0      // Clear the operation nibble of the header byte
    r.buffer[r.i0-1] |= byte(Put) // Mark as store
}

// --------------------------- Chunk Iterator ----------------------------

// Range iterates over parts of the buffer which match the specified chunk,
// calling fn once for every matching section.
func (r *Reader) Range(buf *Buffer, chunk Chunk, fn func(*Reader)) {
    for i, c := range buf.chunks {
        if c.Chunk != chunk {
            continue // Not the right chunk, skip it
        }

        // Find the next offset: either the next chunk's start or the end of the buffer
        r.x0 = uint32(c.Start)
        r.x1 = uint32(len(buf.buffer))
        if len(buf.chunks) > i+1 {
            r.x1 = uint32(buf.chunks[i+1].Start)
        }

        // Set the reader to the subset buffer and call the delegate
        r.use(buf.buffer[r.x0:r.x1])
        r.parent = buf
        r.Offset = int32(c.Value)
        r.start = int32(c.Value)
        fn(r)
    }
}

// --------------------------- Next Iterator ----------------------------

// Next reads the current operation and returns false if there are no more
// operations in the log.
func (r *Reader) Next() bool {
    if r.last >= len(r.buffer) {
        return false // Entire buffer consumed
    }

    // The two high bits of the header byte select the case below
    // (isString / isNext flags).
    header := r.buffer[r.last]
    switch header & 0xc0 {

    // If this is a variable-size value but not a next neighbour, read the
    // string and its offset.
    case isString:
        r.headString = r.last
        r.readString(header)
        r.readOffset()
        return true

    // If this is both a variable-size value and a next neighbour, read the
    // string and skip the offset.
    case isNext | isString:
        r.headString = r.last
        r.readString(header)
        r.Offset++
        return true

    // If the first bit is set, this means that the delta is one and we
    // can skip reading the actual offset. (special case)
    case isNext:
        r.readFixed(header)
        r.Offset++
        return true

    // If it's not a string nor it is an immediate neighbor, we need to read
    // the full offset.
    default:
        r.readFixed(header)
        r.readOffset()
        return true
    }
}

// readOffset reads the signed variable-size integer at the current tail. While
// this is a signed integer, it is encoded as a variable-size unsigned integer.
// This would lead to negative values not being packed well, but given the
// rarity of negative values in the data, this is acceptable.
func (r *Reader) readOffset() {
    b := uint32(r.buffer[r.last])
    if b < 0x80 { // Single-byte varint
        r.last++
        r.Offset += int32(b)
        return
    }

    x := b & 0x7f
    b = uint32(r.buffer[r.last+1])
    if b < 0x80 { // Two-byte varint
        r.last += 2
        r.Offset += int32(x | (b << 7))
        return
    }

    x |= (b & 0x7f) << 7
    b = uint32(r.buffer[r.last+2])
    if b < 0x80 { // Three-byte varint
        r.last += 3
        r.Offset += int32(x | (b << 14))
        return
    }

    x |= (b & 0x7f) << 14
    b = uint32(r.buffer[r.last+3])
    if b < 0x80 { // Four-byte varint
        r.last += 4
        r.Offset += int32(x | (b << 21))
        return
    }

    x |= (b & 0x7f) << 21
    b = uint32(r.buffer[r.last+4])
    if b < 0x80 { // Five-byte varint
        r.last += 5
        r.Offset += int32(x | (b << 28))
        return
    }
    // NOTE(review): a varint longer than 5 bytes falls through without advancing
    // r.last or updating r.Offset — presumably the writer never emits one; confirm
    // against the buffer encoder.
}

// readFixed reads the fixed-size value at the current position. The value size
// is decoded from bits 4-5 of the header byte; the low nibble is the op type.
func (r *Reader) readFixed(v byte) {
    size := int(1 << (v >> 4 & 0b11) & 0b1110)
    r.last++
    r.i0 = r.last
    r.last += size
    r.i1 = r.last
    r.Type = OpType(v & 0x0f)
}

// readString reads the operation type and the value at the current position.
// The header byte is followed by a 2-byte big-endian length, then the payload.
func (r *Reader) readString(v byte) {
    size := int(r.buffer[r.last+2]) | int(r.buffer[r.last+1])<<8
    r.last += 3
    r.i0 = r.last
    r.last += size
    r.i1 = r.last
    r.Type = OpType(v & 0x0f)
}
-------------------------------------------------------------------------------- /snapshot_test.go: --------------------------------------------------------------------------------
// Copyright (c) Roman Atachiants and contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for details.
3 | 4 | package column 5 | 6 | import ( 7 | "bytes" 8 | "context" 9 | "fmt" 10 | "io" 11 | "math" 12 | "math/rand" 13 | "os" 14 | "runtime" 15 | "sync" 16 | "sync/atomic" 17 | "testing" 18 | 19 | "github.com/kelindar/async" 20 | "github.com/kelindar/column/commit" 21 | "github.com/klauspost/compress/s2" 22 | "github.com/stretchr/testify/assert" 23 | ) 24 | 25 | /* 26 | cpu: Intel(R) Core(TM) i7-9700K CPU @ 3.60GHz 27 | BenchmarkSave/write-state-8 10 108239410 ns/op 1250.99 MB/s 41891580 B/op 817 allocs/op 28 | BenchmarkSave/read-state-8 22 55612727 ns/op 2434.82 MB/s 140954620 B/op 3247 allocs/op 29 | */ 30 | func BenchmarkSave(b *testing.B) { 31 | b.Run("write-state", func(b *testing.B) { 32 | output := bytes.NewBuffer(nil) 33 | input := loadPlayers(1e6) 34 | 35 | runtime.GC() 36 | b.ReportAllocs() 37 | b.ResetTimer() 38 | for n := 0; n < b.N; n++ { 39 | output.Reset() 40 | n, _ := input.writeState(output) 41 | b.SetBytes(n) 42 | } 43 | }) 44 | 45 | b.Run("read-state", func(b *testing.B) { 46 | buffer := bytes.NewBuffer(nil) 47 | output := NewCollection() 48 | input := loadPlayers(1e6) 49 | input.writeState(buffer) 50 | 51 | runtime.GC() 52 | b.ReportAllocs() 53 | b.ResetTimer() 54 | for n := 0; n < b.N; n++ { 55 | output.readState(bytes.NewBuffer(buffer.Bytes())) 56 | b.SetBytes(int64(buffer.Len())) 57 | } 58 | }) 59 | } 60 | 61 | // --------------------------- Streaming ---------------------------- 62 | 63 | // Test replication many times 64 | func TestReplicate(t *testing.T) { 65 | for x := 0; x < 20; x++ { 66 | rand.Seed(int64(x)) 67 | runReplication(t, 10000, 50, runtime.NumCPU()) 68 | } 69 | } 70 | 71 | // runReplication runs a concurrent replication test 72 | func runReplication(t *testing.T, updates, inserts, concurrency int) { 73 | t.Run(fmt.Sprintf("replicate-%v-%v", updates, inserts), func(t *testing.T) { 74 | writer := make(commit.Channel, 10) 75 | object := map[string]interface{}{ 76 | "float64": float64(0), 77 | "int32": int32(0), 78 | "string": "", 
79 | } 80 | 81 | // Create a primary 82 | primary := NewCollection(Options{ 83 | Capacity: inserts, 84 | Writer: &writer, 85 | }) 86 | // Replica with the same schema 87 | replica := NewCollection(Options{ 88 | Capacity: inserts, 89 | }) 90 | 91 | // Create schemas and start streaming replication into the replica 92 | primary.CreateColumnsOf(object) 93 | replica.CreateColumnsOf(object) 94 | var done sync.WaitGroup 95 | done.Add(1) 96 | go func() { 97 | defer done.Done() // Drained 98 | for change := range writer { 99 | assert.NoError(t, replica.Replay(change)) 100 | } 101 | }() 102 | 103 | // Write some objects 104 | for i := 0; i < inserts; i++ { 105 | primary.Insert(func(r Row) error { 106 | return r.SetMany(object) 107 | }) 108 | } 109 | 110 | work := make(chan async.Task) 111 | pool := async.Consume(context.Background(), 50, work) 112 | defer pool.Cancel() 113 | 114 | // Random concurrent updates 115 | var wg sync.WaitGroup 116 | wg.Add(updates) 117 | for i := 0; i < updates; i++ { 118 | work <- async.NewTask(func(ctx context.Context) (interface{}, error) { 119 | defer wg.Done() 120 | 121 | // Randomly update a column 122 | primary.Query(func(txn *Txn) error { 123 | txn.cursor = uint32(rand.Int31n(int32(inserts - 1))) 124 | switch rand.Int31n(3) { 125 | case 0: 126 | col := txn.Float64("float64") 127 | col.Set(math.Round(rand.Float64()*1000) / 100) 128 | case 1: 129 | col := txn.Int32("int32") 130 | col.Set(rand.Int31n(100000)) 131 | case 2: 132 | col := txn.String("string") 133 | col.Set(fmt.Sprintf("hi %v", rand.Int31n(10))) 134 | } 135 | return nil 136 | }) 137 | 138 | // Randomly delete an item 139 | if rand.Int31n(5) == 0 { 140 | primary.DeleteAt(uint32(rand.Int31n(int32(inserts - 1)))) 141 | } 142 | 143 | // Randomly insert an item 144 | if rand.Int31n(5) == 0 { 145 | primary.Insert(func(r Row) error { 146 | return r.SetMany(object) 147 | }) 148 | } 149 | return nil, nil 150 | }) 151 | } 152 | 153 | // Replay all of the changes into the replica 154 | 
wg.Wait() 155 | close(writer) 156 | done.Wait() 157 | 158 | // Check if replica and primary are the same 159 | if !assert.Equal(t, primary.Count(), replica.Count(), "replica and primary should be the same size") { 160 | return 161 | } 162 | 163 | /*primary.Query(func(txn *Txn) error { 164 | col1 := txn.Float64("float64") 165 | 166 | return txn.Range(func(idx uint32) { 167 | if v1, ok := col1.Get(idx); ok && v1 != 0 { 168 | replica.SelectAt(idx, func(v Selector) { 169 | assert.Equal(t, v1, v.FloatAt("float64")) 170 | }) 171 | } 172 | }) 173 | })*/ 174 | }) 175 | } 176 | 177 | // --------------------------- Snapshotting ---------------------------- 178 | 179 | func TestSnapshot(t *testing.T) { 180 | amount := 50000 181 | buffer := bytes.NewBuffer(nil) 182 | input := loadPlayers(amount) 183 | 184 | var wg sync.WaitGroup 185 | wg.Add(amount) 186 | go func() { 187 | for i := 0; i < amount; i++ { 188 | assert.NoError(t, input.QueryAt(uint32(i), func(r Row) error { 189 | r.SetString("name", "Roman") 190 | return nil 191 | })) 192 | wg.Done() 193 | } 194 | }() 195 | 196 | // Start snapshotting 197 | assert.NoError(t, input.Snapshot(buffer)) 198 | assert.NotZero(t, buffer.Len()) 199 | 200 | // Restore the snapshot 201 | wg.Wait() 202 | output := newEmpty(amount) 203 | assert.NoError(t, output.Restore(buffer)) 204 | assert.Equal(t, amount, output.Count()) 205 | } 206 | 207 | func TestLargeSnapshot(t *testing.T) { 208 | const amount = 3_000_000 209 | 210 | encoded, err := os.ReadFile("fixtures/3million.bin.s2") 211 | assert.NoError(t, err) 212 | input, err := s2.Decode(nil, encoded) 213 | assert.NoError(t, err) 214 | 215 | // Restore the snapshot 216 | output := newEmpty(amount) 217 | assert.NoError(t, output.Restore(bytes.NewBuffer(input))) 218 | assert.Equal(t, amount, output.Count()) 219 | } 220 | 221 | func TestSnapshotFailures(t *testing.T) { 222 | input := NewCollection() 223 | input.CreateColumn("name", ForString()) 224 | input.Insert(func(r Row) error { 225 | 
r.SetString("name", "Roman") 226 | return nil 227 | }) 228 | 229 | go input.Insert(func(r Row) error { 230 | r.SetString("name", "Roman") 231 | return nil 232 | }) 233 | 234 | for size := 0; size < 80; size++ { 235 | output := &limitWriter{Limit: size} 236 | 237 | assert.Error(t, input.Snapshot(output), 238 | fmt.Sprintf("write failure size=%d", size)) 239 | } 240 | } 241 | 242 | func TestRestoreIncomplete(t *testing.T) { 243 | buffer := bytes.NewBuffer(nil) 244 | output := newEmpty(500) 245 | assert.Error(t, output.Restore(buffer)) 246 | } 247 | 248 | func TestSnapshotFailedAppendCommit(t *testing.T) { 249 | input := NewCollection() 250 | input.CreateColumn("name", ForString()) 251 | input.record = commit.Open(&limitWriter{Limit: 0}) 252 | _, err := input.Insert(func(r Row) error { 253 | r.SetString("name", "Roman") 254 | return nil 255 | }) 256 | assert.NoError(t, err) 257 | } 258 | 259 | func TestSnapshotDoubleApply(t *testing.T) { 260 | amount := 500 261 | input := loadPlayers(amount) 262 | var startVal int 263 | 264 | // Op 1 265 | input.QueryAt(0, func(r Row) error { 266 | age, _ := r.Int("age") 267 | startVal = age 268 | 269 | r.MergeInt("age", 1) 270 | return nil 271 | }) 272 | 273 | // Save snapshot with Op 1 274 | buffer := bytes.NewBuffer(nil) 275 | assert.NoError(t, input.Snapshot(buffer)) 276 | 277 | // Op 2 278 | input.QueryAt(0, func(r Row) error { 279 | r.MergeInt("age", 1) 280 | return nil 281 | }) 282 | 283 | // Save snapshot with Op 2 284 | buffer2 := bytes.NewBuffer(nil) 285 | assert.NoError(t, input.Snapshot(buffer2)) 286 | 287 | // Apply Snapshot 1, check for op 1 288 | output := newEmpty(amount) 289 | assert.NoError(t, output.Restore(buffer)) 290 | output.QueryAt(0, func(r Row) error { 291 | age, _ := r.Int("age") 292 | assert.Equal(t, startVal+1, age) 293 | return nil 294 | }) 295 | 296 | // Apply Snapshot 2, check for op 2 297 | // Verify that only second delete is applied, not both 298 | assert.NoError(t, output.Restore(buffer2)) 299 | 
output.QueryAt(0, func(r Row) error { 300 | age, _ := r.Int("age") 301 | assert.Equal(t, startVal+2, age) 302 | return nil 303 | }) 304 | } 305 | 306 | // --------------------------- State Codec ---------------------------- 307 | 308 | func TestWriteTo(t *testing.T) { 309 | input := NewCollection() 310 | input.CreateColumn("name", ForEnum()) 311 | for i := 0; i < 2e4; i++ { 312 | input.Insert(func(r Row) error { 313 | r.SetEnum("name", "Roman") 314 | return nil 315 | }) 316 | } 317 | 318 | // Write a snapshot into a buffer 319 | buffer := bytes.NewBuffer(nil) 320 | n, err := input.writeState(buffer) 321 | assert.NotZero(t, n) 322 | assert.NoError(t, err) 323 | 324 | // Restore the collection from the snapshot 325 | output := NewCollection() 326 | output.CreateColumn("name", ForEnum()) 327 | m, err := output.readState(buffer) 328 | assert.NotEmpty(t, m) 329 | assert.NoError(t, err) 330 | assert.Equal(t, input.Count(), output.Count()) 331 | 332 | assert.NoError(t, output.QueryAt(0, func(r Row) error { 333 | name, _ := r.Enum("name") 334 | assert.Equal(t, "Roman", name) 335 | return nil 336 | })) 337 | } 338 | 339 | func TestCollectionCodec(t *testing.T) { 340 | input := loadPlayers(5e4) 341 | 342 | // Write a snapshot into a buffer 343 | buffer := bytes.NewBuffer(nil) 344 | n, err := input.writeState(buffer) 345 | assert.NotZero(t, n) 346 | assert.NoError(t, err) 347 | 348 | // Restore the collection from the snapshot 349 | output := newEmpty(5e4) 350 | m, err := output.readState(buffer) 351 | assert.NotEmpty(t, m) 352 | assert.NoError(t, err) 353 | assert.Equal(t, input.Count(), output.Count()) 354 | } 355 | 356 | func TestWriteToSizeUncompresed(t *testing.T) { 357 | input := loadPlayers(1e4) // 10K 358 | output := bytes.NewBuffer(nil) 359 | _, err := input.writeState(output) 360 | assert.NoError(t, err) 361 | assert.NotZero(t, output.Len()) 362 | } 363 | 364 | func TestWriteToFailures(t *testing.T) { 365 | input := NewCollection() 366 | input.CreateColumn("name", 
ForString())
	input.Insert(func(r Row) error {
		r.SetString("name", "Roman")
		return nil
	})

	// Attempt the write at every truncation size; each attempt must surface an
	// error from the size-limited writer.
	// NOTE(review): 69 appears to be the full serialized size of this one-row
	// collection — confirm against writeState if the format changes.
	for size := 0; size < 69; size++ {
		output := &limitWriter{Limit: size}
		_, err := input.writeState(output)
		assert.Error(t, err, fmt.Sprintf("write failure size=%d", size))
	}
}

// TestWriteEmpty round-trips a collection with a column but no rows through
// writeState/readState and verifies that zero rows come back.
func TestWriteEmpty(t *testing.T) {
	buffer := bytes.NewBuffer(nil)

	{ // Write the collection
		input := NewCollection()
		input.CreateColumn("name", ForString())
		_, err := input.writeState(buffer)
		assert.NoError(t, err)
	}

	{ // Read the collection back
		output := NewCollection()
		output.CreateColumn("name", ForString())
		_, err := output.readState(buffer)
		assert.NoError(t, err)
		assert.Equal(t, 0, output.Count())
	}
}

// TestReadFromFailures serializes a one-row collection, then replays every
// truncated prefix of the snapshot and asserts that readState reports an
// error for each incomplete input.
func TestReadFromFailures(t *testing.T) {
	input := NewCollection()
	input.CreateColumn("name", ForString())
	input.Insert(func(r Row) error {
		r.SetString("name", "Roman")
		return nil
	})

	buffer := bytes.NewBuffer(nil)
	_, err := input.writeState(buffer)
	assert.NoError(t, err)

	for size := 0; size < buffer.Len()-1; size++ {
		output := NewCollection()

		output.CreateColumn("name", ForString())
		_, err := output.readState(bytes.NewReader(buffer.Bytes()[:size]))
		assert.Error(t, err, fmt.Sprintf("read size %v", size))
	}
}

// --------------------------- Mocks & Fixtures ----------------------------

// noopWriter is a commit writer that simply counts the commits it receives.
type noopWriter struct {
	commits uint64 // number of commits appended, updated atomically
}

// Append atomically increments the commit counter; the commit payload itself
// is discarded.
func (w *noopWriter) Append(commit commit.Commit) error {
	atomic.AddUint64(&w.commits, 1)
	return nil
}

// limitWriter is an io.Writer that fails with io.ErrShortBuffer once the
// running total of bytes written exceeds Limit, allowing tests to inject a
// write failure at an arbitrary offset.
type limitWriter struct {
	value uint32 // running total of bytes written, updated atomically
	Limit int    // maximum number of bytes accepted before failing
}

// Write accumulates the total byte count and returns io.ErrShortBuffer once
// that total exceeds the configured Limit; otherwise it accepts the input.
func (w *limitWriter) Write(p []byte) (int, error) {
	if n := atomic.AddUint32(&w.value, uint32(len(p))); int(n) > w.Limit {
		return 0, io.ErrShortBuffer
	}
	return len(p), nil
}

// Read is a stub so limitWriter also satisfies io.Reader; it always reports
// zero bytes read and no error.
func (w *limitWriter) Read(p []byte) (int, error) {
	return 0, nil
}
--------------------------------------------------------------------------------
/column_numbers.go:
--------------------------------------------------------------------------------
// This code was generated, DO NOT EDIT.
// Any changes will be lost if this file is regenerated.

package column

import (
	"github.com/kelindar/bitmap"
	"github.com/kelindar/column/commit"
)

// --------------------------- Int ----------------------------

// makeInts creates a new vector for ints
func makeInts(opts ...func(*option[int])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value int) { buffer.PutInt(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []int, opts option[int]) {
			// Replay the commit log into the column: offset is the
			// chunk-relative index reported by r.IndexAtChunk().
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					// Set the presence bit for this slot, then store the value.
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Int()
				case commit.Merge:
					// Merge the delta with the current value and swap the
					// merged result back into the commit log.
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapInt(opts.Merge(data[offset], r.Int()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwInt represents a read-write cursor for int
type rwInt struct {
	rdNumber[int]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwInt) Set(value int) {
	s.writer.PutInt(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwInt) Merge(delta int) {
	s.writer.PutInt(commit.Merge, s.txn.cursor, delta)
}

// Int returns a read-write accessor for int column
func (txn *Txn) Int(columnName string) rwInt {
	return rwInt{
		rdNumber: readNumberOf[int](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Int16 ----------------------------

// makeInt16s creates a new vector for int16s
func makeInt16s(opts ...func(*option[int16])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value int16) { buffer.PutInt16(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []int16, opts option[int16]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Int16()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapInt16(opts.Merge(data[offset], r.Int16()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwInt16 represents a read-write cursor for int16
type rwInt16 struct {
	rdNumber[int16]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwInt16) Set(value int16) {
	s.writer.PutInt16(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwInt16) Merge(delta int16) {
	s.writer.PutInt16(commit.Merge, s.txn.cursor, delta)
}

// Int16 returns a read-write accessor for int16 column
func (txn *Txn) Int16(columnName string) rwInt16 {
	return rwInt16{
		rdNumber: readNumberOf[int16](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Int32 ----------------------------

// makeInt32s creates a new vector for int32s
func makeInt32s(opts ...func(*option[int32])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value int32) { buffer.PutInt32(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []int32, opts option[int32]) {
			// Replay the commit log into the column: offset is the
			// chunk-relative index reported by r.IndexAtChunk().
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					// Set the presence bit for this slot, then store the value.
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Int32()
				case commit.Merge:
					// Merge the delta with the current value and swap the
					// merged result back into the commit log.
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapInt32(opts.Merge(data[offset], r.Int32()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwInt32 represents a read-write cursor for int32
type rwInt32 struct {
	rdNumber[int32]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwInt32) Set(value int32) {
	s.writer.PutInt32(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwInt32) Merge(delta int32) {
	s.writer.PutInt32(commit.Merge, s.txn.cursor, delta)
}

// Int32 returns a read-write accessor for int32 column
func (txn *Txn) Int32(columnName string) rwInt32 {
	return rwInt32{
		rdNumber: readNumberOf[int32](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Int64 ----------------------------

// makeInt64s creates a new vector for int64s
func makeInt64s(opts ...func(*option[int64])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value int64) { buffer.PutInt64(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []int64, opts option[int64]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Int64()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapInt64(opts.Merge(data[offset], r.Int64()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwInt64 represents a read-write cursor for int64
type rwInt64 struct {
	rdNumber[int64]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwInt64) Set(value int64) {
	s.writer.PutInt64(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwInt64) Merge(delta int64) {
	s.writer.PutInt64(commit.Merge, s.txn.cursor, delta)
}

// Int64 returns a read-write accessor for int64 column
func (txn *Txn) Int64(columnName string) rwInt64 {
	return rwInt64{
		rdNumber: readNumberOf[int64](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Uint ----------------------------

// makeUints creates a new vector for uints
func makeUints(opts ...func(*option[uint])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value uint) { buffer.PutUint(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []uint, opts option[uint]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Uint()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapUint(opts.Merge(data[offset], r.Uint()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwUint represents a read-write cursor for uint
type rwUint struct {
	rdNumber[uint]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwUint) Set(value uint) {
	s.writer.PutUint(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwUint) Merge(delta uint) {
	s.writer.PutUint(commit.Merge, s.txn.cursor, delta)
}

// Uint returns a read-write accessor for uint column
func (txn *Txn) Uint(columnName string) rwUint {
	return rwUint{
		rdNumber: readNumberOf[uint](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Uint16 ----------------------------

// makeUint16s creates a new vector for uint16s
func makeUint16s(opts ...func(*option[uint16])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value uint16) { buffer.PutUint16(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []uint16, opts option[uint16]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Uint16()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapUint16(opts.Merge(data[offset], r.Uint16()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwUint16 represents a read-write cursor for uint16
type rwUint16 struct {
	rdNumber[uint16]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwUint16) Set(value uint16) {
	s.writer.PutUint16(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwUint16) Merge(delta uint16) {
	s.writer.PutUint16(commit.Merge, s.txn.cursor, delta)
}

// Uint16 returns a read-write accessor for uint16 column
func (txn *Txn) Uint16(columnName string) rwUint16 {
	return rwUint16{
		rdNumber: readNumberOf[uint16](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Uint32 ----------------------------

// makeUint32s creates a new vector for uint32s
func makeUint32s(opts ...func(*option[uint32])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value uint32) { buffer.PutUint32(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []uint32, opts option[uint32]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Uint32()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapUint32(opts.Merge(data[offset], r.Uint32()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwUint32 represents a read-write cursor for uint32
type rwUint32 struct {
	rdNumber[uint32]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwUint32) Set(value uint32) {
	s.writer.PutUint32(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwUint32) Merge(delta uint32) {
	s.writer.PutUint32(commit.Merge, s.txn.cursor, delta)
}

// Uint32 returns a read-write accessor for uint32 column
func (txn *Txn) Uint32(columnName string) rwUint32 {
	return rwUint32{
		rdNumber: readNumberOf[uint32](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Uint64 ----------------------------

// makeUint64s creates a new vector for uint64s
func makeUint64s(opts ...func(*option[uint64])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value uint64) { buffer.PutUint64(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []uint64, opts option[uint64]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Uint64()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapUint64(opts.Merge(data[offset], r.Uint64()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwUint64 represents a read-write cursor for uint64
type rwUint64 struct {
	rdNumber[uint64]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwUint64) Set(value uint64) {
	s.writer.PutUint64(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwUint64) Merge(delta uint64) {
	s.writer.PutUint64(commit.Merge, s.txn.cursor, delta)
}

// Uint64 returns a read-write accessor for uint64 column
func (txn *Txn) Uint64(columnName string) rwUint64 {
	return rwUint64{
		rdNumber: readNumberOf[uint64](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Float32 ----------------------------

// makeFloat32s creates a new vector for float32s
func makeFloat32s(opts ...func(*option[float32])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value float32) { buffer.PutFloat32(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []float32, opts option[float32]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Float32()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapFloat32(opts.Merge(data[offset], r.Float32()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwFloat32 represents a read-write cursor for float32
type rwFloat32 struct {
	rdNumber[float32]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwFloat32) Set(value float32) {
	s.writer.PutFloat32(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwFloat32) Merge(delta float32) {
	s.writer.PutFloat32(commit.Merge, s.txn.cursor, delta)
}

// Float32 returns a read-write accessor for float32 column
func (txn *Txn) Float32(columnName string) rwFloat32 {
	return rwFloat32{
		rdNumber: readNumberOf[float32](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

// --------------------------- Float64 ----------------------------

// makeFloat64s creates a new vector for float64s
func makeFloat64s(opts ...func(*option[float64])) Column {
	return makeNumeric(
		func(buffer *commit.Buffer, idx uint32, value float64) { buffer.PutFloat64(commit.Put, idx, value) },
		func(r *commit.Reader, fill bitmap.Bitmap, data []float64, opts option[float64]) {
			for r.Next() {
				offset := r.IndexAtChunk()
				switch r.Type {
				case commit.Put:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.Float64()
				case commit.Merge:
					fill[offset>>6] |= 1 << (offset & 0x3f)
					data[offset] = r.SwapFloat64(opts.Merge(data[offset], r.Float64()))
				case commit.Delete:
					fill.Remove(offset)
				}
			}
		}, opts,
	)
}

// rwFloat64 represents a read-write cursor for float64
type rwFloat64 struct {
	rdNumber[float64]
	writer *commit.Buffer
}

// Set sets the value at the current transaction cursor
func (s rwFloat64) Set(value float64) {
	s.writer.PutFloat64(commit.Put, s.txn.cursor, value)
}

// Merge atomically merges a delta to the value at the current transaction cursor
func (s rwFloat64) Merge(delta float64) {
	s.writer.PutFloat64(commit.Merge, s.txn.cursor, delta)
}

// Float64 returns a read-write accessor for float64 column
func (txn *Txn) Float64(columnName string) rwFloat64 {
	return rwFloat64{
		rdNumber: readNumberOf[float64](txn, columnName),
		writer:   txn.bufferFor(columnName),
	}
}

--------------------------------------------------------------------------------