├── LICENSE
├── README.md
├── catena.go
├── compact.go
├── db.go
├── db_test.go
├── insert.go
├── iterator.go
├── iterator_test.go
├── partition
│   ├── disk
│   │   ├── extent.go
│   │   ├── iterator.go
│   │   └── partition.go
│   ├── interface.go
│   ├── memory
│   │   ├── compact.go
│   │   ├── insertion.go
│   │   ├── iterator.go
│   │   └── partition.go
│   └── types.go
├── partition_list.go
├── partition_test.go
└── wal
    ├── file_wal.go
    └── wal.go

/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 Preetam Jinka
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without modification,
5 | are permitted provided that the following conditions are met:
6 |
7 | 1. Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | 2. Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation and/or
12 | other materials provided with the distribution.
13 |
14 | 3. Neither the name of the copyright holder nor the names of its contributors
15 | may be used to endorse or promote products derived from this software without
16 | specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | Catena may import packages with separate licenses.
30 |
31 | Go:
32 | Copyright (c) 2012 The Go Authors. All rights reserved.
33 | Redistribution and use in source and binary forms, with or without
34 | modification, are permitted provided that the following conditions are
35 | met:
36 | * Redistributions of source code must retain the above copyright
37 | notice, this list of conditions and the following disclaimer.
38 | * Redistributions in binary form must reproduce the above
39 | copyright notice, this list of conditions and the following disclaimer
40 | in the documentation and/or other materials provided with the
41 | distribution.
42 | * Neither the name of Google Inc. nor the names of its
43 | contributors may be used to endorse or promote products derived from
44 | this software without specific prior written permission.
45 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
46 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
47 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
48 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
49 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
51 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
52 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
54 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
55 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 |
57 | github.com/youtube/vitess/go/cgzip:
58 | Copyright 2012, Google Inc.
59 | All rights reserved.
60 |
61 | Redistribution and use in source and binary forms, with or without
62 | modification, are permitted provided that the following conditions are
63 | met:
64 |
65 | * Redistributions of source code must retain the above copyright
66 | notice, this list of conditions and the following disclaimer.
67 | * Redistributions in binary form must reproduce the above
68 | copyright notice, this list of conditions and the following disclaimer
69 | in the documentation and/or other materials provided with the
70 | distribution.
71 | * Neither the name of Google Inc. nor the names of its
72 | contributors may be used to endorse or promote products derived from
73 | this software without specific prior written permission.
74 |
75 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
76 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
77 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
78 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
79 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
80 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
81 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
82 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
83 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
84 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
85 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
86 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | catena [![Circle CI](https://circleci.com/gh/Cistern/catena.svg?style=svg&circle-token=9bf7ad5d56d1a24e8bb2a060d5cad6d63576d6e8)](https://circleci.com/gh/Cistern/catena) [![GoDoc](https://godoc.org/github.com/Cistern/catena?status.svg)](https://godoc.org/github.com/Cistern/catena) [![BSD License](https://img.shields.io/pypi/l/Django.svg)](https://github.com/Cistern/catena/blob/master/LICENSE)
2 | ===
3 | > *n.* A closely linked series.
4 |
5 | Catena is a storage engine for time series data.
6 |
7 | Documentation
8 | ---
9 | Extensive documentation is not available yet, but there are a few resources you can use to learn more:
10 |
11 | 1. Original blog post introducing Catena: http://misfra.me/state-of-the-state-part-iii
12 | 2. Time series storage slide deck: https://speakerdeck.com/preetamjinka/time-series-storage
13 |
14 | Disclaimer
15 | ---
16 | Catena is just a prototype at the moment.
**It's not ready for you to use.** 17 | 18 | License 19 | --- 20 | BSD (see LICENSE) 21 | -------------------------------------------------------------------------------- /catena.go: -------------------------------------------------------------------------------- 1 | // Package catena provides a time series storage engine. 2 | package catena 3 | 4 | import ( 5 | "github.com/Cistern/catena/partition" 6 | "github.com/Cistern/catena/partition/disk" 7 | "github.com/Cistern/catena/partition/memory" 8 | ) 9 | 10 | // A Point is a single observation of a time series metric. It 11 | // has a timestamp and a value. 12 | type Point struct { 13 | Timestamp int64 `json:"timestamp"` 14 | Value float64 `json:"value"` 15 | } 16 | 17 | // A Row is a Point with Source and Metric fields. 18 | type Row struct { 19 | Source string `json:"source"` 20 | Metric string `json:"metric"` 21 | Point 22 | } 23 | 24 | // Making sure there are no import cycles 25 | var _ partition.Partition = &disk.DiskPartition{} 26 | var _ partition.Partition = &memory.MemoryPartition{} 27 | var _ partition.Iterator 28 | -------------------------------------------------------------------------------- /compact.go: -------------------------------------------------------------------------------- 1 | package catena 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | "sync/atomic" 7 | 8 | "github.com/Cistern/catena/partition" 9 | "github.com/Cistern/catena/partition/disk" 10 | "github.com/Cistern/catena/partition/memory" 11 | ) 12 | 13 | // compact drops old partitions and compacts older memory to 14 | // read-only disk partitions. 15 | func (db *DB) compact() { 16 | 17 | // Look for partitions to drop 18 | i := db.partitionList.NewIterator() 19 | seen := 0 20 | lastMin := int64(0) 21 | for i.Next() { 22 | p, err := i.Value() 23 | if err != nil { 24 | break 25 | } 26 | 27 | seen++ 28 | if seen <= db.maxPartitions { 29 | lastMin = p.MinTimestamp() 30 | continue 31 | } 32 | 33 | atomic.SwapInt64(&db.minTimestamp, lastMin) 34 | 35 | // Remove it from the list 36 | db.partitionList.Remove(p) 37 | 38 | // Make sure we're the only ones accessing the partition 39 | p.ExclusiveHold() 40 | p.Destroy() 41 | p.ExclusiveRelease() 42 | } 43 | 44 | // Find partitions to compact 45 | toCompact := []partition.Partition{} 46 | 47 | seen = 0 48 | i = db.partitionList.NewIterator() 49 | for i.Next() { 50 | seen++ 51 | if seen <= 2 { 52 | // Skip the latest two in-memory partitions 53 | continue 54 | } 55 | 56 | p, _ := i.Value() 57 | 58 | p.Hold() 59 | if !p.ReadOnly() { 60 | p.Release() 61 | p.ExclusiveHold() 62 | p.SetReadOnly() 63 | p.ExclusiveRelease() 64 | 65 | toCompact = append(toCompact, p) 66 | } else { 67 | p.Release() 68 | } 69 | } 70 | 71 | for _, p := range toCompact { 72 | // p is read-only, so no need to lock. 73 | memPart := p.(*memory.MemoryPartition) 74 | 75 | // Create the disk partition file 76 | filename := strings.TrimSuffix(memPart.Filename(), ".wal") + ".part" 77 | f, err := os.Create(filename) 78 | if err != nil { 79 | // ??? 80 | return 81 | } 82 | 83 | // Compact 84 | err = memPart.Compact(f) 85 | if err != nil { 86 | // ??? 87 | return 88 | } 89 | 90 | // Close and reopen. 91 | f.Sync() 92 | f.Close() 93 | 94 | diskPart, err := disk.OpenDiskPartition(filename) 95 | if err != nil { 96 | // ??? 97 | return 98 | } 99 | 100 | // Swap the memory partition with the disk partition. 
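// Note that Swap publishes the new disk partition in the list before
// the memory partition is destroyed, so concurrent readers always see
// one of the two. The ExclusiveHold below then blocks until any
// outstanding iterators release the memory partition.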
101 | db.partitionList.Swap(memPart, diskPart) 102 | 103 | memPart.ExclusiveHold() 104 | memPart.Destroy() 105 | memPart.ExclusiveRelease() 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | package catena 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "sort" 9 | "strings" 10 | "sync" 11 | "time" 12 | 13 | "github.com/Cistern/catena/partition" 14 | "github.com/Cistern/catena/partition/disk" 15 | "github.com/Cistern/catena/partition/memory" 16 | "github.com/Cistern/catena/wal" 17 | ) 18 | 19 | // DB is a handle to a catena database. 20 | type DB struct { 21 | baseDir string 22 | 23 | partitionList *partitionList 24 | 25 | lastPartitionID int64 26 | partitionSize int64 27 | maxPartitions int 28 | 29 | minTimestamp int64 30 | maxTimestamp int64 31 | 32 | partitionCreateLock sync.Mutex 33 | } 34 | 35 | // NewDB creates a new DB located in baseDir. If baseDir 36 | // does not exist it will be created. An error is returned 37 | // if baseDir is not empty. 38 | func NewDB(baseDir string, partitionSize, maxPartitions int) (*DB, error) { 39 | err := os.MkdirAll(baseDir, 0755) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | dir, err := os.Open(baseDir) 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | defer dir.Close() 50 | 51 | names, err := dir.Readdirnames(-1) 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | if len(names) > 0 { 57 | return nil, errors.New("catena: NewDB called with non-empty directory") 58 | } 59 | 60 | db := &DB{ 61 | baseDir: baseDir, 62 | partitionSize: int64(partitionSize), 63 | maxPartitions: maxPartitions, 64 | partitionList: newPartitionList(), 65 | } 66 | 67 | // Start up the compactor. 68 | // TODO: use a semaphore to only have a single compactor 69 | // at a time. 70 | go func() { 71 | for _ = range time.Tick(500 * time.Millisecond) { 72 | db.compact() 73 | } 74 | }() 75 | 76 | return db, nil 77 | } 78 | 79 | // OpenDB opens a DB located in baseDir. 80 | func OpenDB(baseDir string, partitionSize, maxPartitions int) (*DB, error) { 81 | db := &DB{ 82 | baseDir: baseDir, 83 | partitionSize: int64(partitionSize), 84 | maxPartitions: maxPartitions, 85 | partitionList: newPartitionList(), 86 | } 87 | 88 | dir, err := os.Open(baseDir) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | defer dir.Close() 94 | 95 | dirInfo, err := dir.Stat() 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | if !dirInfo.IsDir() { 101 | return nil, errors.New("catena: baseDir is not a directory") 102 | } 103 | 104 | names, err := dir.Readdirnames(-1) 105 | if err != nil { 106 | return nil, err 107 | } 108 | 109 | err = db.loadPartitions(names) 110 | if err != nil { 111 | return nil, err 112 | } 113 | 114 | go func() { 115 | for _ = range time.Tick(time.Millisecond * 50) { 116 | db.compact() 117 | } 118 | }() 119 | 120 | return db, nil 121 | } 122 | 123 | // Close closes the DB and releases any internal state. 124 | // Close will block if there are active iterators. 
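//
// A minimal lifecycle sketch (the path and the size/count arguments
// here are illustrative, not recommendations):
//
//	db, err := catena.OpenDB("/var/lib/catena", 3600, 24)
//	if err != nil {
//		// handle the error
//	}
//	defer db.Close()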
125 | func (db *DB) Close() error { 126 | i := db.partitionList.NewIterator() 127 | for i.Next() { 128 | val, _ := i.Value() 129 | db.partitionList.Remove(val) 130 | 131 | val.ExclusiveHold() 132 | val.SetReadOnly() 133 | 134 | err := val.Close() 135 | if err != nil { 136 | val.ExclusiveRelease() 137 | return err 138 | } 139 | 140 | val.ExclusiveRelease() 141 | } 142 | 143 | return nil 144 | } 145 | 146 | // Sources returns a slice of sources that are present within the 147 | // given time range. 148 | func (db *DB) Sources(start, end int64) []string { 149 | sourcesMap := map[string]struct{}{} 150 | 151 | i := db.partitionList.NewIterator() 152 | for i.Next() { 153 | val, _ := i.Value() 154 | 155 | val.Hold() 156 | 157 | if val.MaxTimestamp() > start && val.MinTimestamp() < end { 158 | for _, source := range val.Sources() { 159 | sourcesMap[source] = struct{}{} 160 | } 161 | } 162 | 163 | val.Release() 164 | } 165 | 166 | sources := []string{} 167 | 168 | for source := range sourcesMap { 169 | sources = append(sources, source) 170 | } 171 | 172 | return sources 173 | } 174 | 175 | // Metrics returns a slice of metrics that are present within the 176 | // given time range for the given source. 177 | func (db *DB) Metrics(source string, start, end int64) []string { 178 | metricsMap := map[string]struct{}{} 179 | 180 | i := db.partitionList.NewIterator() 181 | for i.Next() { 182 | val, _ := i.Value() 183 | 184 | val.Hold() 185 | 186 | if val.MaxTimestamp() > start && val.MinTimestamp() < end { 187 | 188 | for _, metric := range val.Metrics(source) { 189 | metricsMap[metric] = struct{}{} 190 | } 191 | } 192 | 193 | val.Release() 194 | } 195 | 196 | metrics := []string{} 197 | 198 | for metric := range metricsMap { 199 | metrics = append(metrics, metric) 200 | } 201 | 202 | return metrics 203 | } 204 | 205 | // loadPartitions reads a slice of partition file names 206 | // and updates the internal partition state. 207 | func (db *DB) loadPartitions(names []string) error { 208 | 209 | // Slice of partition IDs 210 | partitions := []int{} 211 | 212 | isWAL := map[int]bool{} 213 | 214 | for _, name := range names { 215 | partitionNum := -1 216 | 217 | wal := false 218 | 219 | if strings.HasSuffix(name, ".wal") { 220 | _, err := fmt.Sscanf(name, "%d.wal", &partitionNum) 221 | if err != nil { 222 | return err 223 | } 224 | 225 | wal = true 226 | } 227 | 228 | if strings.HasSuffix(name, ".part") { 229 | _, err := fmt.Sscanf(name, "%d.part", &partitionNum) 230 | if err != nil { 231 | return err 232 | } 233 | } 234 | 235 | if partitionNum < 0 { 236 | return errors.New(fmt.Sprintf("catena: invalid partition %s", name)) 237 | } 238 | 239 | if seenWAL, seen := isWAL[partitionNum]; seen { 240 | if (seenWAL && !wal) || (!seenWAL && wal) { 241 | // We have both a .wal and a .part, so 242 | // we'll get rid of the .part and recompact. 243 | wal = true 244 | err := os.Remove(filepath.Join(db.baseDir, fmt.Sprintf("%d.part", partitionNum))) 245 | if err != nil { 246 | return err 247 | } 248 | } 249 | } 250 | 251 | isWAL[partitionNum] = wal 252 | } 253 | 254 | for partitionNum := range isWAL { 255 | partitions = append(partitions, partitionNum) 256 | } 257 | 258 | // Sort the partitions in increasing order. 
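// Increasing order also keeps the loop below deterministic: it updates
// db.lastPartitionID to the highest ID seen and inserts the partitions
// oldest-first.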
259 | sort.Ints(partitions)
260 |
261 | for _, part := range partitions {
262 | if int64(part) > db.lastPartitionID {
263 | db.lastPartitionID = int64(part)
264 | }
265 |
266 | var p partition.Partition
267 | var err error
268 | var filename string
269 |
270 | if isWAL[part] {
271 | filename = filepath.Join(db.baseDir,
272 | fmt.Sprintf("%d.wal", part))
273 |
274 | w, err := wal.OpenFileWAL(filename)
275 | if err != nil {
276 | return err
277 | }
278 |
279 | p, err = memory.RecoverMemoryPartition(w)
280 | if err != nil {
281 | return err
282 | }
283 |
284 | } else {
285 | filename = filepath.Join(db.baseDir,
286 | fmt.Sprintf("%d.part", part))
287 |
288 | p, err = disk.OpenDiskPartition(filename)
289 | if err != nil {
290 | return err
291 | }
292 | }
293 |
294 | // No need for locks here.
295 |
296 | if db.partitionList.Size() == 0 {
297 | // First partition loaded: seed the timestamp bounds.
298 | db.minTimestamp = p.MinTimestamp()
299 | db.maxTimestamp = p.MaxTimestamp()
300 | }
301 |
302 | if db.minTimestamp > p.MinTimestamp() {
303 | db.minTimestamp = p.MinTimestamp()
304 | }
305 |
306 | if db.maxTimestamp < p.MaxTimestamp() {
307 | db.maxTimestamp = p.MaxTimestamp()
308 | }
309 |
310 | db.partitionList.Insert(p)
311 | }
312 |
313 | return nil
314 | }
--------------------------------------------------------------------------------
/db_test.go:
--------------------------------------------------------------------------------
1 | package catena
2 |
3 | import (
4 | "os"
5 | "runtime"
6 | "strconv"
7 | "sync"
8 | "testing"
9 | )
10 |
11 | func TestDB(t *testing.T) {
12 | os.RemoveAll("/tmp/catena")
13 |
14 | db, err := NewDB("/tmp/catena", 500, 20)
15 | if err != nil {
16 | t.Fatal(err)
17 | }
18 |
19 | ts := int64(0)
20 |
21 | parallelism := 4
22 | runtime.GOMAXPROCS(parallelism)
23 | wg := sync.WaitGroup{}
24 | wg.Add(parallelism)
25 |
26 | work := make(chan []Row, parallelism)
27 |
28 | for i := 0; i < parallelism; i++ {
29 | go func() {
30 | defer wg.Done()
31 | for rows := range work {
32 | err := db.InsertRows(rows)
33 | if err != nil {
34 | // t.Fatal must be called from the test goroutine, so
35 | // report the failure with t.Error and stop this worker.
36 | t.Error(err)
37 | return
38 | }
39 | }
40 | }()
41 | }
42 |
43 | for n := 0; n < 500; n++ {
44 |
45 | rows := []Row{}
46 | for i := 0; i < 10000; i++ {
47 | rows = append(rows, Row{
48 | Source: "src",
49 | Metric: "met_" + strconv.Itoa(i),
50 | Point: Point{
51 | Timestamp: ts,
52 | Value: float64(i),
53 | },
54 | })
55 | }
56 |
57 | ts++
58 |
59 | work <- rows
60 | }
61 |
62 | close(work)
63 | wg.Wait()
64 |
65 | err = db.Close()
66 | if err != nil {
67 | t.Fatal(err)
68 | }
69 | }
--------------------------------------------------------------------------------
/insert.go:
--------------------------------------------------------------------------------
1 | package catena
2 |
3 | import (
4 | "errors"
5 | "fmt"
6 | "path/filepath"
7 | "sort"
8 | "sync/atomic"
9 | "unsafe"
10 |
11 | "github.com/Cistern/catena/partition"
12 | "github.com/Cistern/catena/partition/memory"
13 | "github.com/Cistern/catena/wal"
14 | )
15 |
16 | // InsertRows inserts the given rows into the database.
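// Rows may span multiple partitions; they are grouped by partition and
// inserted one group at a time. A hedged usage sketch (the source,
// metric, and values are illustrative):
//
//	err := db.InsertRows([]catena.Row{
//		{Source: "host-1", Metric: "cpu.idle", Point: catena.Point{Timestamp: 1425000000, Value: 98.2}},
//	})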
17 | func (db *DB) InsertRows(rows []Row) error {
18 | keyToRows := map[int][]Row{}
19 |
20 | for _, row := range rows {
21 | key := int(row.Timestamp / db.partitionSize)
22 | keyToRows[key] = append(keyToRows[key], row)
23 | }
24 |
25 | keys := []int{}
26 | for key := range keyToRows {
27 | keys = append(keys, key)
28 | }
29 | sort.Ints(keys)
30 |
31 | for _, key := range keys {
32 | rowsForKey := keyToRows[key]
33 |
34 | minTimestampInRows := int64(0)
35 | maxTimestampInRows := int64(0)
36 |
37 | for i, row := range rowsForKey {
38 | if i == 0 {
39 | // The first row seeds the running min/max.
40 | minTimestampInRows = row.Timestamp
41 | maxTimestampInRows = row.Timestamp
42 | }
43 |
44 | if row.Timestamp > maxTimestampInRows {
45 | maxTimestampInRows = row.Timestamp
46 | }
47 |
48 | if row.Timestamp < minTimestampInRows {
49 | minTimestampInRows = row.Timestamp
50 | }
51 | }
52 |
53 | var p partition.Partition
54 |
55 | FIND_PARTITION:
56 |
57 | i := db.partitionList.NewIterator()
58 | for i.Next() {
59 | val, _ := i.Value()
60 | val.Hold()
61 |
62 | if val.MinTimestamp()/db.partitionSize == int64(key) {
63 | p = val
64 | goto VALID_PARTITION
65 | }
66 |
67 | if val.MinTimestamp()/db.partitionSize < int64(key) && val.MaxTimestamp()/db.partitionSize >= int64(key) {
68 | p = val
69 | goto VALID_PARTITION
70 | }
71 |
72 | val.Release()
73 | }
74 |
75 | db.partitionCreateLock.Lock()
76 | if p == nil {
77 | if int64(key) < atomic.LoadInt64(&db.minTimestamp)/db.partitionSize {
78 | db.partitionCreateLock.Unlock()
79 | return errors.New("catena: row(s) being inserted are too old")
80 | }
81 |
82 | if db.partitionList.Size() == 0 ||
83 | int64(key) > atomic.LoadInt64(&db.maxTimestamp)/db.partitionSize {
84 |
85 | // Need to make a new partition
86 | newPartitionID := atomic.LoadInt64(&db.lastPartitionID) + 1
87 | w, err := wal.NewFileWAL(filepath.Join(db.baseDir,
88 | fmt.Sprintf("%d.wal", newPartitionID)))
89 | if err != nil {
90 | // Couldn't create the WAL. Maybe another writer has created
91 | // the WAL file. Retry.
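// Retrying is safe here because partitionCreateLock is released
// first; the next FIND_PARTITION pass should then find the partition
// that the competing writer created.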
91 | db.partitionCreateLock.Unlock() 92 | goto FIND_PARTITION 93 | } 94 | 95 | p = memory.NewMemoryPartition(w) 96 | db.partitionList.Insert(p) 97 | p.Hold() 98 | 99 | if db.partitionList.Size() == 1 { 100 | atomic.SwapInt64(&db.minTimestamp, minTimestampInRows) 101 | atomic.SwapInt64(&db.maxTimestamp, maxTimestampInRows) 102 | } 103 | 104 | if !atomic.CompareAndSwapInt64(&db.lastPartitionID, newPartitionID-1, newPartitionID) { 105 | p.Release() 106 | p.Destroy() 107 | db.partitionCreateLock.Unlock() 108 | goto FIND_PARTITION 109 | } 110 | } 111 | 112 | if p == nil { 113 | db.partitionCreateLock.Unlock() 114 | goto FIND_PARTITION 115 | } 116 | } 117 | 118 | db.partitionCreateLock.Unlock() 119 | 120 | VALID_PARTITION: 121 | 122 | if p.ReadOnly() { 123 | p.Release() 124 | return errors.New("catena: insert into read-only partition") 125 | } 126 | 127 | err := p.InsertRows(*(*[]partition.Row)(unsafe.Pointer(&rows))) 128 | if err != nil { 129 | p.Release() 130 | return err 131 | } 132 | 133 | p.Release() 134 | 135 | for min := atomic.LoadInt64(&db.minTimestamp); min > minTimestampInRows; min = atomic.LoadInt64(&db.minTimestamp) { 136 | if atomic.CompareAndSwapInt64(&db.minTimestamp, min, minTimestampInRows) { 137 | break 138 | } 139 | } 140 | 141 | for max := atomic.LoadInt64(&db.maxTimestamp); max < maxTimestampInRows; max = atomic.LoadInt64(&db.maxTimestamp) { 142 | if atomic.CompareAndSwapInt64(&db.maxTimestamp, max, maxTimestampInRows) { 143 | break 144 | } 145 | } 146 | } 147 | 148 | return nil 149 | } 150 | -------------------------------------------------------------------------------- /iterator.go: -------------------------------------------------------------------------------- 1 | package catena 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/Cistern/catena/partition" 7 | ) 8 | 9 | // An Iterator is a cursor over an array of points 10 | // for a source and metric. 11 | type Iterator struct { 12 | source, metric string 13 | db *DB 14 | curPartition partition.Partition 15 | partition.Iterator 16 | } 17 | 18 | // NewIterator creates a new Iterator for the given source and metric. 19 | func (db *DB) NewIterator(source, metric string) (*Iterator, error) { 20 | var p partition.Partition = nil 21 | 22 | i := db.partitionList.NewIterator() 23 | for i.Next() { 24 | val, _ := i.Value() 25 | 26 | val.Hold() 27 | 28 | if val.HasMetric(source, metric) { 29 | if p != nil { 30 | p.Release() 31 | 32 | } 33 | 34 | p = val 35 | } else { 36 | val.Release() 37 | } 38 | } 39 | 40 | if p == nil { 41 | return nil, errors.New("catena: couldn't find metric for iterator") 42 | } 43 | 44 | partitionIter, err := p.NewIterator(source, metric) 45 | p.Release() 46 | 47 | if err != nil { 48 | return nil, err 49 | } 50 | 51 | return &Iterator{ 52 | source: source, 53 | metric: metric, 54 | db: db, 55 | curPartition: p, 56 | Iterator: partitionIter, 57 | }, nil 58 | } 59 | 60 | // Next advances i to the next available point. 61 | func (i *Iterator) Next() error { 62 | currentPoint := i.Point() 63 | err := i.Iterator.Next() 64 | if err == nil { 65 | return nil 66 | } 67 | 68 | err = i.Seek(currentPoint.Timestamp + 1) 69 | return err 70 | } 71 | 72 | // Seek moves the iterator to the first timestamp greater than 73 | // or equal to timestamp. 
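// It returns an error if it cannot position the iterator. A hedged
// sketch (the timestamp is illustrative):
//
//	if err := it.Seek(1425000000); err == nil {
//		p := it.Point() // first point with Timestamp >= 1425000000
//	}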
74 | func (i *Iterator) Seek(timestamp int64) error { 75 | if i.Iterator != nil { 76 | i.Iterator.Close() 77 | } 78 | 79 | i.Iterator = nil 80 | 81 | partitionListIter := i.db.partitionList.NewIterator() 82 | for partitionListIter.Next() { 83 | 84 | val, _ := partitionListIter.Value() 85 | 86 | val.Hold() 87 | 88 | if val.MaxTimestamp() < timestamp { 89 | val.Release() 90 | 91 | break 92 | } 93 | 94 | if val.HasMetric(i.source, i.metric) { 95 | partitionIter, err := val.NewIterator(i.source, i.metric) 96 | val.Release() 97 | 98 | if err != nil { 99 | continue 100 | } 101 | 102 | err = partitionIter.Seek(timestamp) 103 | if err != nil { 104 | partitionIter.Close() 105 | continue 106 | } 107 | 108 | if i.Iterator != nil { 109 | i.Iterator.Close() 110 | } 111 | 112 | i.Iterator = partitionIter 113 | i.curPartition = val 114 | } else { 115 | val.Release() 116 | } 117 | } 118 | 119 | if i.Iterator == nil { 120 | return errors.New("catena: couldn't find metric for iterator") 121 | } 122 | 123 | return nil 124 | } 125 | 126 | // Reset moves i to the first available timestamp. 127 | func (i *Iterator) Reset() error { 128 | i.Iterator.Close() 129 | 130 | var p partition.Partition 131 | 132 | partitionListIter := i.db.partitionList.NewIterator() 133 | for partitionListIter.Next() { 134 | val, _ := partitionListIter.Value() 135 | val.Hold() 136 | 137 | if val.HasMetric(i.source, i.metric) { 138 | if p != nil { 139 | p.Release() 140 | } 141 | 142 | p = val 143 | } else { 144 | val.Release() 145 | } 146 | } 147 | 148 | if p == nil { 149 | return errors.New("catena: couldn't find metric for iterator") 150 | } 151 | 152 | defer p.Release() 153 | 154 | i.curPartition = p 155 | 156 | partitionIter, err := p.NewIterator(i.source, i.metric) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | i.Iterator = partitionIter 162 | return nil 163 | } 164 | 165 | // Close closes the iterator. Iterators MUST be closed to unblock 166 | // the compactor! 
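// Iterators keep a read hold on their current partition, and the
// compactor needs an exclusive hold before it can destroy a partition,
// so a leaked iterator stalls compaction indefinitely.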
167 | func (i *Iterator) Close() { 168 | if i.Iterator == nil { 169 | return 170 | } 171 | 172 | i.Iterator.Close() 173 | i.curPartition = nil 174 | } 175 | -------------------------------------------------------------------------------- /iterator_test.go: -------------------------------------------------------------------------------- 1 | package catena 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestIterator(t *testing.T) { 10 | os.RemoveAll("/tmp/catena_iterator_test") 11 | 12 | db, err := NewDB("/tmp/catena_iterator_test", 5, 4) 13 | if err != nil { 14 | t.Fatal(err) 15 | } 16 | 17 | insert5Rows := func(startingTS int64) error { 18 | return db.InsertRows([]Row{ 19 | Row{ 20 | Source: "a", 21 | Metric: "b", 22 | Point: Point{ 23 | Timestamp: startingTS, 24 | }, 25 | }, 26 | Row{ 27 | Source: "a", 28 | Metric: "b", 29 | Point: Point{ 30 | Timestamp: startingTS + 1, 31 | }, 32 | }, 33 | Row{ 34 | Source: "a", 35 | Metric: "b", 36 | Point: Point{ 37 | Timestamp: startingTS + 2, 38 | }, 39 | }, 40 | Row{ 41 | Source: "a", 42 | Metric: "b", 43 | Point: Point{ 44 | Timestamp: startingTS + 3, 45 | }, 46 | }, 47 | Row{ 48 | Source: "a", 49 | Metric: "b", 50 | Point: Point{ 51 | Timestamp: startingTS + 4, 52 | }, 53 | }, 54 | }) 55 | } 56 | 57 | err = insert5Rows(0) 58 | if err != nil { 59 | t.Fatal(err) 60 | } 61 | err = insert5Rows(5) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | err = insert5Rows(10) 66 | if err != nil { 67 | t.Fatal(err) 68 | } 69 | err = insert5Rows(15) 70 | if err != nil { 71 | t.Fatal(err) 72 | } 73 | 74 | db.compact() 75 | 76 | i, err := db.NewIterator("b", "b") 77 | if err == nil { 78 | t.Fatal("expected to see an error for an invalid iterator") 79 | } 80 | 81 | i, err = db.NewIterator("a", "b") 82 | if err != nil { 83 | t.Fatal(err) 84 | } 85 | 86 | // timestamp 0 87 | err = i.Next() 88 | if err != nil { 89 | t.Fatal(err) 90 | } 91 | 92 | if i.Point().Timestamp != 0 { 93 | t.Fatalf("expected timestamp %d, got %d", 0, i.Point().Timestamp) 94 | } 95 | 96 | // timestamp 1 97 | err = i.Next() 98 | if err != nil { 99 | t.Fatal(err) 100 | } 101 | 102 | // timestamp 2 103 | err = i.Next() 104 | if err != nil { 105 | t.Fatal(err) 106 | } 107 | 108 | // timestamp 3 109 | err = i.Next() 110 | if err != nil { 111 | t.Fatal(err) 112 | } 113 | 114 | // timestamp 4 115 | err = i.Next() 116 | if err != nil { 117 | t.Fatal(err) 118 | } 119 | 120 | if i.Point().Timestamp != 4 { 121 | t.Fatalf("expected timestamp %d, got %d", 4, i.Point().Timestamp) 122 | } 123 | 124 | // Seek to point 12 125 | err = i.Seek(12) 126 | if err != nil { 127 | t.Fatal(err) 128 | } 129 | 130 | if i.Point().Timestamp != 12 { 131 | t.Fatalf("expected timestamp %d, got %d", 12, i.Point().Timestamp) 132 | } 133 | 134 | err = i.Seek(2) 135 | if err != nil { 136 | t.Fatal(err) 137 | } 138 | 139 | if i.Point().Timestamp != 2 { 140 | t.Fatalf("expected timestamp %d, got %d", 2, i.Point().Timestamp) 141 | } 142 | 143 | // Now we add some more points. 144 | err = insert5Rows(20) 145 | if err != nil { 146 | t.Fatal(err) 147 | } 148 | 149 | // Wait a bit for the compactor to start up. 150 | // It should be blocked on the iterator. 151 | time.Sleep(time.Millisecond * 50) 152 | 153 | // Close the iterator, which should unblock the compactor. 154 | i.Close() 155 | 156 | // Let the compactor do its work. 
157 | time.Sleep(time.Millisecond * 550) 158 | 159 | i, err = db.NewIterator("a", "b") 160 | if err != nil { 161 | t.Fatal(err) 162 | } 163 | 164 | // Now we should be at point 5 because the oldest partition got dropped. 165 | if i.Point().Timestamp != 5 { 166 | t.Fatalf("expected timestamp %d, got %d", 5, i.Point().Timestamp) 167 | } 168 | 169 | err = i.Seek(0) 170 | // Note that this should NOT be an error. Seek moves the iterator to the 171 | // first timestamp greater than or equal to the requested timestamp, 172 | // so we should now be at timestamp 5. 173 | if err != nil { 174 | t.Fatal(err) 175 | } 176 | 177 | if i.Point().Timestamp != 5 { 178 | t.Fatalf("expected timestamp %d, got %d", 5, i.Point().Timestamp) 179 | } 180 | 181 | err = i.Seek(5) 182 | if err != nil { 183 | t.Fatal(err) 184 | } 185 | 186 | if i.Point().Timestamp != 5 { 187 | t.Fatalf("expected timestamp %d, got %d", 5, i.Point().Timestamp) 188 | } 189 | 190 | // timestamp 6 191 | err = i.Next() 192 | if err != nil { 193 | t.Fatal(err) 194 | } 195 | 196 | // timestamp 7 197 | err = i.Next() 198 | if err != nil { 199 | t.Fatal(err) 200 | } 201 | 202 | // timestamp 8 203 | err = i.Next() 204 | if err != nil { 205 | t.Fatal(err) 206 | } 207 | 208 | // timestamp 9 209 | err = i.Next() 210 | if err != nil { 211 | t.Fatal(err) 212 | } 213 | 214 | // Crossing partition boundary 215 | 216 | // timestamp 10 217 | err = i.Next() 218 | if err != nil { 219 | t.Fatal(err) 220 | } 221 | 222 | // timestamp 11 223 | err = i.Next() 224 | if err != nil { 225 | t.Fatal(err) 226 | } 227 | 228 | if i.Point().Timestamp != 11 { 229 | t.Fatalf("expected timestamp %d, got %d", 11, i.Point().Timestamp) 230 | } 231 | 232 | // timestamp 12 233 | err = i.Next() 234 | if err != nil { 235 | t.Fatal(err) 236 | } 237 | 238 | // timestamp 13 239 | err = i.Next() 240 | if err != nil { 241 | t.Fatal(err) 242 | } 243 | 244 | // timestamp 14 245 | err = i.Next() 246 | if err != nil { 247 | t.Fatal(err) 248 | } 249 | 250 | // Crossing partition boundary 251 | 252 | // timestamp 15 253 | err = i.Next() 254 | if err != nil { 255 | t.Fatal(err) 256 | } 257 | 258 | // timestamp 16 259 | err = i.Next() 260 | if err != nil { 261 | t.Fatal(err) 262 | } 263 | 264 | if i.Point().Timestamp != 16 { 265 | t.Fatalf("expected timestamp %d, got %d", 16, i.Point().Timestamp) 266 | } 267 | 268 | i.Close() 269 | 270 | db.Close() 271 | } 272 | -------------------------------------------------------------------------------- /partition/disk/extent.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "encoding/binary" 7 | 8 | "github.com/Cistern/catena/partition" 9 | ) 10 | 11 | type diskExtent struct { 12 | startTS int64 13 | offset int64 14 | numPoints uint32 15 | } 16 | 17 | func (p *DiskPartition) extentPoints(e diskExtent) ([]partition.Point, error) { 18 | r := bytes.NewReader(p.mapped) 19 | r.Seek(e.offset, 0) 20 | 21 | gzipReader, err := gzip.NewReader(r) 22 | if err != nil { 23 | return nil, err 24 | } 25 | 26 | points := []partition.Point{} 27 | 28 | for i := uint32(0); i < e.numPoints; i++ { 29 | point := partition.Point{} 30 | 31 | err = binary.Read(gzipReader, binary.LittleEndian, &point) 32 | if err != nil { 33 | return nil, err 34 | } 35 | 36 | points = append(points, point) 37 | } 38 | 39 | gzipReader.Close() 40 | 41 | return points, nil 42 | } 43 | -------------------------------------------------------------------------------- /partition/disk/iterator.go: 
-------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/Cistern/catena/partition" 7 | ) 8 | 9 | // NewIterator returns an Iterator for the partition that iterates over 10 | // a sequence of points for the given source and metric. 11 | func (p *DiskPartition) NewIterator(sourceName string, metricName string) (partition.Iterator, error) { 12 | p.Hold() 13 | i := &diskIterator{} 14 | 15 | source, present := p.sources[sourceName] 16 | if !present { 17 | p.Release() 18 | return nil, errors.New("partition/disk: source not found") 19 | } 20 | 21 | metric, present := source.metrics[metricName] 22 | if !present { 23 | p.Release() 24 | return nil, errors.New("partition/disk: metric not found") 25 | } 26 | 27 | i.metric = metric 28 | i.partition = p 29 | i.sourceName = sourceName 30 | 31 | err := i.Reset() 32 | if err != nil { 33 | p.Release() 34 | return nil, err 35 | } 36 | 37 | i.currentPointIndex = -1 38 | 39 | return i, nil 40 | } 41 | 42 | type diskIterator struct { 43 | sourceName string 44 | partition *DiskPartition 45 | metric diskMetric 46 | currentExtent diskExtent 47 | currentExtentIndex int 48 | currentExtentPoints []partition.Point 49 | currentPointIndex int 50 | currentPoint partition.Point 51 | } 52 | 53 | // Point returns the current point. 54 | func (i *diskIterator) Point() partition.Point { 55 | return i.currentPoint 56 | } 57 | 58 | // Reset moves the iterator to the first available point. 59 | func (i *diskIterator) Reset() error { 60 | i.currentExtent = i.metric.extents[0] 61 | i.currentExtentIndex = 0 62 | 63 | points, err := i.partition.extentPoints(i.currentExtent) 64 | if err != nil { 65 | return err 66 | } 67 | 68 | i.currentExtentPoints = points 69 | i.currentPoint = points[0] 70 | i.currentPointIndex = 0 71 | 72 | return nil 73 | } 74 | 75 | // Seek moves the iterator to the first timestamp greater 76 | // than or equal to the given timestamp. 77 | func (i *diskIterator) Seek(timestamp int64) error { 78 | i.Reset() 79 | 80 | for { 81 | firstTSInExtent := i.currentExtent.startTS 82 | lastTSInExtent := i.currentExtentPoints[len(i.currentExtentPoints)-1].Timestamp 83 | 84 | if firstTSInExtent >= timestamp { 85 | i.currentPointIndex = 0 86 | i.currentPoint = i.currentExtentPoints[0] 87 | return nil 88 | } 89 | 90 | if firstTSInExtent < timestamp && lastTSInExtent < timestamp { 91 | err := i.nextExtent() 92 | if err != nil { 93 | return err 94 | } 95 | 96 | continue 97 | } 98 | 99 | for index := i.currentPointIndex; index < len(i.currentExtentPoints); index++ { 100 | if i.currentExtentPoints[index].Timestamp >= timestamp { 101 | i.currentPointIndex = index 102 | i.currentPoint = i.currentExtentPoints[index] 103 | return nil 104 | } 105 | } 106 | } 107 | 108 | return errors.New("partition/disk: could not seek to requested timestamp") 109 | } 110 | 111 | // Next moves the iterator to the next point. 112 | func (i *diskIterator) Next() error { 113 | i.currentPointIndex++ 114 | if i.currentPointIndex == len(i.currentExtentPoints) { 115 | return i.nextExtent() 116 | } 117 | 118 | i.currentPoint = i.currentExtentPoints[i.currentPointIndex] 119 | 120 | return nil 121 | } 122 | 123 | // Close closes the iterator. 
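// Close releases the read hold taken by NewIterator above and drops
// the cached (decompressed) extent points.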
124 | func (i *diskIterator) Close() { 125 | i.partition.Release() 126 | i.currentExtentPoints = nil 127 | } 128 | 129 | func (i *diskIterator) nextExtent() error { 130 | var err error 131 | 132 | if i.currentExtentIndex == len(i.metric.extents)-1 { 133 | return errors.New("partition/disk: no more extents") 134 | } 135 | 136 | i.currentExtentIndex++ 137 | i.currentExtent = i.metric.extents[i.currentExtentIndex] 138 | i.currentExtentPoints, err = i.partition.extentPoints(i.currentExtent) 139 | if err != nil { 140 | return err 141 | } 142 | 143 | i.currentPointIndex = 0 144 | i.currentPoint = i.currentExtentPoints[0] 145 | 146 | return nil 147 | } 148 | 149 | // diskIterator is an Iterator. 150 | var _ partition.Iterator = &diskIterator{} 151 | -------------------------------------------------------------------------------- /partition/disk/partition.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "errors" 7 | "os" 8 | "sync" 9 | "syscall" 10 | 11 | "github.com/Cistern/catena/partition" 12 | ) 13 | 14 | const Magic = uint32(0xcafec0de) 15 | 16 | // diskPartition represents a partition 17 | // stored as a file on disk. 18 | type DiskPartition struct { 19 | // Metadata 20 | minTS int64 21 | maxTS int64 22 | 23 | // File on disk 24 | f *os.File 25 | filename string 26 | 27 | // Memory mapped backed by f 28 | mapped []byte 29 | 30 | sources map[string]diskSource 31 | 32 | rwMu sync.RWMutex 33 | } 34 | 35 | // diskSource is a metric source registered on disk. 36 | type diskSource struct { 37 | name string 38 | metrics map[string]diskMetric 39 | } 40 | 41 | // diskMetric is a metric on disk. 42 | type diskMetric struct { 43 | name string 44 | offset int64 45 | numPoints uint32 46 | 47 | extents []diskExtent 48 | } 49 | 50 | func OpenDiskPartition(filename string) (*DiskPartition, error) { 51 | f, err := os.Open(filename) 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | // Run Stat to get the file size. 57 | fInfo, err := f.Stat() 58 | if err != nil { 59 | f.Close() 60 | return nil, err 61 | } 62 | 63 | mapped, err := syscall.Mmap(int(f.Fd()), 0, int(fInfo.Size()), 64 | // Set read only protection and shared mapping flag. 65 | syscall.PROT_READ, syscall.MAP_SHARED) 66 | if err != nil { 67 | f.Close() 68 | return nil, err 69 | } 70 | 71 | p := &DiskPartition{ 72 | f: f, 73 | filename: filename, 74 | mapped: mapped, 75 | sources: map[string]diskSource{}, 76 | } 77 | 78 | // Attempt to load the metadata. 79 | err = p.readMetadata() 80 | if err != nil { 81 | // Failed to read partition metadata, so 82 | // we need to clean up. There's nothing 83 | // else we can do. 84 | munmapErr := syscall.Munmap(mapped) 85 | if munmapErr != nil { 86 | // What do we do? 87 | } else { 88 | p.mapped = nil 89 | } 90 | 91 | fileCloseErr := f.Close() 92 | if fileCloseErr != nil { 93 | // What do we do? 94 | } else { 95 | p.f = nil 96 | } 97 | 98 | p.minTS = 0 99 | p.maxTS = 0 100 | p.sources = nil 101 | 102 | return nil, err 103 | } 104 | 105 | // Everything went well. 106 | return p, nil 107 | } 108 | 109 | // readMetadata decodes metadata information for the DiskPartition from 110 | // the file. 111 | func (p *DiskPartition) readMetadata() error { 112 | r := bytes.NewReader(p.mapped) 113 | 114 | // Read metadata offset. 
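// The on-disk layout (written by memory.(*MemoryPartition).Compact)
// ends with an 8-byte little-endian offset to the metadata block:
//
//	[gzipped extent data ...][metadata][int64 metadata offset]
//
// So we first seek 8 bytes back from the end (whence 2 = io.SeekEnd).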
115 | _, err := r.Seek(-8, 2) 116 | if err != nil { 117 | return err 118 | } 119 | 120 | metaStartOffset := int64(0) 121 | err = binary.Read(r, binary.LittleEndian, &metaStartOffset) 122 | if err != nil { 123 | return err 124 | } 125 | 126 | // Seek to the start of the metadata offset. 127 | _, err = r.Seek(metaStartOffset, 0) 128 | if err != nil { 129 | return err 130 | } 131 | 132 | // Check magic sequence number 133 | magic := uint32(0) 134 | err = binary.Read(r, binary.BigEndian, &magic) 135 | if err != nil { 136 | return err 137 | } 138 | 139 | if magic != Magic { 140 | return errors.New("partition/disk: invalid magic") 141 | } 142 | 143 | // Read min and max timestamps. 144 | err = binary.Read(r, binary.LittleEndian, &p.minTS) 145 | if err != nil { 146 | return err 147 | } 148 | 149 | err = binary.Read(r, binary.LittleEndian, &p.maxTS) 150 | if err != nil { 151 | return err 152 | } 153 | 154 | // Read the number of sources. 155 | numSources := uint16(0) 156 | err = binary.Read(r, binary.LittleEndian, &numSources) 157 | if err != nil { 158 | return err 159 | } 160 | 161 | // Read each source. 162 | for i := uint16(0); i < numSources; i++ { 163 | src := diskSource{ 164 | metrics: map[string]diskMetric{}, 165 | } 166 | 167 | // Read the length of the name. 168 | srcNameLength := uint8(0) 169 | err = binary.Read(r, binary.LittleEndian, &srcNameLength) 170 | if err != nil { 171 | return err 172 | } 173 | 174 | // Read the name. 175 | srcNameBytes := make([]byte, int(srcNameLength)) 176 | _, err = r.Read(srcNameBytes) 177 | if err != nil { 178 | return err 179 | } 180 | 181 | src.name = string(srcNameBytes) 182 | 183 | // Read the number of metrics for this source. 184 | numMetrics := uint16(0) 185 | err = binary.Read(r, binary.LittleEndian, &numMetrics) 186 | if err != nil { 187 | return err 188 | } 189 | 190 | // Read each meric. 191 | for j := uint16(0); j < numMetrics; j++ { 192 | met := diskMetric{} 193 | 194 | // Read the length of the name. 195 | metNameLength := uint8(0) 196 | err = binary.Read(r, binary.LittleEndian, &metNameLength) 197 | if err != nil { 198 | return err 199 | } 200 | 201 | // Read the name. 202 | metNameBytes := make([]byte, int(metNameLength)) 203 | _, err = r.Read(metNameBytes) 204 | if err != nil { 205 | return err 206 | } 207 | 208 | met.name = string(metNameBytes) 209 | 210 | // Read metric offset. 211 | err = binary.Read(r, binary.LittleEndian, &met.offset) 212 | if err != nil { 213 | return err 214 | } 215 | 216 | // Read number of points. 217 | err = binary.Read(r, binary.LittleEndian, &met.numPoints) 218 | if err != nil { 219 | return err 220 | } 221 | 222 | // Read number of extents. 223 | numExtents := uint32(0) 224 | err = binary.Read(r, binary.LittleEndian, &numExtents) 225 | if err != nil { 226 | return err 227 | } 228 | 229 | for i := uint32(0); i < numExtents; i++ { 230 | ext := diskExtent{} 231 | err = binary.Read(r, binary.LittleEndian, &ext.startTS) 232 | if err != nil { 233 | return err 234 | } 235 | 236 | err = binary.Read(r, binary.LittleEndian, &ext.offset) 237 | if err != nil { 238 | return err 239 | } 240 | 241 | err = binary.Read(r, binary.LittleEndian, &ext.numPoints) 242 | if err != nil { 243 | return err 244 | } 245 | 246 | met.extents = append(met.extents, ext) 247 | } 248 | 249 | src.metrics[met.name] = met 250 | } 251 | 252 | p.sources[src.name] = src 253 | } 254 | 255 | // Internal state has been updated without issues. 
256 | return nil 257 | } 258 | 259 | func (p *DiskPartition) InsertRows(rows []partition.Row) error { 260 | return errors.New("partition/disk: read only") 261 | } 262 | 263 | func (p *DiskPartition) MinTimestamp() int64 { 264 | return p.minTS 265 | } 266 | 267 | func (p *DiskPartition) MaxTimestamp() int64 { 268 | return p.maxTS 269 | } 270 | 271 | func (p *DiskPartition) ReadOnly() bool { 272 | return true 273 | } 274 | 275 | func (p *DiskPartition) SetReadOnly() { 276 | } 277 | 278 | func (p *DiskPartition) Filename() string { 279 | return p.filename 280 | } 281 | 282 | func (p *DiskPartition) Sources() []string { 283 | sources := []string{} 284 | for source := range p.sources { 285 | sources = append(sources, source) 286 | } 287 | 288 | return sources 289 | } 290 | 291 | func (p *DiskPartition) Metrics(source string) []string { 292 | metrics := []string{} 293 | 294 | src, present := p.sources[source] 295 | if !present { 296 | return metrics 297 | } 298 | 299 | for metric := range src.metrics { 300 | metrics = append(metrics, metric) 301 | } 302 | 303 | return metrics 304 | } 305 | 306 | func (p *DiskPartition) HasSource(source string) bool { 307 | _, present := p.sources[source] 308 | return present 309 | } 310 | 311 | func (p *DiskPartition) HasMetric(source, metric string) bool { 312 | src, present := p.sources[source] 313 | if !present { 314 | return false 315 | } 316 | 317 | _, present = src.metrics[metric] 318 | return present 319 | } 320 | 321 | func (p *DiskPartition) Hold() { 322 | p.rwMu.RLock() 323 | } 324 | 325 | func (p *DiskPartition) Release() { 326 | p.rwMu.RUnlock() 327 | } 328 | 329 | func (p *DiskPartition) ExclusiveHold() { 330 | p.rwMu.Lock() 331 | } 332 | 333 | func (p *DiskPartition) ExclusiveRelease() { 334 | p.rwMu.Unlock() 335 | } 336 | 337 | func (p *DiskPartition) Close() error { 338 | err := syscall.Munmap(p.mapped) 339 | if err != nil { 340 | return err 341 | } 342 | 343 | p.mapped = nil 344 | return p.f.Close() 345 | } 346 | 347 | func (p *DiskPartition) Destroy() error { 348 | err := p.Close() 349 | if err != nil { 350 | return err 351 | } 352 | 353 | return os.Remove(p.filename) 354 | } 355 | -------------------------------------------------------------------------------- /partition/interface.go: -------------------------------------------------------------------------------- 1 | package partition 2 | 3 | type Partition interface { 4 | // Insertion 5 | InsertRows([]Row) error 6 | 7 | // Metadata 8 | ReadOnly() bool 9 | Filename() string 10 | MinTimestamp() int64 11 | MaxTimestamp() int64 12 | 13 | // Metrics metadata 14 | Sources() []string 15 | Metrics(source string) []string 16 | HasSource(source string) bool 17 | HasMetric(source, metric string) bool 18 | 19 | // Management 20 | SetReadOnly() 21 | Close() error 22 | Destroy() error 23 | 24 | // Opaque locking 25 | Hold() 26 | Release() 27 | ExclusiveHold() 28 | ExclusiveRelease() 29 | 30 | // Iterator 31 | NewIterator(source string, metric string) (Iterator, error) 32 | } 33 | 34 | // Iterator is an iterator over a sequence of points. 
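// A typical consumption loop (a hedged sketch; the source, metric,
// start timestamp, and process are stand-ins for caller code):
//
//	it, err := p.NewIterator("host-1", "cpu.idle")
//	if err != nil {
//		return err // source or metric not in this partition
//	}
//	defer it.Close()
//	for err = it.Seek(start); err == nil; err = it.Next() {
//		process(it.Point())
//	}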
35 | type Iterator interface { 36 | Reset() error 37 | Next() error 38 | Point() Point 39 | Seek(int64) error 40 | Close() 41 | } 42 | -------------------------------------------------------------------------------- /partition/memory/compact.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "compress/gzip" 5 | "encoding/binary" 6 | "errors" 7 | "io" 8 | "sort" 9 | 10 | "github.com/Cistern/catena/partition" 11 | "github.com/Cistern/catena/partition/disk" 12 | ) 13 | 14 | const extentSize = 3600 15 | 16 | var ( 17 | errorPartitionNotReadyOnly = errors.New("partition/memory: partition is not read only") 18 | ) 19 | 20 | type metaKey struct { 21 | source string 22 | metric string 23 | } 24 | 25 | type extent struct { 26 | startTS int64 27 | offset int64 28 | numPoints uint32 29 | points []partition.Point 30 | } 31 | 32 | type metaValue struct { 33 | offset int64 34 | numPoints uint32 35 | 36 | extents []extent 37 | } 38 | 39 | func (p *MemoryPartition) Compact(w io.WriteSeeker) error { 40 | if !p.readOnly { 41 | return errorPartitionNotReadyOnly 42 | } 43 | 44 | meta := map[metaKey]metaValue{} 45 | 46 | gzipWriter := gzip.NewWriter(w) 47 | 48 | sources := []string{} 49 | metricsBySource := map[string][]string{} 50 | 51 | for sourceName, source := range p.sources { 52 | sources = append(sources, sourceName) 53 | 54 | for metricName, metric := range source.metrics { 55 | metricsBySource[sourceName] = append(metricsBySource[sourceName], metricName) 56 | 57 | extents := splitIntoExtents(metric.points) 58 | 59 | currentOffset, err := w.Seek(0, 1) 60 | if err != nil { 61 | return err 62 | } 63 | 64 | metaVal := metaValue{ 65 | offset: currentOffset, 66 | numPoints: uint32(len(metric.points)), 67 | } 68 | 69 | for extentIndex, ext := range extents { 70 | gzipWriter.Close() 71 | gzipWriter = gzip.NewWriter(w) 72 | currentOffset, err := w.Seek(0, 1) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | ext.offset = currentOffset 78 | 79 | err = binary.Write(gzipWriter, binary.LittleEndian, ext.points) 80 | if err != nil { 81 | return err 82 | } 83 | 84 | err = gzipWriter.Flush() 85 | if err != nil { 86 | return err 87 | } 88 | 89 | extents[extentIndex] = ext 90 | } 91 | 92 | metaVal.extents = extents 93 | 94 | meta[metaKey{sourceName, metricName}] = metaVal 95 | } 96 | } 97 | 98 | err := gzipWriter.Close() 99 | if err != nil { 100 | return err 101 | } 102 | 103 | metaStartOffset, err := w.Seek(0, 1) 104 | if err != nil { 105 | return err 106 | } 107 | 108 | // Start writing metadata 109 | // Magic sequence 110 | err = binary.Write(w, binary.BigEndian, disk.Magic) 111 | if err != nil { 112 | return err 113 | } 114 | 115 | err = binary.Write(w, binary.LittleEndian, p.minTS) 116 | if err != nil { 117 | return err 118 | } 119 | 120 | err = binary.Write(w, binary.LittleEndian, p.maxTS) 121 | if err != nil { 122 | return err 123 | } 124 | 125 | // Encode the number of sources 126 | err = binary.Write(w, binary.LittleEndian, uint16(len(sources))) 127 | if err != nil { 128 | return err 129 | } 130 | 131 | sort.Strings(sources) 132 | 133 | for _, sourceName := range sources { 134 | err = binary.Write(w, binary.LittleEndian, uint8(len(sourceName))) 135 | if err != nil { 136 | return err 137 | } 138 | 139 | _, err = w.Write([]byte(sourceName)) 140 | if err != nil { 141 | return err 142 | } 143 | 144 | metrics := metricsBySource[sourceName] 145 | sort.Strings(metrics) 146 | 147 | // Encode number of metrics 148 | err = binary.Write(w, 
binary.LittleEndian, uint16(len(metrics))) 149 | if err != nil { 150 | return err 151 | } 152 | 153 | for _, metricName := range metrics { 154 | err = binary.Write(w, binary.LittleEndian, uint8(len(metricName))) 155 | if err != nil { 156 | return err 157 | } 158 | 159 | _, err = w.Write([]byte(metricName)) 160 | if err != nil { 161 | return err 162 | } 163 | 164 | metadata := meta[metaKey{sourceName, metricName}] 165 | 166 | err = binary.Write(w, binary.LittleEndian, metadata.offset) 167 | if err != nil { 168 | return err 169 | } 170 | 171 | err = binary.Write(w, binary.LittleEndian, uint32(metadata.numPoints)) 172 | if err != nil { 173 | return err 174 | } 175 | 176 | err = binary.Write(w, binary.LittleEndian, uint32(len(metadata.extents))) 177 | if err != nil { 178 | return err 179 | } 180 | 181 | for _, ext := range metadata.extents { 182 | err = binary.Write(w, binary.LittleEndian, ext.startTS) 183 | if err != nil { 184 | return err 185 | } 186 | 187 | err = binary.Write(w, binary.LittleEndian, ext.offset) 188 | if err != nil { 189 | return err 190 | } 191 | 192 | err = binary.Write(w, binary.LittleEndian, ext.numPoints) 193 | if err != nil { 194 | return err 195 | } 196 | } 197 | } 198 | } 199 | 200 | err = binary.Write(w, binary.LittleEndian, metaStartOffset) 201 | if err != nil { 202 | return err 203 | } 204 | 205 | return nil 206 | } 207 | 208 | func splitIntoExtents(points []partition.Point) []extent { 209 | extents := []extent{} 210 | 211 | currentExtent := extent{} 212 | 213 | for _, point := range points { 214 | if currentExtent.numPoints == 0 { 215 | currentExtent.startTS = point.Timestamp 216 | } 217 | 218 | currentExtent.points = append(currentExtent.points, point) 219 | currentExtent.numPoints++ 220 | 221 | if currentExtent.numPoints == extentSize { 222 | extents = append(extents, currentExtent) 223 | currentExtent = extent{} 224 | } 225 | } 226 | 227 | if currentExtent.numPoints > 0 { 228 | extents = append(extents, currentExtent) 229 | } 230 | 231 | return extents 232 | } 233 | -------------------------------------------------------------------------------- /partition/memory/insertion.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/Cistern/catena/partition" 7 | ) 8 | 9 | func (p *MemoryPartition) getOrCreateSource(name string) *memorySource { 10 | var source *memorySource 11 | present := false 12 | 13 | p.sourcesLock.RLock() 14 | if source, present = p.sources[name]; !present { 15 | p.sourcesLock.RUnlock() 16 | p.sourcesLock.Lock() 17 | 18 | if source, present = p.sources[name]; !present { 19 | source = &memorySource{ 20 | name: name, 21 | metrics: map[string]*memoryMetric{}, 22 | } 23 | 24 | p.sources[name] = source 25 | } 26 | 27 | p.sourcesLock.Unlock() 28 | } else { 29 | p.sourcesLock.RUnlock() 30 | } 31 | 32 | return source 33 | } 34 | 35 | func (s *memorySource) getOrCreateMetric(name string) *memoryMetric { 36 | var metric *memoryMetric 37 | present := false 38 | 39 | s.lock.RLock() 40 | if metric, present = s.metrics[name]; !present { 41 | s.lock.RUnlock() 42 | s.lock.Lock() 43 | 44 | if metric, present = s.metrics[name]; !present { 45 | metric = &memoryMetric{ 46 | name: name, 47 | points: make([]partition.Point, 0, 64), 48 | lastInsertIndex: -1, 49 | } 50 | 51 | s.metrics[name] = metric 52 | } 53 | 54 | s.lock.Unlock() 55 | } else { 56 | s.lock.RUnlock() 57 | } 58 | 59 | return metric 60 | } 61 | 62 | func (m *memoryMetric) insertPoints(points 
[]partition.Point) {
63 | m.lock.Lock()
64 |
65 | for _, point := range points {
66 | if m.lastInsertIndex < 0 {
67 | // This is the first point.
68 | m.points = append(m.points, point)
69 | m.lastInsertIndex = 0
70 | continue
71 | }
72 |
73 | prevPoint := m.points[m.lastInsertIndex]
74 | if prevPoint.Timestamp < point.Timestamp {
75 | m.lastInsertIndex = m.insertAfter(point, m.lastInsertIndex)
76 | continue
77 | }
78 |
79 | var i int
80 | for i = m.lastInsertIndex; i > 0 && m.points[i].Timestamp > point.Timestamp; i-- {
81 | }
82 |
83 | m.lastInsertIndex = m.insertAfter(point, i)
84 | }
85 |
86 | m.lock.Unlock()
87 | }
88 |
89 | func (m *memoryMetric) insertAfter(point partition.Point, after int) int {
90 | lenPoints := len(m.points)
91 |
92 | var i int
93 | for i = after; i < lenPoints && m.points[i].Timestamp < point.Timestamp; i++ {
94 | }
95 |
96 | // Resize
97 | m.points = append(m.points, point)
98 |
99 | // Shift elements over
100 | copy(m.points[i+1:], m.points[i:lenPoints])
101 |
102 | // Insert in place
103 | m.points[i] = point
104 |
105 | return i
106 | }
107 |
108 | func (s *memorySource) String() string {
109 | str := ""
110 |
111 | str += fmt.Sprintf("source %s\n", s.name)
112 | for _, metric := range s.metrics {
113 | str += metric.String()
114 | }
115 |
116 | return str
117 | }
118 |
119 | func (m *memoryMetric) String() string {
120 | str := ""
121 |
122 | str += fmt.Sprintf(" metric %s\n", m.name)
123 | for _, p := range m.points {
124 | str += fmt.Sprintf(" %d => %g\n", p.Timestamp, p.Value)
125 | }
126 |
127 | return str
128 | }
129 |
--------------------------------------------------------------------------------
/partition/memory/iterator.go:
--------------------------------------------------------------------------------
1 | package memory
2 |
3 | import (
4 | "errors"
5 |
6 | "github.com/Cistern/catena/partition"
7 | )
8 |
9 | // NewIterator returns an Iterator for the partition that iterates over
10 | // a sequence of points for the given source and metric.
11 | func (p *MemoryPartition) NewIterator(sourceName string, metricName string) (partition.Iterator, error) {
12 | p.Hold()
13 | p.sourcesLock.RLock()
14 | defer p.sourcesLock.RUnlock()
15 | source, present := p.sources[sourceName]
16 | if !present {
17 | p.Release() // don't leak the read hold on the error path
18 | return nil, errors.New("partition/memory: source not found")
19 | }
20 | metric, present := source.metrics[metricName]
21 | if !present {
22 | p.Release()
23 | return nil, errors.New("partition/memory: metric not found")
24 | }
25 |
26 | return &memoryIterator{
27 | sourceName: sourceName,
28 | partition: p,
29 | metric: metric,
30 | currentIndex: -1,
31 | }, nil
32 | }
33 |
34 | type memoryIterator struct {
35 | sourceName string
36 | partition *MemoryPartition
37 | metric *memoryMetric
38 | currentIndex int
39 | currentPoint partition.Point
40 | }
41 |
42 | // Point returns the current point.
43 | func (i *memoryIterator) Point() partition.Point {
44 | return i.currentPoint
45 | }
46 |
47 | // Reset moves the iterator to the first available point.
48 | func (i *memoryIterator) Reset() error {
49 | i.currentIndex = 0
50 |
51 | i.metric.lock.Lock()
52 | defer i.metric.lock.Unlock()
53 |
54 | i.currentPoint = i.metric.points[0]
55 |
56 | return nil
57 | }
58 |
59 | // Seek moves the iterator to the first timestamp greater
60 | // than or equal to the given timestamp.
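// Seek is a linear scan over the points slice; it returns an error
// when no point has a timestamp >= the requested timestamp.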
61 | func (i *memoryIterator) Seek(timestamp int64) error { 62 | i.currentIndex = 0 63 | 64 | i.metric.lock.Lock() 65 | defer i.metric.lock.Unlock() 66 | 67 | for ; i.currentIndex < len(i.metric.points)-1; i.currentIndex++ { 68 | if i.metric.points[i.currentIndex].Timestamp >= timestamp { 69 | break 70 | } 71 | } 72 | 73 | i.currentPoint = i.metric.points[i.currentIndex] 74 | if i.currentPoint.Timestamp < timestamp { 75 | return errors.New("partition/memory: could not seek to requested timestamp") 76 | } 77 | 78 | return nil 79 | } 80 | 81 | // Next moves the iterator to the next point. 82 | func (i *memoryIterator) Next() error { 83 | if i.currentIndex < 0 { 84 | i.Reset() 85 | return nil 86 | } 87 | 88 | i.metric.lock.Lock() 89 | defer i.metric.lock.Unlock() 90 | 91 | if i.metric.points[i.currentIndex] != i.currentPoint { 92 | for i.currentIndex = 0; i.currentIndex != len(i.metric.points); i.currentIndex++ { 93 | if i.metric.points[i.currentIndex] == i.currentPoint { 94 | break 95 | } 96 | } 97 | } 98 | 99 | if i.currentIndex == len(i.metric.points)-1 { 100 | return errors.New("partition/memory: iterator index out of bounds") 101 | } 102 | 103 | i.currentIndex++ 104 | i.currentPoint = i.metric.points[i.currentIndex] 105 | 106 | return nil 107 | } 108 | 109 | // Close closes the iterator. 110 | func (i *memoryIterator) Close() { 111 | i.partition.Release() 112 | } 113 | 114 | // memoryIterator is an Iterator. 115 | var _ partition.Iterator = &memoryIterator{} 116 | -------------------------------------------------------------------------------- /partition/memory/partition.go: -------------------------------------------------------------------------------- 1 | package memory 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "math" 7 | "sync" 8 | "sync/atomic" 9 | 10 | "github.com/Cistern/catena/partition" 11 | "github.com/Cistern/catena/wal" 12 | ) 13 | 14 | // MemoryPartition is a partition that exists in-memory. 15 | type MemoryPartition struct { 16 | readOnly bool 17 | partitionLock sync.RWMutex 18 | 19 | minTS int64 20 | maxTS int64 21 | 22 | sources map[string]*memorySource 23 | sourcesLock sync.RWMutex 24 | 25 | wal wal.WAL 26 | } 27 | 28 | // memorySource is a source with metrics. 29 | type memorySource struct { 30 | name string 31 | metrics map[string]*memoryMetric 32 | lock sync.RWMutex 33 | } 34 | 35 | // memoryMetric contains an ordered slice of points. 36 | type memoryMetric struct { 37 | name string 38 | points []partition.Point 39 | lock sync.Mutex 40 | 41 | lastInsertIndex int 42 | } 43 | 44 | // NewMemoryPartition creates a new MemoryPartition backed by WAL. 45 | func NewMemoryPartition(WAL wal.WAL) *MemoryPartition { 46 | p := MemoryPartition{ 47 | readOnly: false, 48 | sources: map[string]*memorySource{}, 49 | wal: WAL, 50 | minTS: math.MaxInt64, 51 | maxTS: math.MinInt64, 52 | } 53 | 54 | return &p 55 | } 56 | 57 | // RecoverMemoryPartition recovers a MemoryPartition backed by WAL. 
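// Recovery replays every logged entry through InsertRows until
// ReadEntry returns io.EOF, then truncates the log so a torn final
// entry cannot corrupt future appends. The WAL is attached to the
// partition only after replay, so recovered rows are not re-appended
// to the log. A minimal sketch, reusing the WAL path from the tests:
//
//	WAL, err := wal.OpenFileWAL("/tmp/wal.wal")
//	if err != nil {
//		// handle the error
//	}
//	p, err := memory.RecoverMemoryPartition(WAL)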
58 | func RecoverMemoryPartition(WAL wal.WAL) (*MemoryPartition, error) {
59 | 	p := &MemoryPartition{
60 | 		readOnly: false,
61 | 		sources:  map[string]*memorySource{},
62 | 		minTS:    math.MaxInt64,
63 | 		maxTS:    math.MinInt64,
64 | 	}
65 | 
66 | 	var entry wal.WALEntry
67 | 	var err error
68 | 
69 | 	for entry, err = WAL.ReadEntry(); err == nil; entry, err = WAL.ReadEntry() {
70 | 		p.InsertRows(entry.Rows)
71 | 	}
72 | 
73 | 	if err != nil {
74 | 		if err != io.EOF {
75 | 			return nil, err
76 | 		}
77 | 	}
78 | 
79 | 	err = WAL.Truncate()
80 | 
81 | 	p.wal = WAL
82 | 
83 | 	return p, err
84 | }
85 | 
86 | // InsertRows inserts rows into the partition.
87 | func (p *MemoryPartition) InsertRows(rows []partition.Row) error {
88 | 	if p.readOnly {
89 | 		return errors.New("partition/memory: read only")
90 | 	}
91 | 
92 | 	if p.wal != nil {
93 | 		_, err := p.wal.Append(wal.WALEntry{
94 | 			Operation: wal.OperationInsert,
95 | 			Rows:      rows,
96 | 		})
97 | 
98 | 		if err != nil {
99 | 			return err
100 | 		}
101 | 	}
102 | 
103 | 	var (
104 | 		minTS int64
105 | 		maxTS int64
106 | 	)
107 | 
108 | 	for i, row := range rows {
109 | 		if i == 0 {
110 | 			minTS = row.Timestamp
111 | 			maxTS = row.Timestamp
112 | 		}
113 | 
114 | 		if row.Timestamp < minTS {
115 | 			minTS = row.Timestamp
116 | 		}
117 | 
118 | 		if row.Timestamp > maxTS {
119 | 			maxTS = row.Timestamp
120 | 		}
121 | 
122 | 		source := p.getOrCreateSource(row.Source)
123 | 		metric := source.getOrCreateMetric(row.Metric)
124 | 		metric.insertPoints([]partition.Point{row.Point})
125 | 	}
126 | 
127 | 	for min := atomic.LoadInt64(&p.minTS); min > minTS; min = atomic.LoadInt64(&p.minTS) {
128 | 		if atomic.CompareAndSwapInt64(&p.minTS, min, minTS) {
129 | 			break
130 | 		}
131 | 	}
132 | 
133 | 	for max := atomic.LoadInt64(&p.maxTS); max < maxTS; max = atomic.LoadInt64(&p.maxTS) {
134 | 		if atomic.CompareAndSwapInt64(&p.maxTS, max, maxTS) {
135 | 			break
136 | 		}
137 | 	}
138 | 
139 | 	return nil
140 | }
141 | 
142 | // SetReadOnly sets the partition mode to read-only.
143 | func (m *MemoryPartition) SetReadOnly() {
144 | 	m.readOnly = true
145 | }
146 | 
147 | // Close sets the memory partition to read-only, releases resources,
148 | // and closes its WAL.
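// Note that the WAL is closed before any in-memory state is torn
// down, so a failed WAL close leaves the partition intact and usable.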
149 | func (m *MemoryPartition) Close() error { 150 | // Close WAL 151 | err := m.wal.Close() 152 | if err != nil { 153 | return err 154 | } 155 | 156 | m.readOnly = true 157 | 158 | m.sources = nil 159 | 160 | return nil 161 | } 162 | 163 | func (p *MemoryPartition) MinTimestamp() int64 { 164 | return atomic.LoadInt64(&p.minTS) 165 | } 166 | 167 | func (p *MemoryPartition) MaxTimestamp() int64 { 168 | return atomic.LoadInt64(&p.maxTS) 169 | } 170 | 171 | func (p *MemoryPartition) ReadOnly() bool { 172 | return p.readOnly 173 | } 174 | 175 | func (p *MemoryPartition) Filename() string { 176 | return p.wal.Filename() 177 | } 178 | 179 | func (p *MemoryPartition) Sources() []string { 180 | sources := []string{} 181 | for source := range p.sources { 182 | sources = append(sources, source) 183 | } 184 | 185 | return sources 186 | } 187 | 188 | func (p *MemoryPartition) Metrics(source string) []string { 189 | metrics := []string{} 190 | 191 | src, present := p.sources[source] 192 | if !present { 193 | return metrics 194 | } 195 | 196 | for metric := range src.metrics { 197 | metrics = append(metrics, metric) 198 | } 199 | 200 | return metrics 201 | } 202 | 203 | func (p *MemoryPartition) HasSource(source string) bool { 204 | _, present := p.sources[source] 205 | return present 206 | } 207 | 208 | func (p *MemoryPartition) HasMetric(source, metric string) bool { 209 | src, present := p.sources[source] 210 | if !present { 211 | return false 212 | } 213 | 214 | _, present = src.metrics[metric] 215 | return present 216 | } 217 | 218 | func (p *MemoryPartition) Hold() { 219 | p.partitionLock.RLock() 220 | } 221 | 222 | func (p *MemoryPartition) Release() { 223 | p.partitionLock.RUnlock() 224 | } 225 | 226 | func (p *MemoryPartition) ExclusiveHold() { 227 | p.partitionLock.Lock() 228 | } 229 | 230 | func (p *MemoryPartition) ExclusiveRelease() { 231 | p.partitionLock.Unlock() 232 | } 233 | 234 | // Destroy destroys the memory partition as well as its WAL. 235 | func (m *MemoryPartition) Destroy() error { 236 | // Destroy WAL 237 | err := m.wal.Destroy() 238 | 239 | m.readOnly = true 240 | 241 | return err 242 | } 243 | -------------------------------------------------------------------------------- /partition/types.go: -------------------------------------------------------------------------------- 1 | package partition 2 | 3 | type Point struct { 4 | Timestamp int64 `json:"timestamp"` 5 | Value float64 `json:"value"` 6 | } 7 | 8 | type Row struct { 9 | Source string `json:"source"` 10 | Metric string `json:"metric"` 11 | Point 12 | } 13 | -------------------------------------------------------------------------------- /partition_list.go: -------------------------------------------------------------------------------- 1 | package catena 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "sync/atomic" 7 | "unsafe" 8 | 9 | "github.com/Cistern/catena/partition" 10 | ) 11 | 12 | type partitionList struct { 13 | head unsafe.Pointer 14 | size int32 15 | } 16 | 17 | type partitionListNode struct { 18 | val partition.Partition 19 | next unsafe.Pointer 20 | } 21 | 22 | type partitionListIterator struct { 23 | list *partitionList 24 | current *partitionListNode 25 | valid bool 26 | } 27 | 28 | func comparePartitions(a, b partition.Partition) int { 29 | // Highest timestamp first 30 | return int(b.MinTimestamp() - a.MinTimestamp()) 31 | } 32 | 33 | func newPartitionList() *partitionList { 34 | return &partitionList{} 35 | } 36 | 37 | // Insert inserts v into the list in order. 
An error is returned if v is already present. 38 | func (l *partitionList) Insert(v partition.Partition) error { 39 | n := &partitionListNode{ 40 | val: v, 41 | next: nil, 42 | } 43 | 44 | HEAD: 45 | headPtr := atomic.LoadPointer(&l.head) 46 | 47 | if headPtr == nil { 48 | if !atomic.CompareAndSwapPointer(&l.head, headPtr, unsafe.Pointer(n)) { 49 | goto HEAD 50 | } 51 | 52 | atomic.AddInt32(&l.size, 1) 53 | return nil 54 | } 55 | 56 | headNode := (*partitionListNode)(headPtr) 57 | if comparePartitions(headNode.val, n.val) > 0 { 58 | n.next = headPtr 59 | if !atomic.CompareAndSwapPointer(&l.head, headPtr, unsafe.Pointer(n)) { 60 | goto HEAD 61 | } 62 | 63 | atomic.AddInt32(&l.size, 1) 64 | return nil 65 | } 66 | 67 | NEXT: 68 | nextPtr := atomic.LoadPointer(&headNode.next) 69 | if nextPtr == nil { 70 | if !atomic.CompareAndSwapPointer(&headNode.next, nextPtr, unsafe.Pointer(n)) { 71 | goto NEXT 72 | } 73 | 74 | atomic.AddInt32(&l.size, 1) 75 | return nil 76 | } 77 | 78 | nextNode := (*partitionListNode)(nextPtr) 79 | if comparePartitions(nextNode.val, n.val) > 0 { 80 | n.next = nextPtr 81 | if !atomic.CompareAndSwapPointer(&headNode.next, nextPtr, unsafe.Pointer(n)) { 82 | goto NEXT 83 | } 84 | 85 | atomic.AddInt32(&l.size, 1) 86 | return nil 87 | } 88 | 89 | if comparePartitions(nextNode.val, n.val) == 0 { 90 | return errors.New("catena/partition_list: partition exists") 91 | } 92 | 93 | headNode = nextNode 94 | goto NEXT 95 | } 96 | 97 | func (l *partitionList) Swap(old, new partition.Partition) error { 98 | n := &partitionListNode{ 99 | val: new, 100 | next: nil, 101 | } 102 | 103 | HEAD: 104 | headPtr := atomic.LoadPointer(&l.head) 105 | 106 | if headPtr == nil { 107 | return errors.New("catena/partition_list: partition not found") 108 | } 109 | 110 | headNode := (*partitionListNode)(headPtr) 111 | if comparePartitions(headNode.val, n.val) == 0 { 112 | n.next = headNode.next 113 | 114 | if !atomic.CompareAndSwapPointer(&l.head, headPtr, unsafe.Pointer(n)) { 115 | goto HEAD 116 | } 117 | 118 | return nil 119 | } 120 | 121 | NEXT: 122 | nextPtr := atomic.LoadPointer(&headNode.next) 123 | if nextPtr == nil { 124 | return errors.New("catena/partition_list: partition not found") 125 | } 126 | 127 | nextNode := (*partitionListNode)(nextPtr) 128 | if comparePartitions(nextNode.val, n.val) == 0 { 129 | n.next = nextNode.next 130 | 131 | if !atomic.CompareAndSwapPointer(&headNode.next, nextPtr, unsafe.Pointer(n)) { 132 | goto NEXT 133 | } 134 | 135 | return nil 136 | } 137 | 138 | if comparePartitions(nextNode.val, n.val) > 0 { 139 | return errors.New("catena/partition_list: partition not found") 140 | } 141 | 142 | headNode = nextNode 143 | goto NEXT 144 | } 145 | 146 | // Remove removes v from the list. An error is returned if v is not present. 
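// Removal is lock-free: the predecessor's next pointer is
// compare-and-swapped past the target node, retrying from the same
// predecessor whenever a concurrent writer wins the race. The removed
// node's own next pointer is left untouched, so a reader that already
// reached it can still finish traversing the list.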
147 | func (l *partitionList) Remove(v partition.Partition) error { 148 | HEAD: 149 | headPtr := atomic.LoadPointer(&l.head) 150 | 151 | if headPtr == nil { 152 | return errors.New("catena/partition_list: partition not found") 153 | } 154 | 155 | headNode := (*partitionListNode)(headPtr) 156 | 157 | if comparePartitions(headNode.val, v) == 0 { 158 | nextPtr := atomic.LoadPointer(&headNode.next) 159 | if !atomic.CompareAndSwapPointer(&l.head, headPtr, nextPtr) { 160 | goto HEAD 161 | } 162 | 163 | atomic.AddInt32(&l.size, -1) 164 | return nil 165 | } 166 | 167 | NEXT: 168 | nextPtr := atomic.LoadPointer(&headNode.next) 169 | if nextPtr == nil { 170 | return errors.New("catena/partition_list: partition not found") 171 | } 172 | 173 | nextNode := (*partitionListNode)(nextPtr) 174 | 175 | if comparePartitions(nextNode.val, v) > 0 { 176 | return errors.New("catena/partition_list: partition not found") 177 | } 178 | 179 | if comparePartitions(nextNode.val, v) == 0 { 180 | replacementPtr := atomic.LoadPointer(&nextNode.next) 181 | if !atomic.CompareAndSwapPointer(&headNode.next, nextPtr, replacementPtr) { 182 | goto NEXT 183 | } 184 | 185 | atomic.AddInt32(&l.size, -1) 186 | return nil 187 | } 188 | 189 | headNode = nextNode 190 | goto NEXT 191 | } 192 | 193 | // Size returns the number of elements currently in the list. 194 | func (l *partitionList) Size() int { 195 | return int(atomic.LoadInt32(&l.size)) 196 | } 197 | 198 | // NewIterator returns a new iterator. Values can be read 199 | // after Next is called. 200 | func (l *partitionList) NewIterator() *partitionListIterator { 201 | return &partitionListIterator{ 202 | list: l, 203 | valid: true, 204 | } 205 | } 206 | 207 | // Next positions the iterator at the next node in the list. 208 | // Next will be positioned at the head on the first call. 209 | // The return value will be true if a value can be read from the list. 210 | func (i *partitionListIterator) Next() bool { 211 | if !i.valid { 212 | return false 213 | } 214 | 215 | if i.current == nil { 216 | head := atomic.LoadPointer(&i.list.head) 217 | if head == nil { 218 | i.valid = false 219 | return false 220 | } 221 | 222 | i.current = (*partitionListNode)(head) 223 | return true 224 | } 225 | 226 | next := atomic.LoadPointer(&i.current.next) 227 | i.current = (*partitionListNode)(next) 228 | 229 | i.valid = i.current != nil 230 | return i.valid 231 | } 232 | 233 | func (i *partitionListIterator) HasNext() bool { 234 | if i.current == nil { 235 | return false 236 | } 237 | 238 | return atomic.LoadPointer(&i.current.next) != nil 239 | } 240 | 241 | // Value reads the value from the current node of the iterator. 242 | // An error is returned if a value cannot be retrieved. 243 | func (i *partitionListIterator) Value() (partition.Partition, error) { 244 | var v partition.Partition 245 | 246 | if i.current == nil { 247 | return v, errors.New("catena/partition_list: partition not found") 248 | } 249 | 250 | return i.current.val, nil 251 | } 252 | 253 | // String returns the string representation of the list. 
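// String walks the list with NewIterator, so it reflects a live
// traversal rather than a snapshot: concurrent inserts and removals
// may or may not appear in the output.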
254 | func (l *partitionList) String() string { 255 | output := "" 256 | 257 | if l.head == nil { 258 | return output 259 | } 260 | 261 | i := l.NewIterator() 262 | 263 | for i.Next() { 264 | v, _ := i.Value() 265 | output += fmt.Sprintf("%v ", v) 266 | } 267 | 268 | return output 269 | } 270 | -------------------------------------------------------------------------------- /partition_test.go: -------------------------------------------------------------------------------- 1 | package catena 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "runtime" 7 | "sync" 8 | "testing" 9 | "time" 10 | 11 | "github.com/Cistern/catena/partition" 12 | "github.com/Cistern/catena/partition/disk" 13 | "github.com/Cistern/catena/partition/memory" 14 | "github.com/Cistern/catena/wal" 15 | ) 16 | 17 | func TestMemoryPartition1(t *testing.T) { 18 | os.RemoveAll("/tmp/wal.wal") 19 | timestamps := 100 20 | sources := 100 21 | metrics := 100 22 | 23 | WAL, err := wal.NewFileWAL("/tmp/wal.wal") 24 | if err != nil { 25 | t.Fatal(err) 26 | } 27 | 28 | p := memory.NewMemoryPartition(WAL) 29 | 30 | workQueue := make(chan []partition.Row, timestamps*sources) 31 | 32 | parallelism := runtime.NumCPU() 33 | runtime.GOMAXPROCS(parallelism) 34 | 35 | for i := 0; i < timestamps; i++ { 36 | for j := 0; j < sources; j++ { 37 | 38 | rows := make([]partition.Row, metrics) 39 | 40 | for k := 0; k < metrics; k++ { 41 | rows[k] = partition.Row{ 42 | Source: "source_" + fmt.Sprint(j), 43 | Metric: "metric_" + fmt.Sprint(k), 44 | Point: partition.Point{ 45 | Timestamp: int64(i), 46 | Value: 0, 47 | }, 48 | } 49 | } 50 | 51 | workQueue <- rows 52 | } 53 | } 54 | 55 | wg := sync.WaitGroup{} 56 | wg.Add(parallelism) 57 | 58 | start := time.Now() 59 | 60 | for i := 0; i < parallelism; i++ { 61 | go func() { 62 | for rows := range workQueue { 63 | err := p.InsertRows(rows) 64 | if err != nil { 65 | t.Fatal(err) 66 | } 67 | } 68 | wg.Done() 69 | }() 70 | } 71 | 72 | close(workQueue) 73 | 74 | wg.Wait() 75 | 76 | t.Logf("%0.2f rows / sec\n", float64(timestamps*sources*metrics)/time.Now().Sub(start).Seconds()) 77 | 78 | i, err := p.NewIterator("source_0", "metric_0") 79 | if err != nil { 80 | t.Fatal(err) 81 | } 82 | 83 | expected := int64(0) 84 | for i.Next() == nil { 85 | if i.Point().Timestamp != expected { 86 | t.Fatalf("expected timestamp %d; got %d", expected, i.Point().Timestamp) 87 | } 88 | 89 | expected++ 90 | } 91 | i.Close() 92 | if expected != int64(timestamps) { 93 | t.Fatal(expected) 94 | } 95 | 96 | p.Close() 97 | 98 | WAL, err = wal.OpenFileWAL("/tmp/wal.wal") 99 | if err != nil { 100 | t.Fatal(err) 101 | } 102 | 103 | start = time.Now() 104 | 105 | p, err = memory.RecoverMemoryPartition(WAL) 106 | if err != nil { 107 | t.Fatal(err) 108 | } 109 | 110 | t.Logf("%0.2f rows / sec\n", float64(timestamps*sources*metrics)/time.Now().Sub(start).Seconds()) 111 | 112 | expected = int64(0) 113 | i, err = p.NewIterator("source_0", "metric_0") 114 | if err != nil { 115 | t.Fatal(err) 116 | } 117 | for i.Next() == nil { 118 | if i.Point().Timestamp != expected { 119 | t.Fatalf("expected timestamp %d; got %d", expected, i.Point().Timestamp) 120 | } 121 | 122 | expected++ 123 | } 124 | i.Close() 125 | 126 | if expected != int64(timestamps) { 127 | t.Fatal(expected) 128 | } 129 | 130 | p.SetReadOnly() 131 | 132 | f, err := os.Create("/tmp/compact.part") 133 | if err != nil { 134 | p.Destroy() 135 | t.Fatal(err) 136 | } 137 | 138 | err = p.Compact(f) 139 | if err != nil { 140 | p.Destroy() 141 | t.Fatal(err) 142 | } 143 | 144 | err = p.Destroy() 145 | 
if err != nil {
146 | 		t.Fatal(err)
147 | 	}
148 | 
149 | 	f.Close()
150 | 
151 | 	d, err := disk.OpenDiskPartition("/tmp/compact.part")
152 | 	if err != nil {
153 | 		t.Fatal(err)
154 | 	}
155 | 
156 | 	diskIter, err := d.NewIterator("source_0", "metric_0")
157 | 	if err != nil {
158 | 		t.Fatal(err)
159 | 	}
160 | 
161 | 	expected = 0
162 | 	for diskIter.Next() == nil {
163 | 		if diskIter.Point().Timestamp != expected {
164 | 			t.Fatalf("expected timestamp %d; got %d", expected, diskIter.Point().Timestamp)
165 | 		}
166 | 
167 | 		expected++
168 | 	}
169 | 	diskIter.Close()
170 | 
171 | 	err = d.Destroy()
172 | 	if err != nil {
173 | 		t.Fatal(err)
174 | 	}
175 | }
--------------------------------------------------------------------------------
/wal/file_wal.go:
--------------------------------------------------------------------------------
1 | package wal
2 | 
3 | import (
4 | 	"bytes"
5 | 	"compress/gzip"
6 | 	"encoding/binary"
7 | 	"errors"
8 | 	"io/ioutil"
9 | 	"math"
10 | 	"os"
11 | 	"sync"
12 | 
13 | 	"github.com/Cistern/catena/partition"
14 | )
15 | 
16 | var (
17 | 	// Magic sequence to check for valid data.
18 | 	walMagic = uint32(0x11141993)
19 | 
20 | 	errorInvalidWALMagic = errors.New("wal: invalid WAL magic number")
21 | 	errorInvalidWALFile  = errors.New("wal: invalid WAL file")
22 | )
23 | 
24 | // A FileWAL is a write-ahead log represented by a file on disk.
25 | type FileWAL struct {
26 | 	f    *os.File
27 | 	lock sync.Mutex
28 | 
29 | 	filename string
30 | 
31 | 	// lastReadOffset stores the end of the last good
32 | 	// WAL entry. This way we can truncate the WAL
33 | 	// and keep appending valid data at the end.
34 | 	lastReadOffset int64
35 | }
36 | 
37 | // NewFileWAL returns a new on-disk write-ahead log
38 | // with the given file name.
39 | func NewFileWAL(filename string) (*FileWAL, error) {
40 | 	// Attempt to open WAL file.
41 | 	f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC|os.O_EXCL, 0666)
42 | 	if err != nil {
43 | 		return nil, err
44 | 	}
45 | 
46 | 	return &FileWAL{
47 | 		f:        f,
48 | 		filename: filename,
49 | 	}, nil
50 | }
51 | 
52 | // OpenFileWAL opens a write-ahead log stored at filename.
53 | func OpenFileWAL(filename string) (*FileWAL, error) {
54 | 	// Attempt to open an existing WAL file.
55 | 	f, err := os.OpenFile(filename, os.O_RDWR, 0666)
56 | 	if err != nil {
57 | 		return nil, err
58 | 	}
59 | 
60 | 	return &FileWAL{
61 | 		f:        f,
62 | 		filename: filename,
63 | 	}, nil
64 | }
65 | 
66 | // Append writes the WALEntry to the write-ahead log.
67 | // It returns the number of bytes written and an error.
68 | func (w *FileWAL) Append(entry WALEntry) (int, error) {
69 | 
70 | 	// Make sure we have an open WAL.
71 | 	if w.f == nil {
72 | 		return 0, errorInvalidWALFile
73 | 	}
74 | 
75 | 	// Buffer writes until the end.
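// Each appended entry uses the following little-endian layout: a
// 13-byte header followed by a gzip-compressed payload of rows.
//
//	magic   uint32 // walMagic (0x11141993)
//	op      byte   // walOperation
//	numRows uint32
//	size    uint32 // payload length, patched in after compression
//	payload []byte // gzipped (source, metric, timestamp, value) rows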
76 | buf := &bytes.Buffer{} 77 | 78 | var err error 79 | 80 | scratch := [512]byte{} 81 | 82 | // Write magic number 83 | scratch[0] = byte(walMagic) 84 | scratch[1] = byte(walMagic >> 8) 85 | scratch[2] = byte(walMagic >> 16) 86 | scratch[3] = byte(walMagic >> 24) 87 | 88 | _, err = buf.Write(scratch[:4]) 89 | if err != nil { 90 | return 0, err 91 | } 92 | 93 | // Write the operation type 94 | err = buf.WriteByte(byte(entry.Operation)) 95 | if err != nil { 96 | return 0, err 97 | } 98 | 99 | // Write the number of rows 100 | numRows := uint32(len(entry.Rows)) 101 | scratch[0] = byte(numRows) 102 | scratch[1] = byte(numRows >> 8) 103 | scratch[2] = byte(numRows >> 16) 104 | scratch[3] = byte(numRows >> 24) 105 | 106 | _, err = buf.Write(scratch[:4]) 107 | if err != nil { 108 | return 0, err 109 | } 110 | 111 | // Write the size of the entry (0 for now) 112 | _, err = buf.Write(scratch[4:8]) 113 | if err != nil { 114 | return 0, err 115 | } 116 | 117 | gzipWriter, err := gzip.NewWriterLevel(buf, gzip.BestSpeed) 118 | if err != nil { 119 | return 0, err 120 | } 121 | 122 | for _, row := range entry.Rows { 123 | // Write source name length 124 | _, err = gzipWriter.Write([]byte{byte(len(row.Source)), byte(len(row.Metric))}) 125 | if err != nil { 126 | return 0, err 127 | } 128 | 129 | // Write source and metric names 130 | _, err = gzipWriter.Write([]byte(row.Source + row.Metric)) 131 | if err != nil { 132 | return 0, err 133 | } 134 | 135 | // Write timestamp and value 136 | scratch[0] = byte(row.Point.Timestamp) 137 | scratch[1] = byte(row.Point.Timestamp >> (8 * 1)) 138 | scratch[2] = byte(row.Point.Timestamp >> (8 * 2)) 139 | scratch[3] = byte(row.Point.Timestamp >> (8 * 3)) 140 | scratch[4] = byte(row.Point.Timestamp >> (8 * 4)) 141 | scratch[5] = byte(row.Point.Timestamp >> (8 * 5)) 142 | scratch[6] = byte(row.Point.Timestamp >> (8 * 6)) 143 | scratch[7] = byte(row.Point.Timestamp >> (8 * 7)) 144 | _, err = gzipWriter.Write(scratch[:8]) 145 | if err != nil { 146 | return 0, err 147 | } 148 | 149 | valueBits := math.Float64bits(row.Point.Value) 150 | scratch[0] = byte(valueBits) 151 | scratch[1] = byte(valueBits >> (8 * 1)) 152 | scratch[2] = byte(valueBits >> (8 * 2)) 153 | scratch[3] = byte(valueBits >> (8 * 3)) 154 | scratch[4] = byte(valueBits >> (8 * 4)) 155 | scratch[5] = byte(valueBits >> (8 * 5)) 156 | scratch[6] = byte(valueBits >> (8 * 6)) 157 | scratch[7] = byte(valueBits >> (8 * 7)) 158 | _, err = gzipWriter.Write(scratch[:8]) 159 | if err != nil { 160 | return 0, err 161 | } 162 | } 163 | 164 | err = gzipWriter.Close() 165 | if err != nil { 166 | return 0, err 167 | } 168 | 169 | entrySize := buf.Len() - 13 170 | 171 | result := buf.Bytes() 172 | 173 | // Write the size of the entry 174 | entrySizeUint32 := uint32(entrySize) 175 | scratch[0] = byte(entrySizeUint32) 176 | scratch[1] = byte(entrySizeUint32 >> 8) 177 | scratch[2] = byte(entrySizeUint32 >> 16) 178 | scratch[3] = byte(entrySizeUint32 >> 24) 179 | 180 | copy(result[9:13], scratch[:4]) 181 | 182 | w.lock.Lock() 183 | // Record the current offset so we can truncate 184 | // later in case something goes wrong. 185 | currentOffset, err := w.f.Seek(0, 1) 186 | if err != nil { 187 | w.lock.Unlock() 188 | return 0, err 189 | } 190 | 191 | w.lastReadOffset = currentOffset 192 | 193 | // Flush to the file. 
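// The entire entry goes out in a single Write call; if the write
// fails, the Truncate in the error path below rolls the file back to
// the offset recorded above, so no partial entry is left behind.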
194 | 	n, err := w.f.Write(result)
195 | 	if err != nil {
196 | 		w.Truncate()
197 | 		w.lock.Unlock()
198 | 		return 0, err
199 | 	}
200 | 
201 | 	w.lock.Unlock()
202 | 	return n, err
203 | }
204 | 
205 | // ReadEntry reads a WALEntry from the write-ahead log.
206 | // If a non-nil error is returned, w.Truncate() may
207 | // be called to make the WAL safe for writing.
208 | func (w *FileWAL) ReadEntry() (WALEntry, error) {
209 | 	w.lock.Lock()
210 | 
211 | 	entry := WALEntry{}
212 | 	var err error
213 | 
214 | 	// Make sure we have an open WAL.
215 | 	if w.f == nil {
216 | 		w.lock.Unlock()
217 | 		return entry, errorInvalidWALFile
218 | 	}
219 | 
220 | 	// Read magic value.
221 | 	magic := uint32(0)
222 | 	err = binary.Read(w.f, binary.LittleEndian, &magic)
223 | 	if err != nil {
224 | 		w.lock.Unlock()
225 | 		return entry, err
226 | 	}
227 | 
228 | 	if magic != walMagic {
229 | 		w.lock.Unlock()
230 | 		return entry, errorInvalidWALMagic
231 | 	}
232 | 
233 | 	// Read the operation type.
234 | 	err = binary.Read(w.f, binary.LittleEndian, &entry.Operation)
235 | 	if err != nil {
236 | 		w.lock.Unlock()
237 | 		return entry, err
238 | 	}
239 | 
240 | 	// Read the number of rows.
241 | 	numRows := uint32(0)
242 | 	err = binary.Read(w.f, binary.LittleEndian, &numRows)
243 | 	if err != nil {
244 | 		w.lock.Unlock()
245 | 		return entry, err
246 | 	}
247 | 
248 | 	// Read the size of the entry.
249 | 	entrySize := uint32(0)
250 | 	err = binary.Read(w.f, binary.LittleEndian, &entrySize)
251 | 	if err != nil {
252 | 		w.lock.Unlock()
253 | 		return entry, err
254 | 	}
255 | 
256 | 	entryBytes := make([]byte, int(entrySize))
257 | 	n, err := w.f.Read(entryBytes)
258 | 	if err != nil {
259 | 		w.lock.Unlock()
260 | 		return entry, err
261 | 	}
262 | 
263 | 	if n != int(entrySize) {
264 | 		w.lock.Unlock()
265 | 		return entry, errors.New("wal: did not read full entry")
266 | 	}
267 | 
268 | 	r := bytes.NewReader(entryBytes)
269 | 
270 | 	// The entry payload is gzip-compressed; a failure
271 | 	// to decompress below means the entry is corrupt
272 | 	// and the caller should Truncate the WAL.
273 | 
274 | 	gzipReader, err := gzip.NewReader(r)
275 | 	if err != nil {
276 | 		w.lock.Unlock()
277 | 		return entry, err
278 | 	}
279 | 
280 | 	uncompressed, err := ioutil.ReadAll(gzipReader)
281 | 	if err != nil {
282 | 		gzipReader.Close()
283 | 		w.lock.Unlock()
284 | 		return entry, err
285 | 	}
286 | 
287 | 	gzipReader.Close()
288 | 
289 | 	r = bytes.NewReader(uncompressed)
290 | 
291 | 	for i := uint32(0); i < numRows; i++ {
292 | 		row := partition.Row{}
293 | 
294 | 		sourceNameLength, metricNameLength := uint8(0), uint8(0)
295 | 
296 | 		// Read the source and metric name lengths.
297 | 		err = binary.Read(r, binary.LittleEndian, &sourceNameLength)
298 | 		if err != nil {
299 | 			w.lock.Unlock()
300 | 			return entry, err
301 | 		}
302 | 		err = binary.Read(r, binary.LittleEndian, &metricNameLength)
303 | 		if err != nil {
304 | 			w.lock.Unlock()
305 | 			return entry, err
306 | 		}
307 | 
308 | 		sourceAndMetricNames := make([]byte, int(sourceNameLength)+int(metricNameLength))
309 | 
310 | 		_, err = r.Read(sourceAndMetricNames)
311 | 		if err != nil {
312 | 			w.lock.Unlock()
313 | 			return entry, err
314 | 		}
315 | 
316 | 		row.Source = string(sourceAndMetricNames[:int(sourceNameLength)])
317 | 		row.Metric = string(sourceAndMetricNames[int(sourceNameLength):])
318 | 
319 | 		err = binary.Read(r, binary.LittleEndian, &row.Point)
320 | 		if err != nil {
321 | 			w.lock.Unlock()
322 | 			return entry, err
323 | 		}
324 | 
325 | 		entry.Rows = append(entry.Rows, row)
326 | 	}
327 | 
328 | 	// We've decoded everything fine.
329 | 	// We now update lastReadOffset to the current offset
330 | 	// in the file.
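// ReadEntry only advances lastReadOffset after an entry decodes
// completely, so a torn or corrupt entry leaves it pointing at the
// end of the last good entry, which is exactly where Truncate cuts
// the file.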
331 | 	currentOffset, err := w.f.Seek(0, 1)
332 | 	if err != nil {
333 | 		w.lock.Unlock()
334 | 		return entry, err
335 | 	}
336 | 
337 | 	w.lastReadOffset = currentOffset
338 | 
339 | 	w.lock.Unlock()
340 | 	return entry, err
341 | }
342 | 
343 | // Truncate truncates w's backing file to
344 | // lastReadOffset. Truncation ensures that
345 | // entries appended later can be safely
346 | // read back.
347 | func (w *FileWAL) Truncate() error {
348 | 	return w.f.Truncate(w.lastReadOffset)
349 | }
350 | 
351 | // Close flushes any pending writes and closes the file.
352 | func (w *FileWAL) Close() error {
353 | 	if err := w.f.Sync(); err != nil {
354 | 		return err
355 | 	}
356 | 	return w.f.Close()
357 | }
358 | 
359 | // Destroy closes the FileWAL and removes the
360 | // file on disk.
361 | func (w *FileWAL) Destroy() error {
362 | 	w.Close()
363 | 	err := os.Remove(w.filename)
364 | 	return err
365 | }
366 | 
367 | // Filename returns the name of the file backing the WAL.
368 | func (w *FileWAL) Filename() string {
369 | 	return w.filename
370 | }
371 | 
372 | // FileWAL is a WAL.
373 | var _ WAL = &FileWAL{}
--------------------------------------------------------------------------------
/wal/wal.go:
--------------------------------------------------------------------------------
1 | // Package wal provides a write-ahead log.
2 | package wal
3 | 
4 | import (
5 | 	"github.com/Cistern/catena/partition"
6 | )
7 | 
8 | type walOperation byte
9 | 
10 | const (
11 | 	OperationInsert walOperation = iota
12 | )
13 | 
14 | // A WAL is a write-ahead log.
15 | type WAL interface {
16 | 	Append(WALEntry) (int, error)
17 | 	ReadEntry() (WALEntry, error)
18 | 	Truncate() error
19 | 	Close() error
20 | 	Destroy() error
21 | 	Filename() string
22 | }
23 | 
24 | // WALEntry is an entry in the write-ahead log.
25 | type WALEntry struct {
26 | 	Operation walOperation
27 | 	Rows      []partition.Row
28 | }
29 | 
--------------------------------------------------------------------------------