├── .deepsource.toml ├── .gitignore ├── .travis.yml ├── LICENCE.txt ├── README.md ├── cmd ├── README.md ├── compare.go ├── convert.go ├── dump.go ├── estimate.go ├── icheck.go ├── read.go ├── summarize.go └── write.go ├── compress.go ├── compress_test.go ├── debug.go ├── debug_test.go ├── doc └── compressed.md ├── go.mod ├── go.sum ├── internal ├── fuzzy_archive │ └── fuzz.go └── fuzzy_whisper │ ├── corpus │ ├── Inactive.wsp.cwsp │ ├── SwapTotal.wsp.cwsp │ ├── discarded_full.wsp.cwsp │ ├── header │ ├── reach.wsp.cwsp │ ├── rx_byte_ipv6.wsp.cwsp │ ├── rx_errors.wsp.cwsp │ ├── rx_fifo.wsp.cwsp │ ├── tx_colls.wsp.cwsp │ ├── tx_drop.wsp.cwsp │ └── tx_errors.wsp.cwsp │ └── fuzz.go ├── whisper.go ├── whisper_test.go └── whisper_test.py /.deepsource.toml: -------------------------------------------------------------------------------- 1 | version = 1 2 | 3 | test_patterns = [ 4 | "*_test.go" 5 | ] 6 | 7 | [[analyzers]] 8 | name = "go" 9 | enabled = true 10 | 11 | [analyzers.meta] 12 | import_path = "github.com/go-graphite/go-whisper" 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.test 2 | cpu.out 3 | tags 4 | .idea -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | 3 | language: go 4 | 5 | go: 6 | - "1.12.x" 7 | - "1.13.x" 8 | - "1.14.x" 9 | - master 10 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Rob Young. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go Whisper 2 | 3 | [![Build Status](https://travis-ci.org/robyoung/go-whisper.png?branch=master)](https://travis-ci.org/robyoung/go-whisper?branch=master) 4 | 5 | Go Whisper is a [Go](http://golang.org/) implementation of the [Whisper](https://github.com/graphite-project/whisper) database, which is part of the [Graphite Project](http://graphite.wikidot.com/). 6 | 7 | To create a new whisper database you must define it's retention levels (see: [storage schemas](http://graphite.readthedocs.org/en/1.0/config-carbon.html#storage-schemas-conf)), aggregation method and the xFilesFactor. The xFilesFactor specifies the fraction of data points in a propagation interval that must have known values for a propagation to occur. 8 | 9 | ## Examples 10 | 11 | Create a new whisper database in "/tmp/test.wsp" with two retention levels (1 second for 1 day and 1 hour for 5 weeks), it will sum values when propagating them to the next retention level, and it requires half the values of the first retention level to be set before they are propagated. 12 | ```go 13 | retentions, err := whisper.ParseRetentionDefs("1s:1d,1h:5w") 14 | if err == nil { 15 | wsp, err := whisper.Create("/tmp/test.wsp", retentions, whisper.Sum, 0.5) 16 | } 17 | ``` 18 | 19 | Alternatively you can open an existing whisper database. 20 | ```go 21 | wsp, err := whisper.Open("/tmp/test.wsp") 22 | ``` 23 | 24 | Once you have a whisper database you can set values at given time points. This sets the time point 1 hour ago to 12345.678. 25 | ```go 26 | wsp.Update(12345.678, time.Now().Add(time.ParseDuration("-1h")).Unix()) 27 | ``` 28 | 29 | And you can retrieve time series from it. This example fetches a time series for the last 1 hour and then iterates through it's points. 
30 | ```go 31 | series, err := wsp.Fetch(time.Now().Add(time.ParseDuration("-1h")).Unix(), time.Now().Unix()) 32 | if err != nil { 33 | // handle 34 | } 35 | for _, point := range series.Points() { 36 | fmt.Println(point.Time, point.Value) 37 | } 38 | ``` 39 | 40 | ## Thread Safety 41 | 42 | This implementation is *not* thread safe. Writing to a database concurrently will cause bad things to happen. It is up to the user to manage this in their application as they need to. 43 | 44 | ## Compressed Format 45 | 46 | go-whisper library supports a compressed format, which maintains the same functionality of standard whisper file, while keeping data in a much smaller size. This compressed format is called `cwhisper`. 47 | 48 | Compression algorithm source: [4.1 Time series compression in Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf). 49 | 50 | Data point in cwhisper ranges from 2 - 14 bytes (12 bytes for standard format). So in theory, cwhisper file size could be 16.67% - 116.67% of standard file size. So the theoretical compression ratio is 6 - 0.86. 51 | 52 | In random data point testing, compressed/uncompressed ratio is between 18.88% and 113.25%. 53 | 54 | In real production payload, we are seeing 50%+ less disk space usage. 55 | 56 | Read/Write Performance between standard and compressed formats: 57 | 58 | ``` 59 | BenchmarkWriteStandard-8 50000 33824 ns/op 60 | BenchmarkWriteCompressed-8 1000000 1630 ns/op 61 | 62 | BenchmarkReadStandard-8 500 2270392 ns/op 63 | BenchmarkReadCompressed-8 10000 260862 ns/op 64 | ``` 65 | 66 | ### Drawbacks 67 | 68 | * cwhisper is faster and smaller, but unlike standard format, you can't easily backfill/update/rewrite old data points because it's not data-point addressable. 69 | * file size could grow if data points are irregular. 
70 | 71 | ### Suitable Application 72 | 73 | * cwhisper is most suitable for metrics that are mostly regular and less likely needed to backfill/rewrite old data, like system metrics. cwhisper also works nicely for sparse metrics. 74 | 75 | ### How does it work in a nutshell 76 | 77 | An example format: https://github.com/go-graphite/go-whisper/blob/master/doc/compressed.md 78 | 79 | In cwhisper, archives are broken down into multiple blocks (by default 7200 data points per block as recommended by the gorilla paper), and data points are compressed into blocks. cwhisper assumes 2 as the default data point size, but when it detects that the default size is too small, it would grow the file. 80 | 81 | cwhisper still has one file per metric, it's doing round-robin update, instead of rotating data points, block is rotation unit for archives. 82 | 83 | ## Licence 84 | 85 | Go Whisper is licenced under a BSD Licence. 86 | -------------------------------------------------------------------------------- /cmd/README.md: -------------------------------------------------------------------------------- 1 | # Commands 2 | 3 | | Commands | Description | 4 | | --- | --- | 5 | | convert.go | migrates existing standard whisper files to compressed format | 6 | | dump.go | prints out data in whipser files (support both compressed and standard format) | 7 | | compare.go | checks if two whisper files are containing the same data, made for verify migration result | 8 | | icheck.go | checks if a compressed whipser file is corrupted through crc32 values saved in the headers and if there is invalid data ponit size | 9 | | summarize.go | generates md5 sums of values in different archives (for quick comparsion of whisper files on different hosts) | 10 | | estimate.go | to estimate the file compression ratio for specific retention policy on different compressed data point sizes | 11 | 12 | ## Caveats of convert.go 13 | 14 | * convert.go locks a whisper file during convertion (by creating a 
temporary .cwsp files in the same directory) 15 | * it might cause some inaccurate or lost of data points (usually 2-4) in lower archives (because of the buffer design in higher archives) 16 | * it generates logs and progress files 17 | -------------------------------------------------------------------------------- /cmd/compare.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | 11 | whisper "github.com/go-graphite/go-whisper" 12 | ) 13 | 14 | func init() { log.SetFlags(log.Lshortfile) } 15 | 16 | func main() { 17 | now := flag.Int("now", 0, "specify the current time") 18 | ignoreBuffer := flag.Bool("ignore-buffer", false, "ignore points in buffer that haven't been propagated") 19 | quarantinesRaw := flag.String("quarantines", "", "ignore data started from this point. e.g. 2019-02-21,2019-02-22") 20 | verbose := flag.Bool("verbose", false, "be overly and nicely talkive") 21 | strict := flag.Bool("strict", false, "exit 1 whenever there are discrepancies between between the files") 22 | muteThreshold := flag.Int("mute-if-less", 2, "do not alarm if diff of points is less than specified.") 23 | flag.BoolVar(verbose, "v", false, "be overly and nicely talkive") 24 | flag.Parse() 25 | if len(flag.Args()) != 2 { 26 | fmt.Println("usage: cverify metric.wsp metric.cwsp") 27 | fmt.Println("purpose: check if two whisper files are containing the same data, made for verify migration result.") 28 | flag.PrintDefaults() 29 | os.Exit(1) 30 | } 31 | 32 | file1 := flag.Args()[0] 33 | file2 := flag.Args()[1] 34 | msg, err := whisper.Compare( 35 | file1, file2, 36 | *now, 37 | *ignoreBuffer, 38 | *quarantinesRaw, 39 | *verbose, 40 | *strict, 41 | *muteThreshold, 42 | ) 43 | if len(msg) > 0 { 44 | fmt.Print(msg) 45 | } 46 | if err != nil { 47 | fmt.Print(err) 48 | os.Exit(1) 49 | } 50 | } 51 | 
-------------------------------------------------------------------------------- /cmd/convert.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "bufio" 7 | "bytes" 8 | "flag" 9 | "fmt" 10 | "io" 11 | "io/ioutil" 12 | "math/rand" 13 | "os" 14 | "os/signal" 15 | "path/filepath" 16 | "runtime" 17 | "runtime/debug" 18 | "strings" 19 | "sync" 20 | "sync/atomic" 21 | "syscall" 22 | "time" 23 | 24 | whisper "github.com/go-graphite/go-whisper" 25 | ) 26 | 27 | var helpMessage = `convert: convert standard whisper files to compressed format 28 | 29 | Start a normal conversion: 30 | convert -home /var/lib/carbon/convert -store /var/lib/carbon/whisper -rate 2 -one-off 31 | 32 | Run a one-off to convert a file: 33 | convert -home /var/lib/carbon/convert -store metric.wsp -rate 2 -debug -one-off -force 34 | 35 | Or files: 36 | convert -home /var/lib/carbon/convert -store *.wsp -rate 2 -debug -one-off -force 37 | ` 38 | 39 | func main() { 40 | rand.Seed(time.Now().Unix()) 41 | 42 | var storeDir = flag.String("store", "/var/lib/carbon/whisper", "path to whisper data files") 43 | var homdDir = flag.String("home", "/var/lib/carbon/convert", "home directory for saving conversion progress") 44 | var rate = flag.Int("rate", runtime.NumCPU(), "count of concurrent conversion per second") 45 | var debug = flag.Bool("debug", false, "show debug info") 46 | var force = flag.Bool("force", false, "ignore records progress.db and convert the files") 47 | var oneoff = flag.Bool("one-off", false, "only scan once") 48 | var keepOriginal = flag.Bool("keep-original", false, "keep both the original and compressed whisper files") 49 | var now = flag.Int64("now", 0, "specify the current time") 50 | var help = flag.Bool("help", false, "show help message") 51 | flag.BoolVar(help, "h", false, "show help message") 52 | flag.Parse() 53 | if *help { 54 | fmt.Println(helpMessage) 55 | flag.PrintDefaults() 56 | 
os.Exit(0) 57 | } 58 | if err := os.MkdirAll(*homdDir, 0755); err != nil { 59 | panic(err) 60 | } 61 | 62 | if *now > 0 { 63 | whisper.Now = func() time.Time { 64 | return time.Unix(*now, 0) 65 | } 66 | } 67 | 68 | var progressDB = *homdDir + "/progress.db" 69 | var pidFile = *homdDir + "/pid" 70 | var taskc = make(chan string, *rate) 71 | var convertingFiles sync.Map 72 | var convertingCount int64 73 | var progressc = make(chan string, *rate) 74 | var exitc = make(chan struct{}) 75 | var shutdownc = make(chan os.Signal, 1) 76 | 77 | if err := ioutil.WriteFile(pidFile, []byte(fmt.Sprintf("%d", os.Getpid())), 0644); err != nil { 78 | fmt.Printf("main: failed to save pid: %s\n", err) 79 | } 80 | 81 | go schedule(*rate, taskc, progressDB, progressc, &convertingCount, &convertingFiles, exitc, *debug, *keepOriginal) 82 | go logProgress(progressDB, progressc) 83 | go func() { 84 | for { 85 | if err := scanAndDispatch(*storeDir, progressDB, taskc, *force); err != nil { 86 | fmt.Printf("error: %s", err) 87 | } 88 | 89 | if *oneoff { 90 | for len(taskc) > 0 { 91 | time.Sleep(time.Millisecond * time.Duration(rand.Intn(1000))) 92 | } 93 | time.Sleep(time.Second * 3) 94 | shutdownc <- syscall.SIGUSR2 95 | } 96 | 97 | time.Sleep(time.Hour) 98 | } 99 | }() 100 | onExit(&convertingCount, &convertingFiles, progressc, taskc, exitc, shutdownc) 101 | } 102 | 103 | func onExit(convertingCount *int64, convertingFiles *sync.Map, progressc chan string, taskc chan string, exitc chan struct{}, shutdownc chan os.Signal) { 104 | signal.Notify(shutdownc, os.Interrupt) 105 | signal.Notify(shutdownc, syscall.SIGTERM) 106 | signal.Notify(shutdownc, syscall.SIGUSR2) 107 | 108 | <-shutdownc 109 | close(exitc) 110 | fmt.Printf("exit: enter shutting down process\n") 111 | for { 112 | if c := atomic.LoadInt64(convertingCount); c > 0 { 113 | fmt.Printf("exit: %d files are still converting\n", c) 114 | convertingFiles.Range(func(k, v interface{}) bool { 115 | fmt.Printf("\t%s\n", k) 116 | return true 
117 | }) 118 | 119 | time.Sleep(time.Second * time.Duration(rand.Intn(10))) 120 | continue 121 | } 122 | if len(progressc) > 0 { 123 | fmt.Printf("exit: flushing progress records\n") 124 | time.Sleep(time.Second) 125 | continue 126 | } 127 | fmt.Printf("exit: progrem shutting down in 3 seconds\n") 128 | time.Sleep(time.Second * 3) 129 | // close(progressc) 130 | os.Exit(0) 131 | } 132 | } 133 | 134 | func schedule(rate int, taskc chan string, db string, progressc chan string, convertingCount *int64, convertingFiles *sync.Map, exitc chan struct{}, debug, keepOriginal bool) { 135 | ticker := time.NewTicker(time.Second / time.Duration(rate)) 136 | for range ticker.C { 137 | if atomic.LoadInt64(convertingCount) >= int64(rate) { 138 | continue 139 | } 140 | 141 | var metric string 142 | select { 143 | case metric = <-taskc: 144 | case <-exitc: 145 | fmt.Printf("schedule: stopped\n") 146 | return 147 | } 148 | atomic.AddInt64(convertingCount, 1) 149 | convertingFiles.Store(metric, struct{}{}) 150 | go func() { 151 | err := convert(metric, progressc, convertingCount, convertingFiles, debug, keepOriginal) 152 | if err != nil { 153 | fmt.Printf("error: %s", err) 154 | } 155 | }() 156 | } 157 | } 158 | 159 | func readProgress(db string) (files map[string]struct{}, err error) { 160 | file, err := os.Open(db) 161 | if err != nil { 162 | return 163 | } 164 | defer file.Close() 165 | 166 | files = map[string]struct{}{} 167 | var line []byte 168 | r := bufio.NewReader(file) 169 | for { 170 | line, _, err = r.ReadLine() 171 | if err == io.EOF { 172 | err = nil 173 | break 174 | } 175 | if len(line) == 0 { 176 | continue 177 | } 178 | a := bytes.Split(line, []byte(",")) 179 | files[string(a[0])] = struct{}{} 180 | } 181 | 182 | return 183 | } 184 | 185 | func logProgress(progressDB string, progressc chan string) { 186 | logf, err := os.OpenFile(progressDB, os.O_APPEND|os.O_CREATE|os.O_RDWR, 0644) 187 | if err != nil { 188 | panic(fmt.Sprintf("log: failed to open %s: %s\n", 
progressDB, err)) 189 | return 190 | } 191 | 192 | for file := range progressc { 193 | if _, err := logf.WriteString(file); err != nil { 194 | fmt.Printf("log: failed to log %s: %s\n", file, err) 195 | } 196 | if err := logf.Sync(); err != nil { 197 | fmt.Printf("log: failed to sync log: %s\n", err) 198 | } 199 | } 200 | 201 | // if err := logf.Close(); err != nil { 202 | // fmt.Printf("log: failed to close db: %s\n", err) 203 | // } 204 | } 205 | 206 | func convert(path string, progressc chan string, convertingCount *int64, convertingFiles *sync.Map, debugf, keepOriginal bool) error { 207 | defer func() { 208 | atomic.AddInt64(convertingCount, -1) 209 | convertingFiles.Delete(path) 210 | }() 211 | 212 | if debugf { 213 | fmt.Printf("convert: handling %s\n", path) 214 | } 215 | 216 | db, err := whisper.OpenWithOptions(path, &whisper.Options{FLock: true}) 217 | if err != nil { 218 | return fmt.Errorf("convert: failed to open %s: %s", path, err) 219 | } 220 | defer func() { 221 | if err := db.File().Close(); err != nil { 222 | fmt.Printf("convert: failed to close converted file %s: %s", path, err) 223 | } 224 | }() 225 | 226 | if db.IsCompressed() { 227 | if debugf { 228 | fmt.Printf("convert: %s is already compressed\n", path) 229 | } 230 | return nil 231 | } 232 | 233 | var start = time.Now() 234 | var tmpPath = path + ".cwsp" 235 | var oldSize int64 236 | if stat, err := os.Stat(path); err == nil { 237 | oldSize = stat.Size() 238 | } 239 | defer func() { 240 | if err := recover(); err != nil { 241 | fmt.Printf("convert: %s panicked: %s\n", path, err) 242 | debug.PrintStack() 243 | return 244 | } 245 | 246 | var cpath = path 247 | if keepOriginal { 248 | cpath = tmpPath 249 | } 250 | stat, err := os.Stat(cpath) 251 | if err != nil { 252 | fmt.Printf("convert: failed to stat new size of %s: %s\n", cpath, err) 253 | } 254 | newSize := stat.Size() 255 | progressc <- fmt.Sprintf( 256 | "%s,%d,%d,%d,%d\n", 257 | path, oldSize, newSize, time.Now().Sub(start), 
start.Unix(), 258 | ) 259 | }() 260 | 261 | os.Remove(tmpPath) 262 | if err := db.CompressTo(tmpPath); err != nil { 263 | return fmt.Errorf("convert: failed to compress %s: %s", path, err) 264 | } 265 | 266 | if keepOriginal { 267 | return nil 268 | } 269 | 270 | defer os.Remove(tmpPath) 271 | 272 | cfile, err := os.Open(tmpPath) 273 | if err != nil { 274 | return fmt.Errorf("convert: failed to open %s: %s", tmpPath, err) 275 | } 276 | defer cfile.Close() 277 | 278 | cstat, err := cfile.Stat() 279 | if err != nil { 280 | return fmt.Errorf("convert: failed to stat %s: %s", tmpPath, err) 281 | } 282 | 283 | if err := db.File().Truncate(cstat.Size()); err != nil { 284 | fmt.Printf("convert: failed to truncate %s: %s", path, err) 285 | } 286 | 287 | if _, err := db.File().Seek(0, 0); err != nil { 288 | return fmt.Errorf("convert: failed to seek %s: %s", path, err) 289 | } 290 | if _, err := io.Copy(db.File(), cfile); err != nil { 291 | os.Remove(path) // original file is most likely corrupted 292 | return fmt.Errorf("convert: failed to copy compressed data for %s: %s", path, err) 293 | } 294 | 295 | return nil 296 | } 297 | 298 | func scanAndDispatch(storeDir, progressDB string, taskc chan string, force bool) error { 299 | fmt.Printf("sd: start new conversion cycle\n") 300 | files, dur, err := scan(storeDir) 301 | if err != nil { 302 | return err 303 | } 304 | fmt.Printf("sd: scan %d files took %s\n", len(files), dur) 305 | 306 | cwsps, err := readProgress(progressDB) 307 | var tasks []string 308 | for _, file := range files { 309 | if _, ok := cwsps[file]; ok && !force { 310 | continue 311 | } 312 | tasks = append(tasks, file) 313 | } 314 | 315 | fmt.Printf("sd: uncompressed %d total %d pct %.2f%%\n", len(tasks), len(files), float64(len(tasks))/float64(len(files))*100) 316 | start := time.Now() 317 | for _, task := range tasks { 318 | taskc <- task 319 | } 320 | 321 | fmt.Printf("sd: done took %s\n", time.Now().Sub(start)) 322 | return nil 323 | } 324 | 325 | func 
scan(dir string) (files []string, dur time.Duration, err error) { 326 | if stat, e := os.Stat(dir); e != nil { 327 | err = e 328 | return 329 | } else if !stat.IsDir() { 330 | files = append(files, dir) 331 | return 332 | } 333 | 334 | start := time.Now() 335 | err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 336 | if err != nil { 337 | return err 338 | } 339 | if !info.IsDir() && strings.HasSuffix(path, ".wsp") { 340 | files = append(files, path) 341 | } 342 | return nil 343 | }) 344 | dur = time.Now().Sub(start) 345 | return 346 | } 347 | -------------------------------------------------------------------------------- /cmd/dump.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "os" 10 | "os/exec" 11 | 12 | whisper "github.com/go-graphite/go-whisper" 13 | ) 14 | 15 | func main() { 16 | header := flag.Bool("header", false, "show only file header") 17 | debug := flag.Bool("debug", false, "show decompression debug info") 18 | noLess := flag.Bool("no-less", false, "Don't use less, print everything to stdout.") 19 | flag.Parse() 20 | 21 | oflag := os.O_RDONLY 22 | db, err := whisper.OpenWithOptions(flag.Args()[0], &whisper.Options{OpenFileFlag: &oflag}) 23 | if err != nil { 24 | fmt.Println(err.Error()) 25 | os.Exit(1) 26 | } 27 | 28 | less := exec.Command("less") 29 | if !*noLess { 30 | less.Stdout = os.Stdout 31 | temp, err := ioutil.TempFile("", "") 32 | if err != nil { 33 | fmt.Println(err.Error()) 34 | os.Exit(1) 35 | } 36 | os.Stdout = temp 37 | } 38 | 39 | db.Dump(!*header, *debug) 40 | 41 | if !*noLess { 42 | if _, err := os.Stdout.Seek(0, 0); err != nil { 43 | panic(err) 44 | } 45 | less.Stderr = os.Stderr 46 | less.Stdin = os.Stdout 47 | if err := less.Run(); err != nil { 48 | panic(err) 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- 
/cmd/estimate.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | "time" 9 | 10 | whisper "github.com/go-graphite/go-whisper" 11 | ) 12 | 13 | func help() { 14 | fmt.Printf("estimate: to estimate the file compression ratio for specific retention policy on different compressed data point sizes.\nusage: estimate 1s:2d,1m:31d,1h:2y\n") 15 | os.Exit(1) 16 | } 17 | 18 | func main() { 19 | if len(os.Args) == 1 { 20 | help() 21 | } 22 | rets, err := whisper.ParseRetentionDefs(os.Args[1]) 23 | if err != nil { 24 | help() 25 | } 26 | 27 | swhisper, err := whisper.CreateWithOptions( 28 | fmt.Sprintf("estimate.%d.wsp", time.Now().Unix()), rets, whisper.Sum, 0, 29 | &whisper.Options{Compressed: false, PointsPerBlock: 7200, InMemory: true}, 30 | ) 31 | if err != nil { 32 | fmt.Printf("failed to estimate whisper file size: %s\n", err) 33 | os.Exit(1) 34 | } 35 | 36 | fmt.Println("compressed whisper file sizes using different size per data points and comparing to standard whisper file") 37 | fmt.Printf("standard whisper file size = %s (%d)\n", toString(swhisper.Size()), swhisper.Size()) 38 | fmt.Println("point size, file size, ratio") 39 | for _, size := range []float32{0.2, 2, 7, 14} { 40 | for _, ret := range rets { 41 | ret.SetAvgCompressedPointSize(size) 42 | } 43 | 44 | cwhisper, err := whisper.CreateWithOptions( 45 | fmt.Sprintf("estimate.%d.cwsp", time.Now().Unix()), rets, whisper.Sum, 0, 46 | &whisper.Options{Compressed: true, PointsPerBlock: 7200, InMemory: true}, 47 | ) 48 | if err != nil { 49 | fmt.Printf("failed to estimate cwhisper file with point size %f: %s\n", size, err) 50 | os.Exit(1) 51 | } 52 | 53 | fmt.Printf("%.1f, %s (%d), %.2f%%\n", size, toString(cwhisper.Size()), cwhisper.Size(), float32(cwhisper.Size()*100)/float32(swhisper.Size())) 54 | } 55 | } 56 | 57 | func toString(size int) string { 58 | switch { 59 | case size < 1024: 60 | return 
fmt.Sprintf("%dB", size) 61 | case size < 1024*1024: 62 | return fmt.Sprintf("%.2fKB", float32(size)/1024) 63 | default: 64 | return fmt.Sprintf("%.2fMB", float32(size)/1024/1024) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /cmd/icheck.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "log" 8 | "os" 9 | 10 | whisper "github.com/go-graphite/go-whisper" 11 | ) 12 | 13 | func init() { 14 | log.SetFlags(log.Lshortfile) 15 | } 16 | 17 | func main() { 18 | if len(os.Args) != 2 { 19 | fmt.Println("usage: icheck metric.wsp") 20 | fmt.Println("purpose: checks intergiry of a compressed file, including") 21 | fmt.Println(" - crc32 values saved in the headers matched the content") 22 | fmt.Println(" - no invalid data ponit size like 0 or NAN") 23 | os.Exit(1) 24 | } 25 | file1 := os.Args[1] 26 | 27 | oflag := os.O_RDONLY 28 | db1, err := whisper.OpenWithOptions(file1, &whisper.Options{OpenFileFlag: &oflag}) 29 | if err != nil { 30 | panic(err) 31 | } 32 | 33 | if err := db1.CheckIntegrity(); err != nil { 34 | fmt.Printf("integrity: %s\n%s", file1, err) 35 | os.Exit(1) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /cmd/read.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | "math" 10 | "os" 11 | "strconv" 12 | "strings" 13 | "time" 14 | 15 | whisper "github.com/go-graphite/go-whisper" 16 | ) 17 | 18 | func main() { 19 | archive := flag.Int("archive", -1, "read data from specified archive (0-indexed).") 20 | from := flag.String("from", "", "read data from the specified timestamp.") 21 | until := flag.String("until", "", "read data until the specified timestamp.") 22 | format := flag.String("format", "txt", "specify the format of 
the data returned: json, txt.") 23 | archiveNum := flag.Bool("archive-num", false, "check the number of archives") 24 | timezone := flag.String("timezone", "Local", `timezone to parse from/until timestamps: Local, UTC, or a location name corresponding to a file in the IANA Time Zone database, such as "America/New_York". https://golang.org/pkg/time/#LoadLocation.`) 25 | help := flag.Bool("help", false, "print help messages.") 26 | keepNaN := flag.Bool("nan", false, "Show NaN/absent data points. If false it will be returned as 0.") 27 | quiet := flag.Bool("quiet", false, "print only data, no other messages.") 28 | flag.BoolVar(help, "h", false, "print help messages.") 29 | 30 | flag.Parse() 31 | 32 | location, err := time.LoadLocation(*timezone) 33 | if err != nil { 34 | panic(err) 35 | } 36 | 37 | if len(flag.Args()) == 0 || *help { 38 | fmt.Printf( 39 | "usage: read -from %q -until %q whisper_filename\n", 40 | time.Now().In(location).Add(-4*time.Hour).Format("2006-01-02 15:03:06"), 41 | time.Now().In(location).Add(-5*time.Minute).Format("2006-01-02 15:03:06")) 42 | flag.PrintDefaults() 43 | os.Exit(0) 44 | } 45 | 46 | openOption := os.O_RDONLY 47 | db, err := whisper.OpenWithOptions(flag.Arg(0), &whisper.Options{FLock: true, OpenFileFlag: &openOption}) 48 | if err != nil { 49 | panic(err) 50 | } 51 | defer func() { 52 | if err := db.Close(); err != nil { 53 | panic(err) 54 | } 55 | }() 56 | 57 | if *archiveNum { 58 | fmt.Println(len(db.Retentions())) 59 | os.Exit(0) 60 | } 61 | 62 | if *archive == -1 && (*from == "" || *until == "") { 63 | flag.PrintDefaults() 64 | os.Exit(1) 65 | } 66 | 67 | var fromTime, untilTime int 68 | if *archive != -1 { 69 | rets := db.Retentions() 70 | if *archive >= len(rets) { 71 | fmt.Fprintf(os.Stderr, "The whisper file only has %d archives.\n", len(rets)) 72 | os.Exit(2) 73 | } 74 | 75 | if *until == "" { 76 | untilTime = int(time.Now().Unix()) 77 | } else { 78 | t, err := time.ParseInLocation("2006-01-02 15:04:05", *until, location) 
79 | if err != nil { 80 | panic(err) 81 | } 82 | untilTime = int(t.Unix()) 83 | } 84 | 85 | fromTime = untilTime - rets[*archive].MaxRetention() 86 | } else { 87 | t1, err := time.ParseInLocation("2006-01-02 15:04:05", *from, location) 88 | if err != nil { 89 | panic(err) 90 | } 91 | fromTime = int(t1.Unix()) 92 | 93 | t2, err := time.ParseInLocation("2006-01-02 15:04:05", *until, location) 94 | if err != nil { 95 | panic(err) 96 | } 97 | untilTime = int(t2.Unix()) 98 | } 99 | 100 | if !*quiet { 101 | fmt.Printf("reading data from %d (%s) to %d (%s)\n", fromTime, time.Unix(int64(fromTime), 0), untilTime, time.Unix(int64(untilTime), 0)) 102 | } 103 | 104 | data, err := db.Fetch(fromTime, untilTime) 105 | if err != nil { 106 | panic(err) 107 | } else if data == nil { 108 | fmt.Println("No data is fetched. Bad timerange (from/until)?") 109 | return 110 | } 111 | 112 | var ps []whisper.TimeSeriesPoint 113 | for _, p := range data.Points() { 114 | if math.IsNaN(p.Value) { 115 | if !*keepNaN { 116 | continue 117 | } 118 | p.Value = 0 119 | } 120 | 121 | ps = append(ps, p) 122 | } 123 | 124 | switch *format { 125 | case "json": 126 | bytes, err := json.Marshal(ps) 127 | if err != nil { 128 | panic(err) 129 | } 130 | fmt.Println(string(bytes)) 131 | default: 132 | for _, p := range ps { 133 | fmt.Printf("%d: %v\n", p.Time, p.Value) 134 | } 135 | } 136 | } 137 | 138 | func parse(str string) []*whisper.TimeSeriesPoint { 139 | var ps []*whisper.TimeSeriesPoint 140 | for _, p := range strings.Split(str, ",") { 141 | p = strings.TrimSpace(p) 142 | if p == "" { 143 | continue 144 | } 145 | pp := strings.Split(p, ":") 146 | t, err := strconv.Atoi(pp[0]) 147 | if err != nil { 148 | panic(err) 149 | } 150 | v, err := strconv.ParseFloat(pp[1], 64) 151 | if err != nil { 152 | panic(err) 153 | } 154 | ps = append(ps, &whisper.TimeSeriesPoint{Time: t, Value: v}) 155 | } 156 | return ps 157 | } 158 | -------------------------------------------------------------------------------- 
/cmd/summarize.go: -------------------------------------------------------------------------------- 1 | //go:build ignore 2 | // +build ignore 3 | 4 | package main 5 | 6 | import ( 7 | "crypto/md5" 8 | "encoding/binary" 9 | "flag" 10 | "fmt" 11 | "math" 12 | "os" 13 | "strings" 14 | "time" 15 | 16 | whisper "github.com/go-graphite/go-whisper" 17 | ) 18 | 19 | func main() { 20 | now := flag.Int("now", int(time.Now().Add(0*time.Hour).Unix()), "specify the until value") 21 | offset := flag.Int("offset", 3600*12, "until = now - offset (unit: hour)") 22 | quarantinesRaw := flag.String("quarantines", "2019-02-21,2019-02-22", "ignore data started from this point") 23 | pretty := flag.Bool("pretty", false, "pretty output") 24 | flag.Parse() 25 | 26 | var quarantines [][2]int 27 | for _, q := range strings.Split(*quarantinesRaw, ";") { 28 | var quarantine [2]int 29 | for i, t := range strings.Split(q, ",") { 30 | tim, err := time.Parse("2006-01-02", t) 31 | if err != nil { 32 | panic(err) 33 | } 34 | quarantine[i] = int(tim.Unix()) 35 | } 36 | quarantines = append(quarantines, quarantine) 37 | } 38 | 39 | path := flag.Args()[0] 40 | oflag := os.O_RDONLY 41 | db, err := whisper.OpenWithOptions(path, &whisper.Options{OpenFileFlag: &oflag}) 42 | if err != nil { 43 | panic(err) 44 | } 45 | 46 | var sums []string 47 | for _, ret := range db.Retentions() { 48 | // from := int(whisper.Now().Unix()) - ret.MaxRetention() 49 | // until := int(whisper.Now().Add(time.Hour * time.Duration(-1*(*endOffset))).Unix()) 50 | from := *now - ret.MaxRetention() + ret.SecondsPerPoint()*60 51 | until := *now - *offset 52 | dps, err := db.Fetch(from, until) 53 | if err != nil { 54 | panic(err) 55 | } 56 | 57 | vals := dps.Values() 58 | data := make([]byte, 8*len(vals)) 59 | 60 | for _, quarantine := range quarantines { 61 | qfrom := quarantine[0] 62 | quntil := quarantine[1] 63 | if from <= qfrom && qfrom <= until { 64 | qfromIndex := (qfrom - from) / ret.SecondsPerPoint() 65 | quntilIndex := 
(quntil - from) / ret.SecondsPerPoint() 66 | for i := qfromIndex; i <= quntilIndex && i < len(vals); i++ { 67 | vals[i] = math.NaN() 68 | } 69 | } 70 | } 71 | 72 | var nonNans, nonZeros uint 73 | for i, p := range vals { 74 | if math.IsNaN(p) { 75 | binary.LittleEndian.PutUint64(data[i*8:], math.Float64bits(0)) 76 | } else { 77 | binary.LittleEndian.PutUint64(data[i*8:], math.Float64bits(p)) 78 | nonNans++ 79 | if p != 0.0 { 80 | nonZeros++ 81 | } 82 | } 83 | } 84 | 85 | sum := md5.Sum(data) 86 | sums = append(sums, fmt.Sprintf("%s,%x,%d,%d,%d", ret, sum[:], nonNans, nonZeros, len(vals))) 87 | } 88 | if *pretty { 89 | fmt.Printf("archive,checksum,number_of_non_nans,number_of_non_zeros,number_of_points\n") 90 | for _, s := range sums { 91 | fmt.Println(s) 92 | } 93 | } else { 94 | fmt.Printf("%s,%x,%s\n", path, md5.Sum([]byte(strings.Join(sums, ","))), strings.Join(sums, ",")) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /cmd/write.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "math/rand" 10 | "os" 11 | "strconv" 12 | "strings" 13 | "time" 14 | 15 | whisper "github.com/go-graphite/go-whisper" 16 | ) 17 | 18 | func main() { 19 | ignoreNow := flag.Bool("ignore-now", false, "ignore now on write (always write to the base/first archive)") 20 | schema := flag.String("schema", "", "create a new whisper file using the schema if file not found: 1s2d:1m:31d:1h:10y;avg") 21 | xFilesFactor := flag.Float64("xfiles-factor", 0.0, "xfiles factor used for creating new whisper file") 22 | delimiter := flag.String("d", ",", "delimiter of data points") 23 | compressed := flag.Bool("compressed", false, "use compressed format") 24 | randChunk := flag.Int("rand-chunk", 0, "randomize input size with limit for triggering extensions and simulating real life writes.") 25 | ppb := flag.Int("ppb", 
whisper.DefaultPointsPerBlock, "points per block") 26 | flag.Parse() 27 | 28 | var body string 29 | if len(flag.Args()) < 1 { 30 | fmt.Println("write: write data points to a whisper file.\nwrite file.wsp [1572940800:3,1572940801:5]\n") 31 | os.Exit(1) 32 | } else if len(flag.Args()) > 1 { 33 | body = flag.Args()[1] 34 | } else { 35 | in, err := ioutil.ReadAll(os.Stdin) 36 | if err != nil { 37 | panic(err) 38 | } 39 | body = string(in) 40 | } 41 | 42 | filename := flag.Args()[0] 43 | db, err := whisper.OpenWithOptions(filename, &whisper.Options{FLock: true, IgnoreNowOnWrite: *ignoreNow}) 44 | if err != nil { 45 | if !os.IsNotExist(err) { 46 | fmt.Printf("failed to open file: %s\n", err) 47 | os.Exit(1) 48 | } 49 | if *schema == "" { 50 | fmt.Println("file not found") 51 | os.Exit(1) 52 | } 53 | 54 | specs := strings.Split(*schema, ";") 55 | if len(specs) != 2 { 56 | fmt.Printf("illegal schema: %s example: retention;aggregation\n", *schema) 57 | os.Exit(1) 58 | } 59 | rets, err := whisper.ParseRetentionDefs(specs[0]) 60 | if err != nil { 61 | fmt.Printf("failed to parse retentions: %s\n", err) 62 | os.Exit(1) 63 | } 64 | aggregationMethod := whisper.ParseAggregationMethod(specs[1]) 65 | if aggregationMethod == whisper.Unknown { 66 | fmt.Printf("unknow aggregation method: %s\n", specs[1]) 67 | os.Exit(1) 68 | } 69 | 70 | db, err = whisper.CreateWithOptions( 71 | filename, rets, aggregationMethod, float32(*xFilesFactor), 72 | &whisper.Options{ 73 | Compressed: *compressed, 74 | IgnoreNowOnWrite: *ignoreNow, 75 | PointsPerBlock: *ppb, 76 | }, 77 | ) 78 | if err != nil { 79 | fmt.Printf("failed to create new whisper file: %s\n", err) 80 | os.Exit(1) 81 | } 82 | } 83 | 84 | rand.Seed(time.Now().Unix()) 85 | 86 | dps := parse(body, *delimiter) 87 | if *randChunk > 0 { 88 | for i := 0; i < len(dps); { 89 | // end := i + rand.Intn(*randChunk) + 1 90 | end := i + *randChunk + 1 91 | if end > len(dps) { 92 | end = len(dps) 93 | } 94 | if err := db.UpdateMany(dps[i:end]); err 
!= nil { 95 | panic(err) 96 | } 97 | i = end 98 | } 99 | } else { 100 | if err := db.UpdateMany(dps); err != nil { 101 | panic(err) 102 | } 103 | } 104 | 105 | if err := db.Close(); err != nil { 106 | panic(err) 107 | } 108 | 109 | if db.Extended { 110 | fmt.Println("file is extended.") 111 | } 112 | } 113 | 114 | func parse(str, delimiter string) []*whisper.TimeSeriesPoint { 115 | var ps []*whisper.TimeSeriesPoint 116 | for _, p := range strings.Split(str, delimiter) { 117 | p = strings.TrimSpace(p) 118 | if p == "" { 119 | continue 120 | } 121 | pp := strings.Split(p, ":") 122 | t, err := strconv.Atoi(pp[0]) 123 | if err != nil { 124 | panic(err) 125 | } 126 | v, err := strconv.ParseFloat(pp[1], 64) 127 | if err != nil { 128 | panic(err) 129 | } 130 | ps = append(ps, &whisper.TimeSeriesPoint{Time: t, Value: v}) 131 | } 132 | return ps 133 | } 134 | -------------------------------------------------------------------------------- /compress.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "io/ioutil" 9 | "math" 10 | "math/bits" 11 | "os" 12 | "sort" 13 | "strconv" 14 | "sync" 15 | "time" 16 | "unsafe" 17 | ) 18 | 19 | var ( 20 | CompressedMetadataSize = 28 + FreeCompressedMetadataSize 21 | FreeCompressedMetadataSize = 16 22 | 23 | VersionSize = 1 24 | 25 | CompressedArchiveInfoSize = 92 + FreeCompressedArchiveInfoSize 26 | FreeCompressedArchiveInfoSize = 36 27 | 28 | compressedHeaderAggregationOffset = len(compressedMagicString) + VersionSize 29 | compressedHeaderXFFOffset = compressedHeaderAggregationOffset + IntSize*2 30 | 31 | BlockRangeSize = 16 32 | endOfBlockSize = 5 33 | 34 | // One can see that blocks that extend longer than two 35 | // hours provide diminishing returns for compressed size. A 36 | // two-hour block allows us to achieve a compression ratio of 37 | // 1.37 bytes per data point. 
38 | // 4.1.2 Compressing values 39 | // Gorilla: A Fast, Scalable, In-Memory Time Series Database 40 | DefaultPointsPerBlock = 7200 // recommended by the gorilla paper algorithm 41 | 42 | // using 2 buffer here to mitigate data points arriving at 43 | // random orders causing early propagation 44 | bufferCount = 2 45 | 46 | compressedMagicString = []byte("whisper_compressed") // len = 18 47 | 48 | debugCompress bool 49 | debugBitsWrite bool 50 | debugExtend bool 51 | 52 | avgCompressedPointSize float32 = 2 53 | ) 54 | 55 | // In worst case scenario all data points would required 2 bytes more space 56 | // after compression, this buffer size make sure that it's always big enough 57 | // to contain the compressed result 58 | const MaxCompressedPointSize = PointSize + 2 59 | 60 | const sizeEstimationBuffer = 0.618 61 | 62 | func Debug(compress, bitsWrite bool) { 63 | debugCompress = compress 64 | debugBitsWrite = bitsWrite 65 | } 66 | 67 | func (whisper *Whisper) WriteHeaderCompressed() (err error) { 68 | b := make([]byte, whisper.MetadataSize()) 69 | i := 0 70 | 71 | // magic string 72 | i += len(compressedMagicString) 73 | copy(b, compressedMagicString) 74 | 75 | // version 76 | b[i] = whisper.compVersion 77 | i += VersionSize 78 | 79 | i += packInt(b, int(whisper.aggregationMethod), i) 80 | i += packInt(b, whisper.maxRetention, i) 81 | i += packFloat32(b, whisper.xFilesFactor, i) 82 | i += packInt(b, whisper.pointsPerBlock, i) 83 | i += packInt(b, len(whisper.archives), i) 84 | i += packFloat32(b, whisper.avgCompressedPointSize, i) 85 | i += packInt(b, 0, i) // crc32 always write at the end of whisper meta info header and before archive header 86 | i += FreeCompressedMetadataSize 87 | 88 | for _, archive := range whisper.archives { 89 | i += packInt(b, archive.offset, i) 90 | i += packInt(b, archive.secondsPerPoint, i) 91 | i += packInt(b, archive.numberOfPoints, i) 92 | i += packInt(b, archive.blockSize, i) 93 | i += packInt(b, archive.blockCount, i) 94 | i += 
packFloat32(b, archive.avgCompressedPointSize, i) 95 | 96 | var mixSpecSize int 97 | if archive.aggregationSpec != nil { 98 | b[i] = byte(archive.aggregationSpec.Method) 99 | i += ByteSize 100 | i += packFloat32(b, archive.aggregationSpec.Percentile, i) 101 | mixSpecSize = ByteSize + FloatSize 102 | } 103 | 104 | i += packInt(b, archive.cblock.index, i) 105 | i += packInt(b, archive.cblock.p0.interval, i) 106 | i += packFloat64(b, archive.cblock.p0.value, i) 107 | i += packInt(b, archive.cblock.pn1.interval, i) 108 | i += packFloat64(b, archive.cblock.pn1.value, i) 109 | i += packInt(b, archive.cblock.pn2.interval, i) 110 | i += packFloat64(b, archive.cblock.pn2.value, i) 111 | i += packInt(b, int(archive.cblock.lastByte), i) 112 | i += packInt(b, archive.cblock.lastByteOffset, i) 113 | i += packInt(b, archive.cblock.lastByteBitPos, i) 114 | i += packInt(b, archive.cblock.count, i) 115 | i += packInt(b, int(archive.cblock.crc32), i) 116 | 117 | i += packInt(b, int(archive.stats.discard.oldInterval), i) 118 | i += packInt(b, int(archive.stats.extended), i) 119 | 120 | i += FreeCompressedArchiveInfoSize - mixSpecSize 121 | 122 | if FreeCompressedArchiveInfoSize < mixSpecSize { 123 | panic("out of FreeCompressedArchiveInfoSize") // a panic that should never happens 124 | } 125 | } 126 | 127 | // write block_range_info and buffer 128 | for _, archive := range whisper.archives { 129 | for _, bran := range archive.blockRanges { 130 | i += packInt(b, bran.start, i) 131 | i += packInt(b, bran.end, i) 132 | i += packInt(b, bran.count, i) 133 | i += packInt(b, int(bran.crc32), i) 134 | } 135 | 136 | if archive.hasBuffer() { 137 | i += copy(b[i:], archive.buffer) 138 | } 139 | } 140 | 141 | whisper.crc32 = crc32(b, 0) 142 | packInt(b, int(whisper.crc32), whisper.crc32Offset()) 143 | 144 | if err := whisper.fileWriteAt(b, 0); err != nil { 145 | return err 146 | } 147 | if _, err := whisper.file.Seek(int64(len(b)), 0); err != nil { 148 | return err 149 | } 150 | 151 | return 
nil 152 | } 153 | 154 | func (whisper *Whisper) readHeaderCompressed() (err error) { 155 | if _, err := whisper.file.Seek(int64(len(compressedMagicString)), 0); err != nil { 156 | return err 157 | } 158 | 159 | offset := 0 160 | hlen := whisper.MetadataSize() - len(compressedMagicString) 161 | b := make([]byte, hlen) 162 | readed, err := whisper.file.Read(b) 163 | if err != nil { 164 | err = fmt.Errorf("unable to read header: %s", err) 165 | return 166 | } 167 | if readed != hlen { 168 | err = fmt.Errorf("unable to read header: EOF") 169 | return 170 | } 171 | 172 | whisper.compVersion = b[offset] 173 | offset++ 174 | 175 | whisper.aggregationMethod = AggregationMethod(unpackInt(b[offset : offset+IntSize])) 176 | offset += IntSize 177 | whisper.maxRetention = unpackInt(b[offset : offset+IntSize]) 178 | offset += IntSize 179 | whisper.xFilesFactor = unpackFloat32(b[offset : offset+FloatSize]) 180 | offset += FloatSize 181 | whisper.pointsPerBlock = unpackInt(b[offset : offset+IntSize]) 182 | offset += IntSize 183 | archiveCount := unpackInt(b[offset : offset+IntSize]) 184 | offset += IntSize 185 | whisper.avgCompressedPointSize = unpackFloat32(b[offset : offset+FloatSize]) 186 | offset += FloatSize 187 | whisper.crc32 = uint32(unpackInt(b[offset : offset+IntSize])) 188 | offset += IntSize 189 | offset += FreeCompressedMetadataSize 190 | 191 | whisper.archives = make([]*archiveInfo, archiveCount) 192 | for i := 0; i < archiveCount; i++ { 193 | b := make([]byte, CompressedArchiveInfoSize) 194 | readed, err = whisper.file.Read(b) 195 | if err != nil || readed != CompressedArchiveInfoSize { 196 | err = fmt.Errorf("unable to read compressed archive %d metadata: %s", i, err) 197 | return 198 | } 199 | var offset int 200 | var arc archiveInfo 201 | 202 | arc.offset = unpackInt(b[offset : offset+IntSize]) 203 | offset += IntSize 204 | arc.secondsPerPoint = unpackInt(b[offset : offset+IntSize]) 205 | offset += IntSize 206 | arc.numberOfPoints = unpackInt(b[offset : 
offset+IntSize]) 207 | offset += IntSize 208 | arc.blockSize = unpackInt(b[offset : offset+IntSize]) 209 | offset += IntSize 210 | arc.blockCount = unpackInt(b[offset : offset+IntSize]) 211 | offset += IntSize 212 | arc.avgCompressedPointSize = unpackFloat32(b[offset : offset+FloatSize]) 213 | offset += FloatSize 214 | 215 | if whisper.aggregationMethod == Mix && i > 0 { 216 | arc.aggregationSpec = &MixAggregationSpec{} 217 | arc.aggregationSpec.Method = AggregationMethod(b[offset]) 218 | offset += ByteSize 219 | arc.aggregationSpec.Percentile = unpackFloat32(b[offset : offset+FloatSize]) 220 | offset += FloatSize 221 | } 222 | 223 | arc.cblock.index = unpackInt(b[offset : offset+IntSize]) 224 | offset += IntSize 225 | arc.cblock.p0.interval = unpackInt(b[offset : offset+IntSize]) 226 | offset += IntSize 227 | arc.cblock.p0.value = unpackFloat64(b[offset : offset+Float64Size]) 228 | offset += Float64Size 229 | arc.cblock.pn1.interval = unpackInt(b[offset : offset+IntSize]) 230 | offset += IntSize 231 | arc.cblock.pn1.value = unpackFloat64(b[offset : offset+Float64Size]) 232 | offset += Float64Size 233 | arc.cblock.pn2.interval = unpackInt(b[offset : offset+IntSize]) 234 | offset += IntSize 235 | arc.cblock.pn2.value = unpackFloat64(b[offset : offset+Float64Size]) 236 | offset += Float64Size 237 | arc.cblock.lastByte = byte(unpackInt(b[offset : offset+IntSize])) 238 | offset += IntSize 239 | arc.cblock.lastByteOffset = unpackInt(b[offset : offset+IntSize]) 240 | offset += IntSize 241 | arc.cblock.lastByteBitPos = unpackInt(b[offset : offset+IntSize]) 242 | offset += IntSize 243 | arc.cblock.count = unpackInt(b[offset : offset+IntSize]) 244 | offset += IntSize 245 | arc.cblock.crc32 = uint32(unpackInt(b[offset : offset+IntSize])) 246 | offset += IntSize 247 | 248 | arc.stats.discard.oldInterval = uint32(unpackInt(b[offset : offset+IntSize])) 249 | whisper.discardedPointsAtOpen += arc.stats.discard.oldInterval 250 | offset += IntSize 251 | arc.stats.extended = 
uint32(unpackInt(b[offset : offset+IntSize])) 252 | offset += IntSize 253 | 254 | whisper.archives[i] = &arc 255 | } 256 | 257 | whisper.initMetaInfo() 258 | 259 | for i, arc := range whisper.archives { 260 | b := make([]byte, BlockRangeSize*arc.blockCount) 261 | readed, err = whisper.file.Read(b) 262 | if err != nil || readed != BlockRangeSize*arc.blockCount { 263 | err = fmt.Errorf("unable to read archive %d block ranges: %s", i, err) 264 | return 265 | } 266 | offset := 0 267 | 268 | arc.blockRanges = make([]blockRange, arc.blockCount) 269 | for i := range arc.blockRanges { 270 | arc.blockRanges[i].index = i 271 | arc.blockRanges[i].start = unpackInt(b[offset : offset+IntSize]) 272 | offset += IntSize 273 | arc.blockRanges[i].end = unpackInt(b[offset : offset+IntSize]) 274 | offset += IntSize 275 | arc.blockRanges[i].count = unpackInt(b[offset : offset+IntSize]) 276 | offset += IntSize 277 | arc.blockRanges[i].crc32 = uint32(unpackInt(b[offset : offset+IntSize])) 278 | offset += IntSize 279 | } 280 | 281 | // arc.initBlockRanges() 282 | 283 | if !arc.hasBuffer() { 284 | continue 285 | } 286 | arc.buffer = make([]byte, arc.bufferSize) 287 | 288 | readed, err = whisper.file.Read(arc.buffer) 289 | if err != nil { 290 | return fmt.Errorf("unable to read archive %d buffer: %s", i, err) 291 | } else if readed != arc.bufferSize { 292 | return fmt.Errorf("unable to read archive %d buffer: readed = %d want = %d", i, readed, arc.bufferSize) 293 | } 294 | } 295 | 296 | return nil 297 | } 298 | 299 | func (a *archiveInfo) blockOffset(blockIndex int) int { 300 | return a.offset + blockIndex*a.blockSize 301 | } 302 | 303 | const maxInt = 1< b.start { 327 | from = b.start 328 | } 329 | if until == 0 || b.end > until { 330 | until = b.end 331 | } 332 | } 333 | return 334 | } 335 | 336 | func (archive *archiveInfo) hasBuffer() bool { return archive.bufferSize > 0 } 337 | 338 | func (whisper *Whisper) fetchCompressed(start, end int64, archive *archiveInfo) ([]dataPoint, error) { 
	// Collect decompressed data points in [start, end] from this archive's
	// on-disk blocks. TODO: optimize this with pre-allocation.
	var dst []dataPoint
	var buf = make([]byte, archive.blockSize)
	for _, block := range archive.getSortedBlockRanges() {
		// Only read blocks whose [block.start, block.end] range overlaps the
		// requested [start, end] window.
		if block.end >= int(start) && int(end) >= block.start {
			if err := whisper.fileReadAt(buf, int64(archive.blockOffset(block.index))); err != nil {
				return nil, fmt.Errorf("fetchCompressed.%d.%d: %s", archive.numberOfPoints, block.index, err)
			}

			var err error
			dst, _, err = archive.ReadFromBlock(buf, dst, int(start), int(end))
			if err != nil {
				return dst, err
			}

			// Zero the shared scratch buffer so stale bytes from this block
			// cannot leak into the next block's decode.
			for i := 0; i < archive.blockSize; i++ {
				buf[i] = 0
			}
		}
	}

	// Merge in points still sitting in the archive's write buffer (not yet
	// flushed to a block). interval == 0 marks an empty buffer slot.
	if archive.hasBuffer() {
		dps := unpackDataPoints(archive.buffer)
		for _, p := range dps {
			if p.interval != 0 && int(start) <= p.interval && p.interval <= int(end) {
				dst = append(dst, p)
			}
		}
	}

	// The base (highest-resolution) archive needs no live aggregation.
	base := whisper.archives[0]
	if base == archive {
		return dst, nil
	}

	// Start live aggregation. This probably has a read performance hit.
	if whisper.aggregationMethod == Mix {
		// Mix aggregation is triggered when a block in the base archive is
		// rotated and also depends on the sufficiency of data points. This
		// could result in an overly long gap when fetching data from higher
		// archives, depending on the retention policy. Therefore cwhisper
		// needs to do live aggregation.

		var dps []dataPoint
		var inBase bool
		// Base lookup is only needed when this archive has no data covering
		// the tail of the requested window.
		var baseLookupNeeded = len(dst) == 0 || dst[len(dst)-1].interval < int(end)
		if baseLookupNeeded {
			bstart, bend := base.getRange()
			// NOTE(review): a window-overlap test would normally combine
			// these with && — as written, || makes inBase true whenever
			// bstart <= end. Confirm this is intentional.
			inBase = int64(bstart) <= end || end <= int64(bend)
		}

		if inBase {
			// Resume fetching from the base archive just after the last
			// point this archive already produced.
			nstart := start
			if len(dst) > 0 {
				// TODO: investigate why shifting the last data point interval is wrong
				nstart = int64(archive.Interval(dst[len(dst)-1].interval)) // + archive.secondsPerPoint
			}
			var err error
			dps, err = whisper.fetchCompressed(nstart, end, base)
			if err != nil {
				return dst, err
			}
		}

		// Also include the base archive's unflushed buffer points.
		if base.hasBuffer() {
			for _, p := range unpackDataPoints(base.buffer) {
				if p.interval != 0 && int(start) <= p.interval && p.interval <= int(end) {
					dps = append(dps, p)
				}
			}
		}

		// Aggregate the base-resolution points down to every archive's
		// resolution and keep the slice for this archive.
		adps := whisper.aggregateByArchives(dps)
		dst = append(dst, adps[archive]...)
	} else {
		// retrieve data points within range from the higher/previous archives
		var dps []dataPoint
		for i, arc := range whisper.archives {
			// Stop at the target archive; the last archive has no buffer to
			// aggregate from.
			if arc == archive || i == len(whisper.archives)-1 {
				break
			}

			// Group consecutive buffer points by their aggregated interval
			// (cinterval) and emit one aggregated point per group.
			cvals := []float64{}
			cinterval := 0
			tdps := append(dps, unpackDataPoints(arc.buffer)...) // skipcq: CRT-D0001
			dps = []dataPoint{}
			for j, p := range tdps {
				// Skip empty slots (interval 0), but still visit the final
				// element so the pending group below is flushed.
				if p.interval == 0 && j < len(tdps)-1 {
					continue
				}
				interval := arc.AggregateInterval(p.interval)
				if cinterval == 0 || cinterval == interval {
					cinterval = interval
					cvals = append(cvals, p.value)

					continue
				}

				dps = append(dps, dataPoint{cinterval, aggregate(whisper.aggregationMethod, cvals)})

				cinterval = interval
				cvals = []float64{p.value}
			}
		}
		sort.SliceStable(dps, func(i, j int) bool { return dps[i].interval < dps[j].interval })
		// Truncate at the first point falling outside [start, end]; points
		// are sorted, so everything after it is out of range too.
		for i := 0; i < len(dps); i++ {
			if int(start) <= dps[i].interval && dps[i].interval <= int(end) {
				continue
			}
			dps = dps[:i]
			break
		}
		dst = append(dst, dps...)
	}

	return dst, nil
}

// NOTE: this method assumes data saved in higher archives are fixed. If
// we move to allowing data/intervals coming in non-monotonic order, we
// need to rethink the implementation here as well.
// archiveUpdateManyCompressed writes aligned data points into the archive.
// Archives without a buffer append straight to compressed blocks (and, for
// Mix aggregation, trigger propagation on block rotation). Buffered archives
// stage points in per-unit buffers; when a unit is recycled its points are
// flushed to blocks and an aggregated point is propagated recursively to the
// next lower-resolution archive.
func (whisper *Whisper) archiveUpdateManyCompressed(archive *archiveInfo, points []*TimeSeriesPoint) error {
	alignedPoints := alignPoints(archive, points)

	// Note: in the current design, mix aggregation doesn't have any buffer in
	// higher archives
	if !archive.hasBuffer() {
		rotated, err := archive.appendToBlockAndRotate(alignedPoints)
		if err != nil {
			return err
		}

		// Mix policy only propagates when a block rotation happened.
		if !(whisper.aggregationMethod == Mix && rotated) {
			return nil
		}

		return whisper.propagateToMixedArchivesCompressed()
	}

	baseIntervalsPerUnit, currentUnit, minInterval := archive.getBufferInfo()
	// Number of this archive's points that fit in one buffer unit (one
	// lower-archive interval worth of data).
	bufferUnitPointsCount := archive.next.secondsPerPoint / archive.secondsPerPoint
	for aindex := 0; aindex < len(alignedPoints); {
		dp := alignedPoints[aindex]
		// The lower-archive interval this point aggregates into.
		dpBaseInterval := archive.AggregateInterval(dp.interval)

		// NOTE: current implementation expects data points to be monotonically
		// increasing in time
		if minInterval != 0 && dpBaseInterval < minInterval { // TODO: check against cblock pn1.interval?
			archive.stats.discard.oldInterval++
			aindex++
			continue
		}

		// Tolerate out of order data handling within the current buffer.
		// Find a unit already holding this point's base interval.
		targetUnit := -1
		for i, unitInterval := range baseIntervalsPerUnit {
			if dpBaseInterval == unitInterval {
				targetUnit = i
				break
			}
		}
		if targetUnit != -1 {
			aindex++

			// TODO: not efficient if many data points are being written in one call
			offset := targetUnit*bufferUnitPointsCount + (dp.interval-dpBaseInterval)/archive.secondsPerPoint
			copy(archive.buffer[offset*PointSize:], dp.Bytes())

			continue
		}

		// check if buffer is full
		if baseIntervalsPerUnit[currentUnit] == 0 || baseIntervalsPerUnit[currentUnit] == dpBaseInterval {
			aindex++
			baseIntervalsPerUnit[currentUnit] = dpBaseInterval

			// TODO: not efficient if many data points are being written in one call
			offset := currentUnit*bufferUnitPointsCount + (dp.interval-dpBaseInterval)/archive.secondsPerPoint
			copy(archive.buffer[offset*PointSize:], dp.Bytes())

			continue
		}

		// All units occupied with other intervals: recycle the oldest unit.
		// Note: aindex is NOT advanced here; the current point is retried
		// against the freed unit on the next iteration.
		currentUnit = (currentUnit + 1) % len(baseIntervalsPerUnit)
		baseIntervalsPerUnit[currentUnit] = 0

		// flush buffer
		buffer := archive.getBufferByUnit(currentUnit)
		dps := unpackDataPointsStrict(buffer)

		// reset buffer
		for i := range buffer {
			buffer[i] = 0
		}

		if len(dps) <= 0 {
			continue
		}

		if _, err := archive.appendToBlockAndRotate(dps); err != nil {
			// TODO: record and continue?
			return err
		}

		// propagate
		lower := archive.next
		lowerIntervalStart := archive.AggregateInterval(dps[0].interval)

		var knownValues []float64
		for _, dPoint := range dps {
			knownValues = append(knownValues, dPoint.value)
		}

		knownPercent := float32(len(knownValues)) / float32(lower.secondsPerPoint/archive.secondsPerPoint)
		// check we have enough data points to propagate a value
		if knownPercent >= whisper.xFilesFactor {
			aggregateValue := aggregate(whisper.aggregationMethod, knownValues)
			point := &TimeSeriesPoint{lowerIntervalStart, aggregateValue}

			// TODO: consider migrating to a non-recursive propagation implementation like mix policy
			if err := whisper.archiveUpdateManyCompressed(lower, []*TimeSeriesPoint{point}); err != nil {
				return err
			}
		}
	}

	return nil
}

// getBufferInfo reports, for each buffer unit, the base (lower-archive)
// interval it currently holds (0 for an empty unit), plus the index of the
// unit holding the newest interval and the smallest interval seen.
// NOTE(review): an empty unit resets min to 0 (min > v with v == 0), which
// disables the old-interval discard check in the caller — confirm intended.
func (archive *archiveInfo) getBufferInfo() (units []int, index, min int) {
	var max int
	for i := 0; i < archive.bufferUnitCount(); i++ {
		v := getFirstDataPointStrict(archive.getBufferByUnit(i)).interval
		if v > 0 {
			v = archive.AggregateInterval(v)
		}
		units = append(units, v)

		if max < v {
			max = v
			index = i
		}
		if min == 0 || min > v {
			min = v
		}
	}
	return
}

// bufferUnitCount returns how many units the archive buffer is divided into;
// each unit holds one lower-archive interval worth of points.
func (archive *archiveInfo) bufferUnitCount() int {
	return len(archive.buffer) / PointSize / (archive.next.secondsPerPoint / archive.secondsPerPoint)
}

// getBufferByUnit returns the byte slice of the buffer backing the given
// unit; mutations through it write directly into archive.buffer.
func (archive *archiveInfo) getBufferByUnit(unit int) []byte {
	count := archive.next.secondsPerPoint / archive.secondsPerPoint
	lb := unit * PointSize * count
	ub := (unit + 1) * PointSize * count
	return archive.buffer[lb:ub]
}

func (archive *archiveInfo) appendToBlockAndRotate(dps []dataPoint) (rotated bool, err error) {
	whisper := archive.whisper // TODO: optimize
away? 599 | 600 | // Why MaxCompressedPointSize+1 and endOfBlockSize*2: 601 | // 602 | // MaxCompressedPointSize is set to 14, but in reality, a maximum compressed 603 | // data point has a bit length of 14.125(113 bits). And there might be times 604 | // when the current block is almost full, and more data needs to be set 0. So 605 | // here go-whisper should prefer to have a large buffer just to be on the safe 606 | // side. 607 | // 608 | // An edge case that can be fixed by this allocation strategy is that: the 609 | // current block has less than 14 bytes plus endOfBlockSize (5) bytes 610 | // available, and a data point that can't be well compressed comes in, then the 611 | // buffer isn't large enough to fill the generated binary data. 612 | // 613 | // This allocation strategy makes sure that there is enough space in the block 614 | // buffer for compression output. 615 | blockBuffer := make([]byte, len(dps)*(MaxCompressedPointSize+1)+endOfBlockSize*2) 616 | 617 | for { 618 | offset := archive.cblock.lastByteOffset // lastByteOffset is updated in AppendPointsToBlock 619 | size, left, rotate := archive.AppendPointsToBlock(blockBuffer, dps) 620 | 621 | // flush block 622 | if size >= len(blockBuffer) { 623 | // TODO: panic? 
624 | size = len(blockBuffer) 625 | } 626 | if err := whisper.fileWriteAt(blockBuffer[:size], int64(offset)); err != nil { 627 | return rotated, err 628 | } 629 | 630 | if len(left) == 0 { 631 | break 632 | } 633 | 634 | // reset block 635 | for i := 0; i < len(blockBuffer); i++ { 636 | blockBuffer[i] = 0 637 | } 638 | 639 | dps = left 640 | if !rotate { 641 | continue 642 | } 643 | 644 | var nblock blockInfo 645 | nblock.index = (archive.cblock.index + 1) % len(archive.blockRanges) 646 | nblock.lastByteBitPos = 7 647 | nblock.lastByteOffset = archive.blockOffset(nblock.index) 648 | archive.cblock = nblock 649 | archive.blockRanges[nblock.index].start = 0 650 | archive.blockRanges[nblock.index].end = 0 651 | 652 | rotated = true 653 | } 654 | 655 | return rotated, nil 656 | } 657 | 658 | func (whisper *Whisper) extendIfNeeded() error { 659 | var rets []*Retention 660 | var mixSpecs []MixAggregationSpec 661 | var mixSizes = make(map[int][]float32) 662 | var extend bool 663 | var msg string 664 | var nferrs []error 665 | for _, arc := range whisper.archives { 666 | ret := &Retention{ 667 | secondsPerPoint: arc.secondsPerPoint, 668 | numberOfPoints: arc.numberOfPoints, 669 | avgCompressedPointSize: arc.avgCompressedPointSize, 670 | blockCount: arc.blockCount, 671 | } 672 | 673 | var totalPoints int 674 | var totalBlocks int 675 | for _, b := range arc.getSortedBlockRanges() { 676 | if b.index == arc.cblock.index { 677 | break 678 | } 679 | 680 | totalBlocks++ 681 | totalPoints += b.count 682 | } 683 | if totalPoints > 0 { 684 | avgPointSize := float32(totalBlocks*arc.blockSize) / float32(totalPoints) 685 | if avgPointSize > arc.avgCompressedPointSize { 686 | extend = true 687 | if avgPointSize-arc.avgCompressedPointSize < sizeEstimationBuffer { 688 | avgPointSize += sizeEstimationBuffer 689 | } 690 | if debugExtend { 691 | msg += fmt.Sprintf("%s:%v->%v ", ret, ret.avgCompressedPointSize, avgPointSize) 692 | } 693 | ret.avgCompressedPointSize = avgPointSize 694 | 
arc.stats.extended++ 695 | } 696 | } 697 | 698 | rets = append(rets, ret) 699 | } 700 | 701 | if !extend { 702 | return nil 703 | } 704 | 705 | if debugExtend { 706 | fmt.Println("extend:", whisper.file.Name(), msg) 707 | } 708 | 709 | filename := whisper.file.Name() 710 | if err := os.Remove(whisper.file.Name() + ".extend"); err != nil && !os.IsNotExist(err) { 711 | nferrs = append(nferrs, err) 712 | } 713 | 714 | if whisper.aggregationMethod == Mix && len(rets) > 1 { 715 | rets, mixSpecs, mixSizes = extractMixSpecs(rets, whisper.archives) 716 | } 717 | 718 | nwhisper, err := CreateWithOptions( 719 | whisper.file.Name()+".extend", rets, 720 | whisper.aggregationMethod, whisper.xFilesFactor, 721 | &Options{ 722 | Compressed: true, 723 | PointsPerBlock: DefaultPointsPerBlock, 724 | InMemory: whisper.opts.InMemory, 725 | MixAggregationSpecs: mixSpecs, 726 | MixAvgCompressedPointSizes: mixSizes, 727 | }, 728 | ) 729 | if err != nil { 730 | return fmt.Errorf("extend: %s", err) 731 | } 732 | 733 | for i := len(whisper.archives) - 1; i >= 0; i-- { 734 | archive := whisper.archives[i] 735 | copy(nwhisper.archives[i].buffer, archive.buffer) 736 | nwhisper.archives[i].stats = archive.stats 737 | 738 | for _, block := range archive.getSortedBlockRanges() { 739 | buf := make([]byte, archive.blockSize) 740 | if err := whisper.fileReadAt(buf, int64(archive.blockOffset(block.index))); err != nil { 741 | return fmt.Errorf("archives[%d].blocks[%d].file.read: %s", i, block.index, err) 742 | } 743 | dst, _, err := archive.ReadFromBlock(buf, []dataPoint{}, 0, maxInt) 744 | if err != nil { 745 | return fmt.Errorf("archives[%d].blocks[%d].read: %s", i, block.index, err) 746 | } 747 | if _, err := nwhisper.archives[i].appendToBlockAndRotate(dst); err != nil { 748 | return fmt.Errorf("archives[%d].blocks[%d].write: %s", i, block.index, err) 749 | } 750 | } 751 | 752 | nwhisper.archives[i].buffer = archive.buffer 753 | } 754 | if err := nwhisper.WriteHeaderCompressed(); err != nil { 755 | 
		// NOTE(review): "writer" in this message is a typo for "write";
		// left unchanged here because it is a runtime string.
		return fmt.Errorf("extend: failed to writer header: %s", err)
	}

	// Close both files; close failures are non-fatal and only recorded.
	if err := whisper.Close(); err != nil {
		nferrs = append(nferrs, err)
	}
	if err := nwhisper.file.Close(); err != nil {
		nferrs = append(nferrs, err)
	}

	// Swap the extended file into place: in-memory files swap the backing
	// data directly, on-disk files are atomically renamed over the original.
	if whisper.opts.InMemory {
		whisper.file.(*memFile).data = nwhisper.file.(*memFile).data
		releaseMemFile(filename + ".extend")
	} else if err = os.Rename(filename+".extend", filename); err != nil {
		return fmt.Errorf("extend/rename: %s", err)
	}

	// Reopen the (now extended) file and replace this Whisper's state
	// in place so existing handles keep working.
	nwhisper, err = OpenWithOptions(filename, whisper.opts)
	*whisper = *nwhisper
	whisper.Extended = true
	whisper.NonFatalErrors = append(whisper.NonFatalErrors, nferrs...)

	return err
}

// extractMixSpecs deduplicates retentions by resolution (secondsPerPoint),
// collects per-resolution average compressed point sizes, and gathers the
// MixAggregationSpecs of the consecutive archives sharing the first
// aggregated resolution. Returns the deduplicated retentions, the specs,
// and the sizes map keyed by secondsPerPoint.
func extractMixSpecs(orets Retentions, arcs []*archiveInfo) (Retentions, []MixAggregationSpec, map[int][]float32) {
	var nrets Retentions
	var specs []MixAggregationSpec
	var sizes = make(map[int][]float32)
	var specsCont bool

	for i, ret := range orets {
		sizes[ret.secondsPerPoint] = append(sizes[ret.secondsPerPoint], ret.avgCompressedPointSize)

		// The base retention is always kept as-is.
		if len(nrets) == 0 {
			nrets = append(nrets, ret)
			continue
		}

		if ret.secondsPerPoint != nrets[len(nrets)-1].secondsPerPoint {
			nrets = append(nrets, ret)

			// Start collecting specs at the first resolution change; a later
			// resolution change ends the collected run.
			if len(specs) == 0 {
				specs = append(specs, *arcs[i].aggregationSpec)
				specsCont = true
			} else {
				specsCont = false
			}
		} else if specsCont {
			// Same resolution as the previous retention within the first
			// aggregated run: this archive's spec belongs to the run too.
			specs = append(specs, *arcs[i].aggregationSpec)
		}
	}

	return nrets, specs, sizes
}

// avgPointsPerBlockReal computes the observed average compressed size in
// bytes per data point over fully written (non-current) blocks, or 0 when
// no completed block holds any points.
// NOTE(review): despite the name, the returned value is bytes-per-point
// (totalBlocks*blockSize / totalPoints), not points-per-block.
func (arc *archiveInfo) avgPointsPerBlockReal() float32 {
	var totalPoints int
	var totalBlocks int
	for _, b := range arc.getSortedBlockRanges() {
		// The current (still-open) block is excluded from the average.
		if b.index == arc.cblock.index {
			break
		}

		totalBlocks++
		totalPoints += b.count
	}
	if totalPoints > 0 {
		return float32(totalBlocks*arc.blockSize) / float32(totalPoints)
	}
	return 0
}

// Timestamp:
// 1. The block header stores the starting time stamp, t−1,
//    which is aligned to a two hour window; the first time
//    stamp, t0, in the block is stored as a delta from t−1 in
//    14 bits. 1
// 2. For subsequent time stamps, tn:
//    (a) Calculate the delta of delta:
//        D = (tn − tn−1) − (tn−1 − tn−2)
//    (b) If D is zero, then store a single ‘0’ bit
//    (c) If D is between [-63, 64], store ‘10’ followed by
//        the value (7 bits)
//    (d) If D is between [-255, 256], store ‘110’ followed by
//        the value (9 bits)
//    (e) if D is between [-2047, 2048], store ‘1110’ followed
//        by the value (12 bits)
//    (f) Otherwise store ‘1111’ followed by D using 32 bits
//
// Value:
// 1. The first value is stored with no compression
// 2. If XOR with the previous is zero (same value), store
//    single ‘0’ bit
// 3. When XOR is non-zero, calculate the number of leading
//    and trailing zeros in the XOR, store bit ‘1’ followed
//    by either a) or b):
//    (a) (Control bit ‘0’) If the block of meaningful bits
//        falls within the block of previous meaningful bits,
//        i.e., there are at least as many leading zeros and
//        as many trailing zeros as with the previous value,
//        use that information for the block position and
//        just store the meaningful XORed value.
//    (b) (Control bit ‘1’) Store the length of the number
//        of leading zeros in the next 5 bits, then store the
//        length of the meaningful XORed value in the next
//        6 bits. Finally store the meaningful bits of the
//        XORed value.
863 | 864 | func (a *archiveInfo) AppendPointsToBlock(buf []byte, ps []dataPoint) (written int, left []dataPoint, rotate bool) { 865 | var bw bitsWriter 866 | bw.buf = buf 867 | bw.bitPos = a.cblock.lastByteBitPos 868 | 869 | // set and clean possible end-of-block maker 870 | bw.buf[0] = a.cblock.lastByte 871 | bw.buf[0] &= 0xFF ^ (1< 0 { 888 | bw.index += left 889 | } 890 | 891 | a.cblock.crc32 = crc32(buf[:bw.index+1], a.cblock.crc32) 892 | a.cblock.lastByteOffset = blockEnd 893 | } else if written > 0 { 894 | // exclude eob for crc32 when block isn't full 895 | a.cblock.crc32 = crc32(buf[:written], a.cblock.crc32) 896 | } 897 | 898 | written = bw.index + 1 899 | 900 | a.blockRanges[a.cblock.index].start = a.cblock.p0.interval 901 | a.blockRanges[a.cblock.index].end = a.cblock.pn1.interval 902 | a.blockRanges[a.cblock.index].count = a.cblock.count 903 | a.blockRanges[a.cblock.index].crc32 = a.cblock.crc32 904 | }() 905 | 906 | if debugCompress { 907 | fmt.Printf("AppendPointsToBlock(%s): cblock.index=%d bw.index = %d lastByteOffset = %d blockSize = %d\n", a.Retention, a.cblock.index, bw.index, a.cblock.lastByteOffset, a.blockSize) 908 | } 909 | 910 | // TODO: return error if interval is not monotonically increasing? 
911 | 912 | for i, p := range ps { 913 | if p.interval == 0 { 914 | continue 915 | } else if p.interval <= a.cblock.pn1.interval { 916 | a.stats.discard.oldInterval++ 917 | continue 918 | } 919 | 920 | oldBwIndex := bw.index 921 | oldBwBitPos := bw.bitPos 922 | oldBwLastByte := bw.buf[bw.index] 923 | 924 | var delta1, delta2 int 925 | if a.cblock.p0.interval == 0 { 926 | a.cblock.p0 = p 927 | a.cblock.pn1 = p 928 | a.cblock.pn2 = p 929 | 930 | copy(buf, p.Bytes()) 931 | bw.index += PointSize 932 | 933 | if debugCompress { 934 | fmt.Printf("begin\n") 935 | fmt.Printf("%d: %v\n", p.interval, p.value) 936 | } 937 | 938 | continue 939 | } 940 | 941 | delta1 = p.interval - a.cblock.pn1.interval 942 | delta2 = a.cblock.pn1.interval - a.cblock.pn2.interval 943 | delta := (delta1 - delta2) / a.secondsPerPoint 944 | 945 | if debugCompress { 946 | fmt.Printf("%d %d: %v\n", i, p.interval, p.value) 947 | } 948 | 949 | // TODO: use two's complement instead to extend delta range? 950 | if delta == 0 { 951 | if debugCompress { 952 | fmt.Printf("\tbuf.index = %d/%d delta = %d: %0s\n", bw.bitPos, bw.index, delta, dumpBits(1, 0)) 953 | } 954 | 955 | bw.Write(1, 0) 956 | a.stats.interval.len1++ 957 | } else if -63 < delta && delta < 64 { 958 | if delta < 0 { 959 | delta *= -1 960 | delta |= 64 961 | } 962 | 963 | if debugCompress { 964 | fmt.Printf("\tbuf.index = %d/%d delta = %d: %0s\n", bw.bitPos, bw.index, delta, dumpBits(2, 2, 7, uint64(delta))) 965 | } 966 | 967 | bw.Write(2, 2) 968 | bw.Write(7, uint64(delta)) 969 | a.stats.interval.len9++ 970 | } else if -255 < delta && delta < 256 { 971 | if delta < 0 { 972 | delta *= -1 973 | delta |= 256 974 | } 975 | if debugCompress { 976 | fmt.Printf("\tbuf.index = %d/%d delta = %d: %0s\n", bw.bitPos, bw.index, delta, dumpBits(3, 6, 9, uint64(delta))) 977 | } 978 | 979 | bw.Write(3, 6) 980 | bw.Write(9, uint64(delta)) 981 | a.stats.interval.len12++ 982 | } else if -2047 < delta && delta < 2048 { 983 | if delta < 0 { 984 | delta *= -1 985 
| delta |= 2048 986 | } 987 | 988 | if debugCompress { 989 | fmt.Printf("\tbuf.index = %d/%d delta = %d: %0s\n", bw.bitPos, bw.index, delta, dumpBits(4, 14, 12, uint64(delta))) 990 | } 991 | 992 | bw.Write(4, 14) 993 | bw.Write(12, uint64(delta)) 994 | a.stats.interval.len16++ 995 | } else { 996 | if debugCompress { 997 | fmt.Printf("\tbuf.index = %d/%d delta = %d: %0s\n", bw.bitPos, bw.index, delta, dumpBits(4, 15, 32, uint64(delta))) 998 | } 999 | 1000 | bw.Write(4, 15) 1001 | bw.Write(32, uint64(p.interval)) 1002 | a.stats.interval.len36++ 1003 | } 1004 | 1005 | pn1val := math.Float64bits(a.cblock.pn1.value) 1006 | pn2val := math.Float64bits(a.cblock.pn2.value) 1007 | val := math.Float64bits(p.value) 1008 | pxor := pn1val ^ pn2val 1009 | xor := pn1val ^ val 1010 | 1011 | if debugCompress { 1012 | fmt.Printf(" %v %016x\n", a.cblock.pn2.value, pn2val) 1013 | fmt.Printf(" %v %016x\n", a.cblock.pn1.value, pn1val) 1014 | fmt.Printf(" %v %016x\n", p.value, val) 1015 | fmt.Printf(" pxor: %016x (%064b)\n xor: %016x (%064b)\n", pxor, pxor, xor, xor) 1016 | } 1017 | 1018 | if xor == 0 { 1019 | bw.Write(1, 0) 1020 | if debugCompress { 1021 | fmt.Printf("\tsame, write 0\n") 1022 | } 1023 | 1024 | a.stats.value.same++ 1025 | } else { 1026 | plz := bits.LeadingZeros64(pxor) 1027 | lz := bits.LeadingZeros64(xor) 1028 | ptz := bits.TrailingZeros64(pxor) 1029 | tz := bits.TrailingZeros64(xor) 1030 | if plz <= lz && ptz <= tz { 1031 | mlen := 64 - plz - ptz // meaningful block size 1032 | bw.Write(2, 2) 1033 | bw.Write(mlen, xor>>uint64(ptz)) 1034 | if debugCompress { 1035 | fmt.Printf("\tsame-length meaningful block: %0s\n", dumpBits(2, 2, uint64(mlen), xor>>uint(ptz))) 1036 | } 1037 | 1038 | a.stats.value.sameLen++ 1039 | } else { 1040 | if lz >= 1<<5 { 1041 | lz = 31 // 11111 1042 | } 1043 | mlen := 64 - lz - tz // meaningful block size 1044 | wmlen := mlen 1045 | 1046 | if mlen == 64 { 1047 | mlen = 63 1048 | } else if mlen == 63 { 1049 | wmlen = 64 1050 | } else { 1051 | xor 
>>= uint64(tz) 1052 | } 1053 | 1054 | if debugCompress { 1055 | fmt.Printf("lz = %+v\n", lz) 1056 | fmt.Printf("mlen = %+v\n", mlen) 1057 | fmt.Printf("xor mblock = %08b\n", xor) 1058 | } 1059 | 1060 | bw.Write(2, 3) 1061 | bw.Write(5, uint64(lz)) 1062 | bw.Write(6, uint64(mlen)) 1063 | bw.Write(wmlen, xor) 1064 | if debugCompress { 1065 | fmt.Printf("\tvaried-length meaningful block: %0s\n", dumpBits(2, 3, 5, uint64(lz), 6, uint64(mlen), uint64(wmlen), xor)) 1066 | } 1067 | 1068 | a.stats.value.variedLen++ 1069 | } 1070 | } 1071 | 1072 | if bw.isFull() || bw.index+a.cblock.lastByteOffset+endOfBlockSize >= a.blockOffset(a.cblock.index)+a.blockSize { 1073 | rotate = bw.index+a.cblock.lastByteOffset+endOfBlockSize >= a.blockOffset(a.cblock.index)+a.blockSize 1074 | 1075 | // reset dirty buffer tail 1076 | bw.buf[oldBwIndex] = oldBwLastByte 1077 | for i := oldBwIndex + 1; i <= bw.index; i++ { 1078 | bw.buf[i] = 0 1079 | } 1080 | 1081 | bw.index = oldBwIndex 1082 | bw.bitPos = oldBwBitPos 1083 | left = ps[i:] 1084 | 1085 | if debugCompress { 1086 | fmt.Printf("buffer is full, write aborted: oldBwIndex = %d oldBwLastByte = %08x\n", oldBwIndex, oldBwLastByte) 1087 | } 1088 | 1089 | break 1090 | } 1091 | 1092 | a.cblock.pn2 = a.cblock.pn1 1093 | a.cblock.pn1 = p 1094 | a.cblock.count++ 1095 | 1096 | if debugCompress { 1097 | start := oldBwIndex 1098 | end := bw.index + 2 1099 | if end > len(bw.buf) { 1100 | end = len(bw.buf) - 1 1101 | } 1102 | eob := bw.index + a.cblock.lastByteOffset 1103 | size := eob - a.blockOffset(a.cblock.index) 1104 | fmt.Printf("buf[%d-%d](index=%d len=%d eob=%d size=%d/%d): %08b\n", start, end, bw.index, len(bw.buf), eob, size, a.blockSize, bw.buf[start:end]) 1105 | } 1106 | } 1107 | 1108 | return 1109 | } 1110 | 1111 | type bitsWriter struct { 1112 | buf []byte 1113 | index int // index 1114 | bitPos int // 0 indexed 1115 | } 1116 | 1117 | func (bw *bitsWriter) isFull() bool { 1118 | return bw.index+1 >= len(bw.buf) 1119 | } 1120 | 1121 | func 
mask(l int) uint { 1122 | return (1 << uint(l)) - 1 1123 | } 1124 | 1125 | func (bw *bitsWriter) Write(lenb int, data uint64) { 1126 | buf := make([]byte, 8) 1127 | switch { 1128 | case lenb <= 8: 1129 | buf[0] = byte(data) 1130 | case lenb <= 16: 1131 | binary.LittleEndian.PutUint16(buf, uint16(data)) 1132 | case lenb <= 32: 1133 | binary.LittleEndian.PutUint32(buf, uint32(data)) 1134 | case lenb <= 64: 1135 | binary.LittleEndian.PutUint64(buf, data) 1136 | default: 1137 | panic(fmt.Sprintf("write size = %d > 64", lenb)) 1138 | } 1139 | 1140 | index := bw.index 1141 | end := bw.index + 5 1142 | if debugBitsWrite { 1143 | if end >= len(bw.buf) { 1144 | end = len(bw.buf) - 1 1145 | } 1146 | fmt.Printf("bw.bitPos = %+v\n", bw.bitPos) 1147 | fmt.Printf("bw.buf = %08b\n", bw.buf[bw.index:end]) 1148 | } 1149 | 1150 | for _, b := range buf { 1151 | if lenb <= 0 || bw.isFull() { 1152 | break 1153 | } 1154 | 1155 | if bw.bitPos+1 > lenb { 1156 | bw.buf[bw.index] |= b << uint(bw.bitPos+1-lenb) 1157 | bw.bitPos -= lenb 1158 | lenb = 0 1159 | } else { 1160 | var left int 1161 | if lenb < 8 { 1162 | left = lenb - 1 - bw.bitPos 1163 | lenb = 0 1164 | } else { 1165 | left = 7 - bw.bitPos 1166 | lenb -= 8 1167 | } 1168 | bw.buf[bw.index] |= b >> uint(left) 1169 | 1170 | if bw.index == len(bw.buf)-1 { 1171 | break 1172 | } 1173 | bw.index++ 1174 | bw.buf[bw.index] |= (b & byte(mask(left))) << uint(8-left) 1175 | bw.bitPos = 7 - left 1176 | } 1177 | } 1178 | if debugBitsWrite { 1179 | fmt.Printf("bw.buf = %08b\n", bw.buf[index:end]) 1180 | } 1181 | } 1182 | 1183 | func (a *archiveInfo) ReadFromBlock(buf []byte, dst []dataPoint, start, end int) ([]dataPoint, int, error) { 1184 | var br bitsReader 1185 | br.buf = buf 1186 | br.bitPos = 7 1187 | br.current = PointSize 1188 | 1189 | // the first data point is not compressed 1190 | p := unpackDataPoint(buf) 1191 | if start <= p.interval && p.interval <= end { 1192 | dst = append(dst, p) 1193 | } 1194 | 1195 | var pn1, pn2 *dataPoint = 
&p, &p 1196 | var exitByEOB bool 1197 | 1198 | readloop: 1199 | for { 1200 | if br.current >= len(br.buf) { 1201 | break 1202 | } 1203 | 1204 | var p dataPoint 1205 | 1206 | if debugCompress { 1207 | endd := br.current + 8 1208 | if endd >= len(br.buf) { 1209 | endd = len(br.buf) - 1 1210 | } 1211 | fmt.Printf("new point %d:\n br.index = %d/%d br.bitPos = %d byte = %08b peek(1) = %08b peek(2) = %08b peek(3) = %08b peek(4) = %08b buf[%d:%d] = %08b\n", len(dst), br.current, len(br.buf), br.bitPos, br.buf[br.current], br.Peek(1), br.Peek(2), br.Peek(3), br.Peek(4), br.current, endd, br.buf[br.current:endd]) 1212 | } 1213 | 1214 | var skip, toRead int 1215 | switch { 1216 | case br.Peek(1) == 0: // 0xxx 1217 | skip = 0 1218 | toRead = 1 1219 | case br.Peek(2) == 2: // 10xx 1220 | skip = 2 1221 | toRead = 7 1222 | case br.Peek(3) == 6: // 110x 1223 | skip = 3 1224 | toRead = 9 1225 | case br.Peek(4) == 14: // 1110 1226 | skip = 4 1227 | toRead = 12 1228 | case br.Peek(4) == 15: // 1111 1229 | skip = 4 1230 | toRead = 32 1231 | default: 1232 | if br.current >= len(buf)-1 { 1233 | break readloop 1234 | } 1235 | start, endd, data := br.trailingDebug() 1236 | return dst, br.current, fmt.Errorf("unknown timestamp prefix (archive[%d]): %04b at %d@%d, context[%d-%d] = %08b len(dst) = %d", a.secondsPerPoint, br.Peek(4), br.current, br.bitPos, start, endd, data, len(dst)) 1237 | } 1238 | 1239 | br.Read(skip) 1240 | delta := int(br.Read(toRead)) 1241 | 1242 | if debugCompress { 1243 | fmt.Printf("\tskip = %d toRead = %d delta = %d\n", skip, toRead, delta) 1244 | } 1245 | 1246 | switch toRead { 1247 | case 0: 1248 | if debugCompress { 1249 | fmt.Println("\tended by 0 bits to read") 1250 | } 1251 | break readloop 1252 | case 32: 1253 | if delta == 0 { 1254 | if debugCompress { 1255 | fmt.Println("\tended by EOB") 1256 | } 1257 | 1258 | exitByEOB = true 1259 | break readloop 1260 | } 1261 | p.interval = delta 1262 | 1263 | if debugCompress { 1264 | fmt.Printf("\tfull interval read: 
%d\n", delta) 1265 | } 1266 | default: 1267 | // TODO: incorrect? 1268 | if skip > 0 && delta&(1< 0 { // POC: toRead-1 1269 | delta &= (1 << uint(toRead-1)) - 1 1270 | delta *= -1 1271 | } 1272 | delta *= a.secondsPerPoint 1273 | p.interval = 2*pn1.interval + delta - pn2.interval 1274 | 1275 | if debugCompress { 1276 | fmt.Printf("\tp.interval = 2*%d + %d - %d = %d\n", pn1.interval, delta, pn2.interval, p.interval) 1277 | } 1278 | } 1279 | 1280 | if debugCompress { 1281 | fmt.Printf(" br.index = %d/%d br.bitPos = %d byte = %08b peek(1) = %08b peek(2) = %08b\n", br.current, len(br.buf), br.bitPos, br.buf[br.current], br.Peek(1), br.Peek(2)) 1282 | } 1283 | 1284 | switch { 1285 | case br.Peek(1) == 0: // 0x 1286 | br.Read(1) 1287 | p.value = pn1.value 1288 | 1289 | if debugCompress { 1290 | fmt.Printf("\tsame as previous value %016x (%v)\n", math.Float64bits(pn1.value), p.value) 1291 | } 1292 | case br.Peek(2) == 2: // 10 1293 | br.Read(2) 1294 | xor := math.Float64bits(pn1.value) ^ math.Float64bits(pn2.value) 1295 | lz := bits.LeadingZeros64(xor) 1296 | tz := bits.TrailingZeros64(xor) 1297 | val := br.Read(64 - lz - tz) 1298 | p.value = math.Float64frombits(math.Float64bits(pn1.value) ^ (val << uint(tz))) 1299 | 1300 | if debugCompress { 1301 | fmt.Printf("\tsame-length meaningful block\n") 1302 | fmt.Printf("\txor: %016x val: %016x (%v)\n", val<= end { 1338 | if debugCompress { 1339 | fmt.Printf("ended by hitting end interval\n") 1340 | } 1341 | break 1342 | } 1343 | } 1344 | 1345 | endOffset := br.current 1346 | if exitByEOB && endOffset > endOfBlockSize { 1347 | endOffset -= endOfBlockSize - 1 1348 | } 1349 | 1350 | return dst, endOffset, nil 1351 | } 1352 | 1353 | type bitsReader struct { 1354 | buf []byte 1355 | current int 1356 | bitPos int // 0 indexed 1357 | badRead bool 1358 | } 1359 | 1360 | func (br *bitsReader) trailingDebug() (start, end int, data []byte) { 1361 | start = br.current - 1 1362 | if br.current == 0 { 1363 | start = 0 1364 | } 1365 | end = 
br.current + 1 1366 | if end >= len(br.buf) { 1367 | end = len(br.buf) - 1 1368 | } 1369 | data = br.buf[start : end+1] 1370 | return 1371 | } 1372 | 1373 | func (br *bitsReader) Peek(c int) byte { 1374 | if br.current >= len(br.buf) { 1375 | return 0 1376 | } 1377 | if br.bitPos+1 >= c { 1378 | return (br.buf[br.current] & (1<> uint(br.bitPos+1-c) 1379 | } 1380 | if br.current+1 >= len(br.buf) { 1381 | return 0 1382 | } 1383 | var b byte 1384 | left := c - br.bitPos - 1 1385 | b = (br.buf[br.current] & (1<> uint(8-left) 1387 | return b 1388 | } 1389 | 1390 | func (br *bitsReader) Read(c int) uint64 { 1391 | if c > 64 { 1392 | panic("bitsReader can't read more than 64 bits") 1393 | } 1394 | 1395 | var data uint64 1396 | oldc := c 1397 | for { 1398 | if br.badRead = br.current >= len(br.buf); br.badRead || c <= 0 { 1399 | // TODO: should reset data? 1400 | // data = 0 1401 | break 1402 | } 1403 | 1404 | if c < br.bitPos+1 { 1405 | data <<= uint(c) 1406 | data |= (uint64(br.buf[br.current]>>uint(br.bitPos+1-c)) & ((1 << uint(c)) - 1)) 1407 | br.bitPos -= c 1408 | break 1409 | } 1410 | 1411 | data <<= uint(br.bitPos + 1) 1412 | data |= (uint64(br.buf[br.current] & ((1 << uint(br.bitPos+1)) - 1))) 1413 | c -= br.bitPos + 1 1414 | br.current++ 1415 | br.bitPos = 7 1416 | continue 1417 | } 1418 | 1419 | var result uint64 1420 | for i := 8; i <= 64; i += 8 { 1421 | if oldc-i < 0 { 1422 | result |= (data & (1<> uint(oldc-i)) & 0xFF) << uint(i-8) 1426 | } 1427 | return result 1428 | } 1429 | 1430 | func dumpBits(data ...uint64) string { 1431 | var bw bitsWriter 1432 | bw.buf = make([]byte, 16) 1433 | bw.bitPos = 7 1434 | var l uint64 1435 | for i := 0; i < len(data); i += 2 { 1436 | bw.Write(int(data[i]), data[i+1]) 1437 | l += data[i] 1438 | } 1439 | return fmt.Sprintf("%08b len(%d) end_bit_pos(%d)", bw.buf[:bw.index+1], l, bw.bitPos) 1440 | } 1441 | 1442 | // For archive.Buffer handling, CompressTo assumes a simple archive layout that 1443 | // higher archive will 
propagate to lower archive. [wrong] 1444 | // 1445 | // CompressTo should stop compression/return errors when runs into any issues (if feasible). 1446 | func (whisper *Whisper) CompressTo(dstPath string) error { 1447 | // Note: doesn't support mix-aggregation. 1448 | if whisper.aggregationMethod == Mix { 1449 | return errors.New("mix aggregation policy isn't supported") 1450 | } 1451 | 1452 | var rets []*Retention 1453 | for _, arc := range whisper.archives { 1454 | rets = append(rets, &Retention{secondsPerPoint: arc.secondsPerPoint, numberOfPoints: arc.numberOfPoints}) 1455 | } 1456 | 1457 | var pointsByArchives = make([][]dataPoint, len(whisper.archives)) 1458 | for i := len(whisper.archives) - 1; i >= 0; i-- { 1459 | archive := whisper.archives[i] 1460 | 1461 | b := make([]byte, archive.Size()) 1462 | err := whisper.fileReadAt(b, archive.Offset()) 1463 | if err != nil { 1464 | return err 1465 | } 1466 | points := unpackDataPointsStrict(b) 1467 | sort.Slice(points, func(i, j int) bool { 1468 | return points[i].interval < points[j].interval 1469 | }) 1470 | 1471 | // filter null data points 1472 | var bound = int(time.Now().Unix()) - archive.MaxRetention() 1473 | for i := 0; i < len(points); i++ { 1474 | if points[i].interval >= bound { 1475 | points = points[i:] 1476 | break 1477 | } 1478 | } 1479 | 1480 | pointsByArchives[i] = points 1481 | rets[i].avgCompressedPointSize = estimatePointSize(points, rets[i], DefaultPointsPerBlock) 1482 | } 1483 | 1484 | dst, err := CreateWithOptions( 1485 | dstPath, rets, 1486 | whisper.aggregationMethod, whisper.xFilesFactor, 1487 | &Options{FLock: true, Compressed: true, PointsPerBlock: DefaultPointsPerBlock}, 1488 | ) 1489 | if err != nil { 1490 | return err 1491 | } 1492 | defer dst.Close() 1493 | 1494 | // TODO: consider support moving the last data points to buffer 1495 | for i := len(whisper.archives) - 1; i >= 0; i-- { 1496 | points := pointsByArchives[i] 1497 | if _, err := dst.archives[i].appendToBlockAndRotate(points); 
err != nil { 1498 | return err 1499 | } 1500 | } 1501 | 1502 | if err := dst.WriteHeaderCompressed(); err != nil { 1503 | return err 1504 | } 1505 | 1506 | // TODO: check if compression is done correctly 1507 | 1508 | return err 1509 | } 1510 | 1511 | // estimatePointSize calculates point size estimation by doing an on-the-fly 1512 | // compression without changing archiveInfo state. 1513 | func estimatePointSize(ps []dataPoint, ret *Retention, pointsPerBlock int) float32 { 1514 | // Certain number of datapoints is needed in order to calculate a good size. 1515 | // Because when there is not enough data point for calculation, it would make a 1516 | // inaccurately big size. 1517 | // 1518 | // 30 is semi-ramdonly chosen based on a simple test. 1519 | if len(ps) < 30 { 1520 | return avgCompressedPointSize 1521 | } 1522 | 1523 | var sum int 1524 | for i := 0; i < len(ps); { 1525 | end := i + pointsPerBlock 1526 | if end > len(ps) { 1527 | end = len(ps) 1528 | } 1529 | 1530 | buf := make([]byte, pointsPerBlock*(MaxCompressedPointSize)+endOfBlockSize) 1531 | na := archiveInfo{ 1532 | Retention: *ret, 1533 | offset: 0, 1534 | blockRanges: make([]blockRange, 1), 1535 | blockSize: len(buf), 1536 | cblock: blockInfo{ 1537 | index: 0, 1538 | lastByteBitPos: 7, 1539 | lastByteOffset: 0, 1540 | }, 1541 | } 1542 | 1543 | size, left, _ := na.AppendPointsToBlock(buf, ps[i:end]) 1544 | if len(left) > 0 { 1545 | i += len(ps) - len(left) 1546 | } else { 1547 | i += pointsPerBlock 1548 | } 1549 | sum += size 1550 | } 1551 | size := float32(sum) / float32(len(ps)) 1552 | if math.IsNaN(float64(size)) || size <= 0 { 1553 | size = avgCompressedPointSize 1554 | } else { 1555 | size += sizeEstimationBuffer 1556 | } 1557 | return size 1558 | } 1559 | 1560 | func (whisper *Whisper) IsCompressed() bool { return whisper.compressed } 1561 | 1562 | // memFile is simple implementation of in-memory file system. 1563 | // Close doesn't release the file from memory, need to call releaseMemFile. 
// memFile is a minimal in-memory stand-in for *os.File, backed by a byte
// slice. Instances are cached globally by name (see newMemFile); Close is a
// no-op and releaseMemFile must be called to actually drop the data.
type memFile struct {
	name   string // cache key in memFiles
	data   []byte // file contents
	offset int64  // cursor used by Read/Write/Seek
}

// memFiles caches memFile instances by name so reopening a path returns the
// same backing data.
var memFiles sync.Map

// newMemFile returns the cached file for name (with its cursor rewound), or
// registers and returns a fresh empty one.
func newMemFile(name string) *memFile {
	val, ok := memFiles.Load(name)
	if ok {
		val.(*memFile).offset = 0
		return val.(*memFile)
	}
	var mf memFile
	mf.name = name
	memFiles.Store(name, &mf)
	return &mf
}

// releaseMemFile drops the named file from the cache, freeing its data.
func releaseMemFile(name string) { memFiles.Delete(name) }

// Fd returns a pseudo file descriptor (the instance address); it only needs
// to be unique per open file, not a real OS descriptor.
func (mf *memFile) Fd() uintptr  { return uintptr(unsafe.Pointer(mf)) } // skipcq: GSC-G103
func (mf *memFile) Name() string { return mf.name }
func (mf *memFile) Close() error { return nil } // skipcq: RVV-B0013

// Seek moves the cursor like os.File.Seek (0=start, 1=current, 2=end).
// It never fails; seeking past the end is allowed.
func (mf *memFile) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case 0:
		mf.offset = offset
	case 1:
		mf.offset += offset
	case 2:
		mf.offset = int64(len(mf.data)) + offset
	}
	return mf.offset, nil
}

// ReadAt copies data starting at off into b, returning io.EOF on a short read.
func (mf *memFile) ReadAt(b []byte, off int64) (n int, err error) {
	n = copy(b, mf.data[off:])
	if n < len(b) {
		err = io.EOF
	}
	return
}

// WriteAt writes b at offset off, growing (zero-filling) the file as needed.
func (mf *memFile) WriteAt(b []byte, off int64) (n int, err error) {
	if l := int64(len(mf.data)); l <= off {
		// Grow up to (and including) offset off so the slice below is valid.
		mf.data = append(mf.data, make([]byte, off-l+1)...)
	}
	// Extend further until [off, off+len(b)) fits entirely.
	for l, i := len(mf.data[off:]), 0; i < len(b)-l; i++ {
		mf.data = append(mf.data, 0)
	}
	n = copy(mf.data[off:], b)
	if n < len(b) {
		err = io.EOF
	}
	return
}

// Read copies from the cursor position and advances it; io.EOF on short read.
func (mf *memFile) Read(b []byte) (n int, err error) {
	n = copy(b, mf.data[mf.offset:])
	if n < len(b) {
		err = io.EOF
	}
	mf.offset += int64(n)
	return
}

// Write writes at the cursor position and advances it.
func (mf *memFile) Write(b []byte) (n int, err error) {
	n, err = mf.WriteAt(b, mf.offset)
	mf.offset += int64(n)
	return
}

// Truncate shrinks or zero-extends the file to exactly size bytes.
func (mf *memFile) Truncate(size int64) error {
	if int64(len(mf.data)) >= size {
		mf.data = mf.data[:size]
	} else {
		mf.data = append(mf.data, make([]byte, size-int64(len(mf.data)))...)
	}
	return nil
}

// dumpOnDisk persists the in-memory contents to fpath.
func (mf *memFile) dumpOnDisk(fpath string) error { return ioutil.WriteFile(fpath, mf.data, 0644) }

// FillCompressed backfill cwhisper files from srcw.
// The old and new whisper should have the same retention policies.
1650 | func (dstw *Whisper) FillCompressed(srcw *Whisper) error { 1651 | defer dstw.Close() 1652 | 1653 | pointsByArchives, err := dstw.retrieveAndMerge(srcw) 1654 | if err != nil { 1655 | return err 1656 | } 1657 | 1658 | var rets []*Retention 1659 | for i, arc := range dstw.archives { 1660 | ret := &Retention{secondsPerPoint: arc.secondsPerPoint, numberOfPoints: arc.numberOfPoints} 1661 | points := pointsByArchives[i] 1662 | 1663 | ret.avgCompressedPointSize = estimatePointSize(points, ret, ret.calculateSuitablePointsPerBlock(dstw.pointsPerBlock)) 1664 | 1665 | rets = append(rets, ret) 1666 | } 1667 | 1668 | var mixSpecs []MixAggregationSpec 1669 | var mixSizes = make(map[int][]float32) 1670 | if dstw.aggregationMethod == Mix && len(rets) > 1 { 1671 | rets, mixSpecs, mixSizes = extractMixSpecs(rets, srcw.archives) 1672 | } 1673 | 1674 | newDst, err := CreateWithOptions( 1675 | dstw.file.Name()+".fill", rets, 1676 | dstw.aggregationMethod, dstw.xFilesFactor, 1677 | &Options{ 1678 | FLock: true, Compressed: true, 1679 | PointsPerBlock: DefaultPointsPerBlock, 1680 | InMemory: true, // need to close file if switch to non in-memory 1681 | MixAggregationSpecs: mixSpecs, 1682 | MixAvgCompressedPointSizes: mixSizes, 1683 | }, 1684 | ) 1685 | if err != nil { 1686 | return err 1687 | } 1688 | defer releaseMemFile(newDst.file.Name()) 1689 | 1690 | for i := len(dstw.archives) - 1; i >= 0; i-- { 1691 | points := pointsByArchives[i] 1692 | if _, err := newDst.archives[i].appendToBlockAndRotate(points); err != nil { 1693 | return err 1694 | } 1695 | copy(newDst.archives[i].buffer, dstw.archives[i].buffer) 1696 | } 1697 | if err := newDst.WriteHeaderCompressed(); err != nil { 1698 | return err 1699 | } 1700 | 1701 | data := newDst.file.(*memFile).data 1702 | if err := dstw.file.Truncate(int64(len(data))); err != nil { 1703 | fmt.Printf("convert: failed to truncate %s: %s", dstw.file.Name(), err) 1704 | } 1705 | if err := dstw.fileWriteAt(data, 0); err != nil { 1706 | return err 
1707 | } 1708 | 1709 | f := dstw.file 1710 | *dstw = *newDst 1711 | dstw.file = f 1712 | 1713 | return nil 1714 | } 1715 | 1716 | func (whisper *Whisper) propagateToMixedArchivesCompressed() error { 1717 | var lastArchive = whisper.archives[len(whisper.archives)-1] 1718 | var largestSPP = lastArchive.secondsPerPoint 1719 | if largestSPP == 0 { 1720 | return nil 1721 | } 1722 | 1723 | var firstArchive = whisper.archives[0] 1724 | var firstStart, firstEnd = firstArchive.getRange() 1725 | var _, lastEnd = lastArchive.getRange() 1726 | 1727 | var from int 1728 | if lastEnd > 0 { 1729 | from = lastEnd 1730 | } else { 1731 | from = firstStart 1732 | } 1733 | 1734 | // 1s:1d,1m:30d,1h:1y,1d:10y 1735 | // 86400,43200,8760,3650 1736 | // 1737 | // 7200 -> 2h 1738 | // 1739 | // [0 - 7200) 1740 | // [7200 - 14400) 1741 | 1742 | // Why "- 1": always exclude the last data point to make sure it's not 1743 | // a pre-mature propagation. propagation aggregation is "mod down", check 1744 | // archiveInfo.AggregateInterval. 1745 | var until = lastArchive.Interval(firstEnd) - largestSPP*5 - 1 1746 | 1747 | if until-from <= 0 { 1748 | return nil 1749 | } 1750 | 1751 | dps, err := whisper.fetchCompressed(int64(from), int64(until), firstArchive) 1752 | if err != nil { 1753 | return fmt.Errorf("mix: failed to firstArchive.fetchCompressed(%d, %d): %s", from, until, err) 1754 | } 1755 | 1756 | adps := whisper.aggregateByArchives(dps) 1757 | for _, arc := range whisper.archives[1:] { 1758 | if dps := adps[arc]; len(dps) > 0 { 1759 | if _, err := arc.appendToBlockAndRotate(dps); err != nil { 1760 | return fmt.Errorf("mix: failed to propagate archive %s: %s", arc.Retention, err) 1761 | } 1762 | } 1763 | } 1764 | 1765 | return nil 1766 | } 1767 | 1768 | // NOTE: this method could be called from both read and write paths. 
1769 | func (whisper *Whisper) aggregateByArchives(dps []dataPoint) (adps map[*archiveInfo][]dataPoint) { 1770 | adps = map[*archiveInfo][]dataPoint{} 1771 | 1772 | if len(dps) == 0 { 1773 | return // TODO: should be an error? 1774 | } 1775 | 1776 | var spps []int 1777 | for _, arc := range whisper.archives[1:] { 1778 | var knownSPP bool 1779 | for _, spp := range spps { 1780 | knownSPP = knownSPP || (arc.secondsPerPoint == spp) 1781 | } 1782 | if !knownSPP { 1783 | spps = append(spps, arc.secondsPerPoint) 1784 | } 1785 | } 1786 | 1787 | sort.SliceStable(dps, func(i, j int) bool { return dps[i].interval < dps[j].interval }) 1788 | 1789 | type groupedDataPoint struct { 1790 | interval int 1791 | values []float64 1792 | } 1793 | var dpsBySPP = map[int][]groupedDataPoint{} 1794 | 1795 | for i, dp := range dps { 1796 | if i < len(dps)-1 && dps[i+1].interval == dp.interval { 1797 | continue 1798 | } 1799 | 1800 | for _, spp := range spps { 1801 | interval := dp.interval - mod(dp.interval, spp) // same as archiveInfo.AggregateInterval 1802 | 1803 | if len(dpsBySPP[spp]) == 0 { 1804 | gdp := groupedDataPoint{ 1805 | interval: interval, 1806 | values: []float64{dp.value}, 1807 | } 1808 | 1809 | dpsBySPP[spp] = append(dpsBySPP[spp], gdp) 1810 | continue 1811 | } 1812 | 1813 | gdp := &dpsBySPP[spp][len(dpsBySPP[spp])-1] 1814 | if gdp.interval == interval { 1815 | gdp.values = append(gdp.values, dp.value) 1816 | continue 1817 | } 1818 | 1819 | // check we have enough data points to propagate a value 1820 | baseArchive := whisper.archives[0] 1821 | knownPercent := float32(len(gdp.values)) / float32(spp/baseArchive.secondsPerPoint) 1822 | if knownPercent < whisper.xFilesFactor { 1823 | // clean up the last data point 1824 | gdp.interval = interval 1825 | gdp.values = []float64{dp.value} 1826 | continue 1827 | } 1828 | 1829 | gdp = &groupedDataPoint{ 1830 | interval: interval, 1831 | values: []float64{dp.value}, 1832 | } 1833 | 1834 | dpsBySPP[spp] = append(dpsBySPP[spp], *gdp) 
1835 | continue 1836 | } 1837 | } 1838 | 1839 | for _, arc := range whisper.archives[1:] { 1840 | gdps := dpsBySPP[arc.secondsPerPoint] 1841 | dps := make([]dataPoint, 0, len(gdps)) 1842 | _, limit := arc.getRange() // NOTE: not supporting propagation rewrite/out of order 1843 | for _, gdp := range gdps { 1844 | if gdp.interval <= limit { 1845 | continue 1846 | } 1847 | 1848 | dps = append(dps, dataPoint{}) 1849 | dp := &dps[len(dps)-1] 1850 | dp.interval = gdp.interval 1851 | 1852 | if arc.aggregationSpec == nil { 1853 | values := gdp.values 1854 | dp.value = aggregate(whisper.aggregationMethod, values) 1855 | } else if arc.aggregationSpec.Method == Percentile { 1856 | // sorted for percentiles 1857 | sort.Float64s(gdp.values) 1858 | dp.value = aggregatePercentile(arc.aggregationSpec.Percentile, gdp.values) 1859 | } else { 1860 | dp.value = aggregate(arc.aggregationSpec.Method, gdp.values) 1861 | } 1862 | } 1863 | if len(dps) == 0 { 1864 | continue 1865 | } 1866 | 1867 | adps[arc] = dps 1868 | } 1869 | 1870 | return 1871 | } 1872 | 1873 | // Same implementation copied from carbonapi, without using quickselect for 1874 | // keeping zero dependency. 
1875 | // percentile values: 0 - 100 1876 | func aggregatePercentile(p float32, vals []float64) float64 { 1877 | if len(vals) == 0 || p < 0 || p > 100 { 1878 | return math.NaN() 1879 | } 1880 | 1881 | k := (float64(len(vals)-1) * float64(p)) / 100 1882 | index := int(math.Ceil(k)) 1883 | remainder := k - float64(int(k)) 1884 | if remainder == 0 { 1885 | return vals[index] 1886 | } 1887 | return (vals[index] * remainder) + (vals[index-1] * (1 - remainder)) 1888 | } 1889 | -------------------------------------------------------------------------------- /compress_test.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "math" 9 | "math/rand" 10 | "os" 11 | "reflect" 12 | "testing" 13 | "time" 14 | 15 | "github.com/google/go-cmp/cmp" 16 | "github.com/google/go-cmp/cmp/cmpopts" 17 | "github.com/kr/pretty" 18 | ) 19 | 20 | // to run a full test suite: go test -full-test3 21 | 22 | func init() { 23 | log.SetFlags(log.Lshortfile) 24 | rand.Seed(time.Now().UnixNano()) // skipcq: GSC-G404 25 | 26 | if err := os.MkdirAll("tmp", 0750); err != nil { 27 | panic(err) 28 | } 29 | } 30 | 31 | func TestBitsReadWrite(t *testing.T) { 32 | buf := make([]byte, 256) 33 | 34 | var bw bitsWriter 35 | bw.buf = buf 36 | bw.bitPos = 7 37 | 38 | var br bitsReader 39 | br.buf = buf 40 | br.bitPos = 7 41 | 42 | input := []struct { 43 | val uint64 44 | len int 45 | }{ 46 | {len: 8, val: 5}, 47 | {len: 16, val: 97}, 48 | {len: 32, val: 123}, 49 | {len: 64, val: math.Float64bits(95.1)}, 50 | {len: 64, val: 0xf1f2f3f4f5f6f7f8}, 51 | 52 | {len: 1, val: 1}, 53 | {len: 15, val: 0x7f7f}, 54 | {len: 55, val: 0x7f2f3f4f5f6f7f}, 55 | } 56 | for _, d := range input { 57 | bw.Write(d.len, d.val) 58 | } 59 | for i, d := range input { 60 | if got, want := br.Read(d.len), d.val; got != want { 61 | t.Errorf("%d: br.Read(%d) = %x; want %b", i, d.len, got, want) 62 | } 63 | } 64 | } 65 | 66 
| func TestBlockReadWrite1(t *testing.T) { 67 | for i := 0; i < 1; i++ { 68 | var acv archiveInfo 69 | acv.secondsPerPoint = 1 70 | acv.numberOfPoints = 64 71 | acv.cblock.lastByteBitPos = 7 72 | acv.blockSize = 64 * PointSize 73 | acv.blockRanges = make([]blockRange, 1) 74 | 75 | ts := 1543689630 76 | var delta int 77 | next := func(incs ...int) int { 78 | for _, i := range incs { 79 | delta += i 80 | } 81 | return ts + delta 82 | } 83 | 84 | var input []dataPoint 85 | rand.Seed(time.Now().Unix()) // skipcq: GSC-G404 86 | input = append( 87 | input, 88 | dataPoint{interval: next(1), value: 1}, 89 | dataPoint{interval: next(1), value: 1}, 90 | dataPoint{interval: next(1), value: 1}, 91 | ) 92 | for i := 0; i < 200; i++ { 93 | input = append(input, dataPoint{interval: next(rand.Intn(10) + 1), value: rand.NormFloat64()}) // skipcq: GSC-G404 94 | } 95 | 96 | buf := make([]byte, acv.blockSize) 97 | _, left, _ := acv.AppendPointsToBlock(buf, input) 98 | 99 | points := make([]dataPoint, 0, 200) 100 | points, _, err := acv.ReadFromBlock(buf, points, ts, ts+60*60*60) 101 | if err != nil { 102 | t.Error(err) 103 | } 104 | 105 | if !reflect.DeepEqual(input, append(points, left...)) { 106 | if diff := cmp.Diff(input, points, cmp.AllowUnexported(dataPoint{})); diff != "" { 107 | t.Error(diff) 108 | } 109 | 110 | t.FailNow() 111 | } 112 | } 113 | } 114 | 115 | func TestBlockReadWrite2(t *testing.T) { 116 | for i := 0; i < 1; i++ { 117 | var acv archiveInfo 118 | acv.secondsPerPoint = 1 119 | acv.numberOfPoints = 100 120 | acv.cblock.lastByteBitPos = 7 121 | acv.blockSize = int(float32(acv.numberOfPoints) * avgCompressedPointSize) 122 | acv.blockRanges = make([]blockRange, 1) 123 | 124 | var input []dataPoint = []dataPoint{ 125 | {interval: 1544456874, value: 12}, 126 | {interval: 1544456875, value: 24}, 127 | {interval: 1544456876, value: 15}, 128 | {interval: 1544456877, value: 1}, 129 | {interval: 1544456878, value: 2}, 130 | {interval: 1544456888, value: 3}, 131 | {interval: 
1544456889, value: 4}, 132 | {interval: 1544457000, value: 4}, 133 | {interval: 1544458000, value: 4}, 134 | {interval: 1544476889, value: 4}, 135 | } 136 | 137 | buf := make([]byte, acv.blockSize) 138 | acv.AppendPointsToBlock(buf, input[:1]) 139 | acv.AppendPointsToBlock(buf[acv.cblock.lastByteOffset:], input[1:5]) 140 | acv.AppendPointsToBlock(buf[acv.cblock.lastByteOffset:], input[5:]) 141 | 142 | points, _, err := acv.ReadFromBlock(buf, make([]dataPoint, 0, 200), 1544456874, 1544477000) 143 | if err != nil { 144 | t.Error(err) 145 | } 146 | 147 | if !reflect.DeepEqual(input, points) { 148 | pretty.Printf("%# v\n", input) 149 | pretty.Printf("%# v\n", points) 150 | if diff := cmp.Diff(input, points, cmp.AllowUnexported(dataPoint{})); diff != "" { 151 | t.Error(diff) 152 | } 153 | 154 | t.FailNow() 155 | } 156 | } 157 | } 158 | 159 | func TestCompressedWhisperReadWrite1(t *testing.T) { 160 | fpath := "tmp/comp1.whisper" 161 | os.Remove(fpath) 162 | whisper, err := CreateWithOptions( 163 | fpath, 164 | []*Retention{ 165 | {secondsPerPoint: 1, numberOfPoints: 100}, 166 | {secondsPerPoint: 5, numberOfPoints: 100}, 167 | }, 168 | Sum, 169 | 0.7, 170 | &Options{Compressed: true, PointsPerBlock: 7200}, 171 | ) 172 | if err != nil { 173 | panic(err) 174 | } 175 | 176 | ts := int(Now().Add(time.Second * -60).Unix()) 177 | var delta int 178 | next := func(incs int) int { 179 | delta += incs 180 | return ts + delta 181 | } 182 | input := []*TimeSeriesPoint{ 183 | {Time: next(1), Value: 12}, 184 | {Time: next(1), Value: 24}, 185 | {Time: next(1), Value: 15}, 186 | {Time: next(1), Value: 1}, 187 | {Time: next(1), Value: 2}, 188 | {Time: next(10), Value: 3}, 189 | {Time: next(1), Value: 4}, 190 | {Time: next(1), Value: 15.5}, 191 | {Time: next(1), Value: 14.0625}, 192 | {Time: next(1), Value: 3.25}, 193 | {Time: next(1), Value: 8.625}, 194 | {Time: next(1), Value: 13.1}, 195 | } 196 | 197 | if err := whisper.UpdateMany(input); err != nil { 198 | t.Error(err) 199 | } 200 | 
201 | // this negative data points should be ignored 202 | outOfOrderDataPoint := TimeSeriesPoint{Time: next(0) - 10, Value: 12} 203 | if err := whisper.UpdateMany([]*TimeSeriesPoint{&outOfOrderDataPoint}); err != nil { 204 | t.Error(err) 205 | } 206 | if got, want := whisper.GetDiscardedPointsSinceOpen(), uint32(1); got != want { 207 | t.Errorf("whisper.GetDiscardedPointsSinceOpen() = %d; want %d", got, want) 208 | } 209 | 210 | whisper.Close() 211 | 212 | whisper, err = OpenWithOptions(fpath, &Options{}) 213 | if err != nil { 214 | t.Fatal(err) 215 | } 216 | 217 | // t.Run("out_of_order_write", func(t *testing.T) { 218 | // expectVals := make([]float64, 60) 219 | // for i := 0; i < 60; i++ { 220 | // expectVals[i] = math.NaN() 221 | // } 222 | // for _, p := range input { 223 | // expectVals[p.Time-ts-1] = p.Value 224 | // } 225 | // expectVals[outOfOrderDataPoint.Time-ts-1] = outOfOrderDataPoint.Value 226 | // expect := &TimeSeries{ 227 | // fromTime: ts + 1, 228 | // untilTime: ts + 61, 229 | // step: 1, 230 | // values: expectVals, 231 | // } 232 | // if ts, err := whisper.Fetch(ts, ts+300); err != nil { 233 | // t.Error(err) 234 | // } else if diff := cmp.Diff(ts, expect, cmp.AllowUnexported(TimeSeries{}), cmpopts.EquateNaNs()); diff != "" { 235 | // t.Error(diff) 236 | // } 237 | // }) 238 | 239 | // this test case is no longer valid for cwhisper version 2, buffer 240 | // design is deprecated. 
241 | t.Run("buffer_overflow", func(t *testing.T) { 242 | // fmt.Println("---") 243 | // whisper.archives[0].dumpDataPointsCompressed() 244 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 245 | {Time: next(5), Value: 10}, 246 | {Time: next(5), Value: 11}, 247 | {Time: next(5), Value: 12}, 248 | }); err != nil { 249 | t.Error(err) 250 | } 251 | // fmt.Println("---") 252 | // whisper.archives[0].dumpDataPointsCompressed() 253 | 254 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 255 | {Time: next(0) - 15, Value: 13}, 256 | {Time: next(0) - 10, Value: 13}, 257 | }); err != nil { 258 | t.Error(err) 259 | } 260 | // fmt.Println("---") 261 | // whisper.archives[0].dumpDataPointsCompressed() 262 | 263 | // debugCompress = true 264 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 265 | {Time: next(0) - 5, Value: 14}, 266 | {Time: next(0) - 0, Value: 15}, 267 | }); err != nil { 268 | t.Error(err) 269 | } 270 | // debugCompress = false 271 | 272 | // fmt.Println("---") 273 | // whisper.archives[0].dumpDataPointsCompressed() 274 | 275 | expect := []TimeSeriesPoint{ 276 | {Time: next(0) - 14, Value: math.NaN()}, 277 | {Time: next(0) - 13, Value: math.NaN()}, 278 | {Time: next(0) - 12, Value: math.NaN()}, 279 | {Time: next(0) - 11, Value: math.NaN()}, 280 | {Time: next(0) - 10, Value: 10}, 281 | {Time: next(0) - 9, Value: math.NaN()}, 282 | {Time: next(0) - 8, Value: math.NaN()}, 283 | {Time: next(0) - 7, Value: math.NaN()}, 284 | {Time: next(0) - 6, Value: math.NaN()}, 285 | {Time: next(0) - 5, Value: 14}, 286 | {Time: next(0) - 4, Value: math.NaN()}, 287 | {Time: next(0) - 3, Value: math.NaN()}, 288 | {Time: next(0) - 2, Value: math.NaN()}, 289 | {Time: next(0) - 1, Value: math.NaN()}, 290 | {Time: next(0) - 0, Value: 15}, 291 | } 292 | if ts, err := whisper.Fetch(next(0)-15, next(0)); err != nil { 293 | t.Error(err) 294 | } else if diff := cmp.Diff(ts.Points(), expect, cmp.AllowUnexported(TimeSeries{}), cmpopts.EquateNaNs()); diff != "" { 295 | t.Error(diff) 296 | } 
297 | }) 298 | whisper.Close() 299 | } 300 | 301 | func TestCompressedWhisperReadWrite2(t *testing.T) { 302 | fpath := "tmp/comp2.whisper" 303 | os.Remove(fpath) 304 | whisper, err := CreateWithOptions( 305 | fpath, 306 | []*Retention{ 307 | {secondsPerPoint: 1, numberOfPoints: 100}, 308 | {secondsPerPoint: 5, numberOfPoints: 100}, 309 | }, 310 | Sum, 311 | 0.7, 312 | &Options{Compressed: true, PointsPerBlock: 7200}, 313 | ) 314 | if err != nil { 315 | panic(err) 316 | } 317 | 318 | nowTs := 1544478230 319 | Now = func() time.Time { return time.Unix(int64(nowTs), 0) } 320 | defer func() { Now = time.Now }() 321 | 322 | input := []*TimeSeriesPoint{ 323 | {Time: nowTs - 300, Value: 666}, 324 | 325 | {Time: 1544478201, Value: 12}, 326 | 327 | {Time: 1544478211, Value: 24}, 328 | {Time: 1544478212, Value: 15}, 329 | {Time: 1544478213, Value: 1}, 330 | {Time: 1544478214, Value: 2}, 331 | 332 | {Time: 1544478224, Value: 3}, 333 | {Time: 1544478225, Value: 4}, 334 | {Time: 1544478226, Value: 15.5}, 335 | {Time: 1544478227, Value: 14.0625}, 336 | {Time: 1544478228, Value: 3.25}, 337 | {Time: 1544478229, Value: 8.625}, 338 | {Time: nowTs, Value: 13.1}, 339 | } 340 | 341 | for _, p := range input { 342 | if err := whisper.UpdateMany([]*TimeSeriesPoint{p}); err != nil { 343 | t.Error(err) 344 | } 345 | } 346 | whisper.Close() 347 | 348 | whisper, err = OpenWithOptions(fpath, &Options{Compressed: true, PointsPerBlock: 7200}) 349 | if err != nil { 350 | t.Fatal(err) 351 | } 352 | 353 | { 354 | expectVals := make([]float64, 4) 355 | for i := 0; i < 4; i++ { 356 | expectVals[i] = math.NaN() 357 | } 358 | expectVals[1] = input[0].Value 359 | expect := &TimeSeries{ 360 | fromTime: 1544477925, 361 | untilTime: 1544477945, 362 | step: 5, 363 | values: expectVals, 364 | } 365 | if ts, err := whisper.Fetch(nowTs-310, nowTs-290); err != nil { 366 | t.Error(err) 367 | } else if diff := cmp.Diff(ts, expect, cmp.AllowUnexported(TimeSeries{}), cmpopts.EquateNaNs()); diff != "" { 368 | 
t.Error(diff) 369 | } 370 | } 371 | 372 | { 373 | expectVals := make([]float64, 30) 374 | for i := 0; i < 30; i++ { 375 | expectVals[i] = math.NaN() 376 | } 377 | for _, p := range input[1:] { 378 | expectVals[29-(nowTs-p.Time)] = p.Value 379 | } 380 | expect := &TimeSeries{ 381 | fromTime: 1544478201, 382 | untilTime: 1544478231, 383 | step: 1, 384 | values: expectVals, 385 | } 386 | if ts, err := whisper.Fetch(nowTs-30, nowTs); err != nil { 387 | t.Error(err) 388 | } else if diff := cmp.Diff(ts, expect, cmp.AllowUnexported(TimeSeries{}), cmpopts.EquateNaNs()); diff != "" { 389 | t.Error(diff) 390 | } 391 | } 392 | } 393 | 394 | var fullTest3 = flag.Bool("full-test3", false, "run a full test of TestCompressedWhisperReadWrite3") 395 | var cacheTest3Data = flag.Bool("debug-test3", false, "save a data of TestCompressedWhisperReadWrite3 for debugging") 396 | 397 | // To run a full test of TestCompressedWhisperReadWrite3, it would take about 10 398 | // minutes, the slowness comes from standard whisper file propagation (around 10 399 | // times slower and comsume much more memory than compressed format). 400 | // 401 | // Parallel is disabled because we need to manipulate Now in order to simulate 402 | // updates. 
403 | // 404 | // TODO: cache data to make failed tests repeatable and easier to debug 405 | func TestCompressedWhisperReadWrite3(t *testing.T) { 406 | // TODO: add a test case of mixing random and sequential values/times 407 | inputs := []struct { 408 | name string 409 | randLimit func() int 410 | fullTest func() bool 411 | gen func(prevTime time.Time, index int) *TimeSeriesPoint 412 | }{ 413 | { 414 | name: "random_time", 415 | fullTest: func() bool { return true }, 416 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 417 | return &TimeSeriesPoint{ 418 | Value: 0, 419 | Time: int(prevTime.Add(time.Duration(rand.Intn(4096)+1) * time.Second).Unix()), // skipcq: GSC-G404 420 | } 421 | }, 422 | }, 423 | { 424 | name: "random_time_value", 425 | fullTest: func() bool { return true }, 426 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 427 | return &TimeSeriesPoint{ 428 | Value: rand.NormFloat64(), // skipcq: GSC-G404 429 | Time: int(prevTime.Add(time.Duration(rand.Intn(3600*24)+1) * time.Second).Unix()), // skipcq: GSC-G404 430 | } 431 | }, 432 | }, 433 | { 434 | name: "less_random_time_value", 435 | fullTest: func() bool { return true }, 436 | // randLimit: func() int { return 300 }, 437 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 438 | return &TimeSeriesPoint{ 439 | Value: 2000.0 + float64(rand.Intn(1000)), // skipcq: GSC-G404 440 | Time: int(prevTime.Add(time.Duration(rand.Intn(60)) * time.Second).Unix()), // skipcq: GSC-G404 441 | } 442 | }, 443 | }, 444 | { 445 | name: "fast_simple", 446 | fullTest: func() bool { return true }, 447 | randLimit: func() int { return 300 }, 448 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 449 | return &TimeSeriesPoint{Value: 2000.0 + float64(rand.Intn(1000)), Time: int(prevTime.Add(time.Second * 60).Unix())} // skipcq: GSC-G404 450 | }, 451 | }, 452 | 453 | // these are slow tests, turned off by default 454 | { 455 | name: "random_value", 456 | fullTest: func() bool { 
return *fullTest3 }, 457 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 458 | return &TimeSeriesPoint{ 459 | Value: rand.NormFloat64(), // skipcq: GSC-G404 460 | Time: int(prevTime.Add(time.Second).Unix()), 461 | } 462 | }, 463 | }, 464 | { 465 | name: "random_value2", 466 | fullTest: func() bool { return *fullTest3 }, 467 | randLimit: func() int { return rand.Intn(300) + (60 * 60 * 24) }, // skipcq: GSC-G404 468 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 469 | return &TimeSeriesPoint{ 470 | Value: 2000.0 + float64(rand.Intn(1000)), // skipcq: GSC-G404 471 | Time: int(prevTime.Add(time.Second).Unix()), 472 | } 473 | }, 474 | }, 475 | { 476 | name: "simple", 477 | fullTest: func() bool { return *fullTest3 }, 478 | gen: func(prevTime time.Time, index int) *TimeSeriesPoint { 479 | return &TimeSeriesPoint{Value: 0, Time: int(prevTime.Add(time.Second).Unix())} 480 | }, 481 | }, 482 | } 483 | 484 | os.MkdirAll("tmp", 0750) 485 | inMemory := true 486 | for i := range inputs { 487 | input := inputs[i] 488 | if input.randLimit == nil { 489 | input.randLimit = func() int { return rand.Intn(300) } // skipcq: GSC-G404 490 | } 491 | 492 | t.Run(input.name, func(t *testing.T) { 493 | // can't run tests parallel here because they modify Now 494 | // t.Parallel() 495 | t.Logf("case: %s\n", input.name) 496 | 497 | fpath := fmt.Sprintf("tmp/test3_%s.wsp", input.name) 498 | os.Remove(fpath) 499 | os.Remove(fpath + ".cwsp") 500 | 501 | var dataDebugFile *os.File 502 | if *cacheTest3Data { 503 | var err error 504 | dataDebugFile, err = os.Create(fmt.Sprintf("tmp/test3_%s.data", input.name)) 505 | if err != nil { 506 | t.Fatal(err) 507 | } 508 | } 509 | 510 | cwhisper, err := CreateWithOptions( 511 | fpath+".cwsp", 512 | []*Retention{ 513 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 514 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 515 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 516 | }, 517 | Average, 518 | 0, 519 
| &Options{Compressed: true, PointsPerBlock: 7200, InMemory: inMemory}, 520 | ) 521 | if err != nil { 522 | panic(err) 523 | } 524 | ncwhisper, err := CreateWithOptions( 525 | fpath, 526 | []*Retention{ 527 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 528 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 529 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 530 | }, 531 | Average, 532 | 0, 533 | &Options{Compressed: false, PointsPerBlock: 7200, InMemory: inMemory}, 534 | ) 535 | if err != nil { 536 | panic(err) 537 | } 538 | cwhisper.Close() 539 | ncwhisper.Close() 540 | 541 | // var now = time.Now() 542 | var now = time.Unix(1589720099, 0) 543 | var total = 60*60*24*365*2 + 37 544 | var start = now.Add(time.Second * time.Duration(total) * -1) 545 | Now = func() time.Time { return start } 546 | defer func() { Now = time.Now }() 547 | 548 | // var psArr [][]*TimeSeriesPoint 549 | var ps []*TimeSeriesPoint 550 | var limit = input.randLimit() 551 | var statTotalUpdates, extended, totalPoints int 552 | firstArchiveBound := cwhisper.Retentions()[0].MaxRetention() 553 | for i := 0; i < total; i++ { 554 | p := input.gen(start, i) 555 | var toAppend = true 556 | if len(ps) == 0 || p.Time-ps[0].Time < firstArchiveBound { 557 | ps = append(ps, p) 558 | start = time.Unix(int64(p.Time), 0) 559 | toAppend = false 560 | } 561 | 562 | if toAppend || len(ps) >= limit || start.After(now) { 563 | // fmt.Printf("%d toAppend = %v\r", start.Unix(), len(ps)) 564 | // fmt.Printf("progress: %.2f%% len(points): %d\r", 100-float64(now.Unix()-start.Unix())*100/float64(total), len(ps)) 565 | limit = input.randLimit() 566 | statTotalUpdates++ 567 | totalPoints += len(ps) 568 | // psArr = append(psArr, ps) 569 | 570 | cwhisper, err = OpenWithOptions(fpath+".cwsp", &Options{InMemory: inMemory}) 571 | if err != nil { 572 | t.Fatal(err) 573 | } 574 | if err := cwhisper.UpdateMany(ps); err != nil { 575 | t.Fatal(err) 576 | } 577 | if err := cwhisper.Close(); err != nil 
{ 578 | t.Fatal(err) 579 | } 580 | 581 | if cwhisper.Extended { 582 | // for _, a := range cwhisper.archives { 583 | // t.Logf("extended: %s: %d\n", a.Retention, a.totalPoints()) 584 | // } 585 | extended++ 586 | } 587 | 588 | if input.fullTest() { 589 | if *cacheTest3Data { 590 | // if _, err := fmt.Fprintf(dataDebugFile, "%d\n", len(ps)); err != nil { 591 | // t.Fatal(err) 592 | // } 593 | for _, p := range ps { 594 | if _, err := fmt.Fprintf(dataDebugFile, "%d %d %d %v\n", p.Time, p.Time-mod(p.Time, 60), p.Time-mod(p.Time, 3600), p.Value); err != nil { 595 | t.Fatal(err) 596 | } 597 | } 598 | } 599 | ncwhisper, err = OpenWithOptions(fpath, &Options{InMemory: inMemory}) 600 | if err != nil { 601 | t.Fatal(err) 602 | } 603 | if err := ncwhisper.UpdateMany(ps); err != nil { 604 | t.Fatal(err) 605 | } 606 | if err := ncwhisper.Close(); err != nil { 607 | t.Fatal(err) 608 | } 609 | } 610 | 611 | ps = ps[:0] 612 | } 613 | if start.After(now) { 614 | break 615 | } 616 | } 617 | 618 | if *cacheTest3Data { 619 | dataDebugFile.Close() 620 | } 621 | 622 | t.Logf("statTotalUpdates: %d extended: %d totalPoints: %d\n", statTotalUpdates, extended, totalPoints) 623 | // for _, a := range cwhisper.archives { 624 | // t.Logf("%s: %d\n", a.Retention, a.totalPoints()) 625 | // } 626 | 627 | if inMemory { 628 | if err := newMemFile(fpath).dumpOnDisk(fpath); err != nil { 629 | t.Fatal(err) 630 | } 631 | if err := newMemFile(fpath + ".cwsp").dumpOnDisk(fpath + ".cwsp"); err != nil { 632 | t.Fatal(err) 633 | } 634 | } 635 | 636 | // { 637 | // data, err := json.Marshal(psArr) 638 | // if err != nil { 639 | // panic(err) 640 | // } 641 | // if err := ioutil.WriteFile("test3.json", data, 0644); err != nil { 642 | // panic(err) 643 | // } 644 | // } 645 | 646 | if input.fullTest() { 647 | t.Log("go", "run", "cmd/compare.go", "-v", "-now", fmt.Sprintf("%d", now.Unix()), fpath, fpath+".cwsp") 648 | // output, err := exec.Command("go", "run", "cmd/compare.go", "-now", fmt.Sprintf("%d", 
now.Unix()), fpath, fpath+".cwsp").CombinedOutput() 649 | output, err := Compare(fpath, fpath+".cwsp", int(now.Unix()), false, "", false, false, 2) 650 | 651 | if err != nil { 652 | t.Log(string(output)) 653 | t.Error(err) 654 | } 655 | } 656 | 657 | std, err := os.Stat(fpath) 658 | if err != nil { 659 | t.Error(err) 660 | } 661 | cmp, err := os.Stat(fpath + ".cwsp") 662 | if err != nil { 663 | t.Error(err) 664 | } 665 | t.Logf("compression ratio %s: %.2f%%\n", input.name, float64(cmp.Size()*100)/float64(std.Size())) 666 | }) 667 | } 668 | } 669 | 670 | func TestCompressedWhisperBufferOOOWrite(t *testing.T) { 671 | fpath := "tmp/cwhisper_buffer_ooo_write.wsp" 672 | os.Remove(fpath) 673 | 674 | whisper, err := CreateWithOptions( 675 | fpath, 676 | MustParseRetentionDefs("1m:15d,30m:2y"), 677 | Sum, 678 | 0, 679 | &Options{Compressed: true, PointsPerBlock: 7200}, 680 | ) 681 | if err != nil { 682 | t.Fatal(err) 683 | } 684 | 685 | start := 1544478230 686 | nowTs := start 687 | Now = func() time.Time { return time.Unix(int64(nowTs), 0) } 688 | defer func() { Now = time.Now }() 689 | 690 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 691 | {Time: nowTs - 1800 - 300, Value: 666}, 692 | {Time: nowTs - 1800 - 240, Value: 666}, 693 | {Time: nowTs - 1800 - 180, Value: 666}, 694 | {Time: nowTs - 1800 - 120, Value: 666}, 695 | {Time: nowTs - 1800 - 60, Value: 666}, 696 | {Time: nowTs - 1800 - 60 + 1020, Value: 666}, 697 | 698 | {Time: nowTs - 300, Value: 333}, 699 | {Time: nowTs - 240, Value: 333}, 700 | {Time: nowTs - 180, Value: 333}, 701 | {Time: nowTs - 120, Value: 333}, 702 | {Time: nowTs - 60, Value: 333}, 703 | }); err != nil { 704 | t.Error(err) 705 | } 706 | 707 | nowTs += 3600 708 | 709 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 710 | {Time: nowTs - 300, Value: 222}, 711 | {Time: nowTs - 240, Value: 222}, 712 | {Time: nowTs - 180, Value: 222}, 713 | {Time: nowTs - 120, Value: 222}, 714 | {Time: nowTs - 60, Value: 222}, 715 | }); err != nil { 716 | 
t.Error(err) 717 | } 718 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 719 | {Time: nowTs + 1800 - 300, Value: 777}, 720 | {Time: nowTs + 1800 - 240, Value: 777}, 721 | {Time: nowTs + 1800 - 180, Value: 777}, 722 | {Time: nowTs + 1800 - 120, Value: 777}, 723 | {Time: nowTs + 1800 - 60, Value: 777}, 724 | }); err != nil { 725 | t.Error(err) 726 | } 727 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 728 | {Time: nowTs - 400, Value: 111}, 729 | {Time: nowTs - 340, Value: 111}, 730 | {Time: nowTs - 280, Value: 111}, 731 | {Time: nowTs - 120, Value: 111}, 732 | {Time: nowTs - 60, Value: 111}, 733 | }); err != nil { 734 | t.Error(err) 735 | } 736 | 737 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 738 | {Time: nowTs + 1800 - 400, Value: 778}, 739 | {Time: nowTs + 1800 - 340, Value: 778}, 740 | {Time: nowTs + 1800 - 280, Value: 778}, 741 | {Time: nowTs + 1800 - 120, Value: 778}, 742 | {Time: nowTs + 1800 - 60, Value: 778}, 743 | }); err != nil { 744 | t.Error(err) 745 | } 746 | 747 | nowTs += 3600 748 | 749 | // flushing buffer value 750 | if err := whisper.UpdateMany([]*TimeSeriesPoint{ 751 | {Time: nowTs, Value: 000}, 752 | {Time: nowTs + 1800, Value: 000}, 753 | }); err != nil { 754 | t.Error(err) 755 | } 756 | 757 | whisper.Close() 758 | 759 | whisper, err = OpenWithOptions(fpath, &Options{}) 760 | if err != nil { 761 | t.Fatal(err) 762 | } 763 | 764 | ts, err := whisper.Fetch(start-3600, start+7200) 765 | if err != nil { 766 | t.Fatal(err) 767 | } 768 | 769 | var points []TimeSeriesPoint 770 | for _, p := range ts.Points() { 771 | if !math.IsNaN(p.Value) { 772 | points = append(points, p) 773 | } 774 | } 775 | 776 | if diff := cmp.Diff(points, []TimeSeriesPoint{ 777 | {Time: 1544476080, Value: 666}, {Time: 1544476140, Value: 666}, {Time: 1544476200, Value: 666}, 778 | {Time: 1544476260, Value: 666}, {Time: 1544476320, Value: 666}, {Time: 1544477340, Value: 666}, 779 | {Time: 1544477880, Value: 333}, {Time: 1544477940, Value: 333}, {Time: 1544478000, 
Value: 333}, 780 | {Time: 1544478060, Value: 333}, {Time: 1544478120, Value: 333}, 781 | {Time: 1544481420, Value: 111}, {Time: 1544481480, Value: 111}, {Time: 1544481540, Value: 111}, 782 | {Time: 1544481600, Value: 222}, {Time: 1544481660, Value: 111}, {Time: 1544481720, Value: 111}, 783 | {Time: 1544483220, Value: 778}, {Time: 1544483280, Value: 778}, {Time: 1544483340, Value: 778}, 784 | {Time: 1544483400, Value: 777}, {Time: 1544483460, Value: 778}, {Time: 1544483520, Value: 778}, 785 | {Time: 1544485380}, 786 | }, cmpopts.EquateNaNs()); diff != "" { 787 | t.Error(diff) 788 | } 789 | } 790 | 791 | func TestCompressedWhisperSingleRetentionOutOfOrderWrite(t *testing.T) { 792 | fpath := "tmp/test_single_retention_ooo.cwsp" 793 | os.Remove(fpath) 794 | 795 | rets := []*Retention{ 796 | {secondsPerPoint: 1, numberOfPoints: 7200}, 797 | } 798 | cwhisper, err := CreateWithOptions( 799 | fpath, rets, Sum, 0, 800 | &Options{ 801 | Compressed: true, PointsPerBlock: 1200, 802 | InMemory: false, IgnoreNowOnWrite: true, 803 | }, 804 | ) 805 | if err != nil { 806 | panic(err) 807 | } 808 | 809 | now := int(time.Now().Unix()) - 3600 810 | cwhisper.UpdateMany([]*TimeSeriesPoint{ 811 | {Value: 1, Time: now + 0}, 812 | {Value: 1, Time: now + 1}, 813 | {Value: 1, Time: now + 2}, 814 | }) 815 | cwhisper.UpdateMany([]*TimeSeriesPoint{ 816 | {Value: 0, Time: now + 1}, 817 | }) 818 | 819 | data, err := cwhisper.Fetch(now-1, now+2) 820 | if err != nil { 821 | t.Error(err) 822 | } 823 | if got, want := data.Points(), []TimeSeriesPoint{ 824 | {Time: now + 0, Value: 1}, 825 | {Time: now + 1, Value: 1}, 826 | {Time: now + 2, Value: 1}, 827 | }; !reflect.DeepEqual(got, want) { 828 | t.Errorf("data.Points() = %v; want %v", got, want) 829 | } 830 | 831 | cwhisper.Close() 832 | } 833 | 834 | func TestCompressTo(t *testing.T) { 835 | fpath := "compress_to.wsp" 836 | os.Remove(fpath) 837 | 838 | whisper, err := CreateWithOptions( 839 | fpath, 840 | []*Retention{ 841 | {secondsPerPoint: 1, 
numberOfPoints: 172800}, // 1s:2d 842 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 843 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 844 | }, 845 | Average, 846 | 0, 847 | &Options{Compressed: false, PointsPerBlock: 7200, InMemory: true}, 848 | ) 849 | if err != nil { 850 | panic(err) 851 | } 852 | whisper.Close() 853 | 854 | for _, archive := range whisper.archives { 855 | var ps []*TimeSeriesPoint 856 | for i := 0; i < archive.numberOfPoints; i++ { 857 | start := Now().Add(time.Second * time.Duration(archive.secondsPerPoint*i) * -1) 858 | ps = append(ps, &TimeSeriesPoint{ 859 | // Time: int(start.Add(time.Duration(i) * time.Second).Unix()), 860 | Time: int(start.Unix()), 861 | // Value: float64(i), 862 | // Value: 2000.0 + float64(rand.Intn(100000))/100.0, // skipcq: GSC-G404 863 | // Value: rand.NormFloat64(), // skipcq: GSC-G404 864 | Value: float64(rand.Intn(100000)), // skipcq: GSC-G404 865 | }) 866 | } 867 | whisper, err = OpenWithOptions(fpath, &Options{InMemory: true}) 868 | if err != nil { 869 | t.Fatal(err) 870 | } 871 | if err := whisper.UpdateMany(ps); err != nil { 872 | t.Fatal(err) 873 | } 874 | if err := whisper.Close(); err != nil { 875 | t.Fatal(err) 876 | } 877 | } 878 | whisper.file.(*memFile).dumpOnDisk(fpath) 879 | 880 | whisper, err = OpenWithOptions(fpath, &Options{}) 881 | if err != nil { 882 | t.Fatal(err) 883 | } 884 | os.Remove(fpath + ".cwsp") 885 | if err := whisper.CompressTo(fpath + ".cwsp"); err != nil { 886 | t.Fatal(err) 887 | } 888 | 889 | // output, err := exec.Command("go", "run", "cmd/compare.go", fpath, fpath+".cwsp").CombinedOutput() 890 | t.Log("go", "run", "cmd/compare.go", "-v", fpath, fpath+".cwsp") 891 | output, err := Compare(fpath, fpath+".cwsp", 0, false, "", false, false, 2) 892 | if err != nil { 893 | t.Fatalf("%s: %s", err, output) 894 | } 895 | } 896 | 897 | func TestRandomReadWrite(t *testing.T) { 898 | // os.Remove("test_random_read_write.wsp") 899 | fileTs := time.Now().Unix() 900 | 
cwhisper, err := CreateWithOptions( 901 | fmt.Sprintf("test_random_read_write.%d.wsp", fileTs), 902 | []*Retention{ 903 | {secondsPerPoint: 1, numberOfPoints: 1728000}, 904 | // {secondsPerPoint: 60, numberOfPoints: 40320}, 905 | // {secondsPerPoint: 3600, numberOfPoints: 17520}, 906 | }, 907 | Sum, 908 | 0, 909 | &Options{Compressed: true, PointsPerBlock: 7200}, 910 | ) 911 | if err != nil { 912 | panic(err) 913 | } 914 | 915 | start := Now() 916 | ptime := start 917 | var ps []*TimeSeriesPoint 918 | var vals []float64 919 | var entropy int 920 | for i := 0; i < cwhisper.Retentions()[0].numberOfPoints; i++ { 921 | gap := rand.Intn(10) + 1 // skipcq: GSC-G404 922 | ptime = ptime.Add(time.Second * time.Duration(gap)) 923 | if ptime.After(start.Add(time.Duration(cwhisper.Retentions()[0].numberOfPoints) * time.Second)) { 924 | break 925 | } 926 | for j := gap; j > 1; j-- { 927 | vals = append(vals, math.NaN()) 928 | } 929 | ts := &TimeSeriesPoint{ 930 | Time: int(ptime.Unix()), 931 | Value: rand.NormFloat64(), // skipcq: GSC-G404 932 | // Value: 2000.0 + float64(rand.Intn(100000))/100.0, // skipcq: GSC-G404 933 | // Value: float64(rand.Intn(100000)), // skipcq: GSC-G404 934 | } 935 | ps = append(ps, ts) 936 | vals = append(vals, ts.Value) 937 | entropy++ 938 | } 939 | 940 | if err := cwhisper.UpdateMany(ps); err != nil { 941 | t.Fatal(err) 942 | } 943 | 944 | Now = func() time.Time { return time.Unix(int64(ps[len(ps)-1].Time), 0) } 945 | defer func() { Now = time.Now }() 946 | 947 | ts, err := cwhisper.Fetch(int(start.Unix()), int(ptime.Unix())) 948 | if err != nil { 949 | t.Fatal(err) 950 | } 951 | 952 | // log.Printf("entropy = %+v\n", entropy) 953 | // log.Printf("len(vals) = %+v\n", len(vals)) 954 | // log.Printf("len(ts.Values()) = %+v\n", len(ts.Values())) 955 | 956 | if diff := cmp.Diff(ts.Values(), vals, cmp.AllowUnexported(dataPoint{}), cmpopts.EquateNaNs()); diff != "" { 957 | // t.Error(diff) 958 | t.Error("mismatch") 959 | cache, err := 
os.Create(fmt.Sprintf("test_random_read_write.%d.json", fileTs)) 960 | if err != nil { 961 | t.Fatal(err) 962 | } 963 | if err := json.NewEncoder(cache).Encode(ps); err != nil { 964 | t.Fatal(err) 965 | } 966 | cache.Close() 967 | } 968 | 969 | if err := cwhisper.Close(); err != nil { 970 | t.Fatal(err) 971 | } 972 | } 973 | 974 | func TestFillCompressed(t *testing.T) { 975 | fpath := "fill.wsp" 976 | os.Remove(fpath) 977 | os.Remove(fpath + ".cwsp") 978 | 979 | standard, err := CreateWithOptions( 980 | fpath, 981 | []*Retention{ 982 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 983 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 984 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 985 | }, 986 | Average, 987 | 0, 988 | &Options{Compressed: false, PointsPerBlock: 7200, InMemory: true}, 989 | ) 990 | if err != nil { 991 | panic(err) 992 | } 993 | 994 | points := []*TimeSeriesPoint{} 995 | twoYearsAgo := Now().Add(time.Hour * 24 * 365 * -2) 996 | for i := 0; i < 2*365*24-28*24; i++ { 997 | points = append(points, &TimeSeriesPoint{ 998 | Time: int(twoYearsAgo.Add(time.Hour * time.Duration(i)).Unix()), 999 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1000 | }) 1001 | } 1002 | if err := standard.UpdateMany(points); err != nil { 1003 | t.Error(err) 1004 | } 1005 | 1006 | points = []*TimeSeriesPoint{} 1007 | oneMonthAgo := Now().Add(time.Hour * 24 * -28) 1008 | for i := 0; i < 28*24*60-2*24*60; i++ { 1009 | points = append(points, &TimeSeriesPoint{ 1010 | Time: int(oneMonthAgo.Add(time.Minute * time.Duration(i)).Unix()), 1011 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1012 | }) 1013 | } 1014 | if err := standard.UpdateMany(points); err != nil { 1015 | t.Error(err) 1016 | } 1017 | 1018 | compressed, err := CreateWithOptions( 1019 | fpath+".cwsp", 1020 | []*Retention{ 1021 | {secondsPerPoint: 1, numberOfPoints: 172800, avgCompressedPointSize: 9}, // 1s:2d 1022 | {secondsPerPoint: 60, numberOfPoints: 40320, avgCompressedPointSize: 9}, 
// 1m:28d 1023 | {secondsPerPoint: 3600, numberOfPoints: 17520, avgCompressedPointSize: 9}, // 1h:2y 1024 | }, 1025 | Average, 1026 | 0, 1027 | &Options{Compressed: true, PointsPerBlock: 7200, InMemory: true}, 1028 | ) 1029 | if err != nil { 1030 | panic(err) 1031 | } 1032 | points = []*TimeSeriesPoint{} 1033 | twoDaysAgo := Now().Add(time.Hour * 24 * -2) 1034 | for i := 0; i < 60*60*24*2; i++ { 1035 | points = append(points, &TimeSeriesPoint{ 1036 | Time: int(twoDaysAgo.Add(time.Second * time.Duration(i)).Unix()), 1037 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1038 | }) 1039 | } 1040 | if err := compressed.UpdateMany(points); err != nil { 1041 | t.Error(err) 1042 | } 1043 | 1044 | if err := compressed.file.(*memFile).dumpOnDisk(fpath + ".original.cwsp"); err != nil { 1045 | t.Error(err) 1046 | } 1047 | 1048 | if err := compressed.FillCompressed(standard); err != nil { 1049 | t.Error(err) 1050 | } 1051 | 1052 | if err := compressed.file.(*memFile).dumpOnDisk(fpath + ".cwsp"); err != nil { 1053 | t.Error(err) 1054 | } 1055 | if err := standard.file.(*memFile).dumpOnDisk(fpath); err != nil { 1056 | t.Error(err) 1057 | } 1058 | 1059 | t.Log("comparing 2 years archive") 1060 | compareWhisperFiles(t, compressed, standard, int(twoYearsAgo.Unix()), int(Now().Add(time.Hour*24*-28).Unix())) 1061 | t.Log("comparing 1 month archive") 1062 | compareWhisperFiles(t, compressed, standard, int(oneMonthAgo.Add(time.Hour).Unix()), int(Now().Add(time.Hour*24*-2-time.Hour).Unix())) 1063 | 1064 | oldCompressed, err := OpenWithOptions(fpath+".original.cwsp", &Options{}) 1065 | if err != nil { 1066 | t.Error(err) 1067 | } 1068 | t.Log("comparing 2 days archive") 1069 | compareWhisperFiles(t, compressed, oldCompressed, int(Now().Add(time.Hour*24*-2+time.Hour).Unix()), int(Now().Unix())) 1070 | } 1071 | 1072 | func compareWhisperFiles(t *testing.T, w1, w2 *Whisper, from, until int) { 1073 | vals1, err := w1.Fetch(from, until) 1074 | if err != nil { 1075 | t.Error(err) 1076 | return 
1077 | } 1078 | vals2, err := w2.Fetch(from, until) 1079 | if err != nil { 1080 | t.Error(err) 1081 | return 1082 | } 1083 | 1084 | var diff, same, nan int 1085 | for i := 0; i < len(vals1.values); i++ { 1086 | vc := vals1.values[i] 1087 | vs := vals2.values[i] 1088 | if math.IsNaN(vc) && math.IsNaN(vs) { 1089 | same++ 1090 | nan++ 1091 | } else if vc != vs { 1092 | t.Errorf("%d/%d %d (%s): %v != %v\n", i, len(vals1.values), vals1.fromTime+i*vals1.step, time.Unix(int64(vals1.fromTime+i*vals1.step), 0), vc, vs) 1093 | diff++ 1094 | } else { 1095 | same++ 1096 | } 1097 | } 1098 | fromt, untilt := time.Unix(int64(from), 0), time.Unix(int64(until), 0) 1099 | t.Logf("from %s until %s %s", fromt, untilt, untilt.Sub(fromt)) 1100 | if diff > 0 { 1101 | t.Errorf("diff = %d", diff) 1102 | t.Errorf("same = %d", same) 1103 | t.Errorf("nan = %d", nan) 1104 | } else { 1105 | t.Logf("diff = %d", diff) 1106 | t.Logf("same = %d", same) 1107 | t.Logf("nan = %d", nan) 1108 | } 1109 | } 1110 | 1111 | func TestSanitizeAvgCompressedPointSizeOnCreate(t *testing.T) { 1112 | var cases = []struct { 1113 | rets []*Retention 1114 | expect float32 1115 | }{ 1116 | { 1117 | rets: []*Retention{ 1118 | {secondsPerPoint: 1, numberOfPoints: 100, avgCompressedPointSize: math.Float32frombits(0xffc00000)}, // 32bits nan 1119 | {secondsPerPoint: 5, numberOfPoints: 100}, 1120 | }, 1121 | expect: avgCompressedPointSize, 1122 | }, 1123 | { 1124 | rets: []*Retention{ 1125 | {secondsPerPoint: 1, numberOfPoints: 100, avgCompressedPointSize: float32(math.NaN())}, // 62 bits nan 1126 | {secondsPerPoint: 5, numberOfPoints: 100}, 1127 | }, 1128 | expect: avgCompressedPointSize, 1129 | }, 1130 | { 1131 | rets: []*Retention{ 1132 | {secondsPerPoint: 1, numberOfPoints: 100, avgCompressedPointSize: 0}, 1133 | {secondsPerPoint: 5, numberOfPoints: 100}, 1134 | }, 1135 | expect: avgCompressedPointSize, 1136 | }, 1137 | { 1138 | rets: []*Retention{ 1139 | {secondsPerPoint: 1, numberOfPoints: 100, avgCompressedPointSize: 
-10}, 1140 | {secondsPerPoint: 5, numberOfPoints: 100}, 1141 | }, 1142 | expect: avgCompressedPointSize, 1143 | }, 1144 | { 1145 | rets: []*Retention{ 1146 | {secondsPerPoint: 1, numberOfPoints: 100, avgCompressedPointSize: 65536}, 1147 | {secondsPerPoint: 5, numberOfPoints: 100}, 1148 | }, 1149 | expect: MaxCompressedPointSize, 1150 | }, 1151 | } 1152 | for _, c := range cases { 1153 | fpath := "tmp/extend.whisper" 1154 | os.Remove(fpath) 1155 | whisper, err := CreateWithOptions( 1156 | fpath, 1157 | c.rets, 1158 | Sum, 1159 | 0.7, 1160 | &Options{Compressed: true, PointsPerBlock: 7200}, 1161 | ) 1162 | if err != nil { 1163 | panic(err) 1164 | } 1165 | whisper.Close() 1166 | 1167 | whisper, err = OpenWithOptions(fpath, &Options{Compressed: true, PointsPerBlock: 7200}) 1168 | if err != nil { 1169 | t.Fatal(err) 1170 | } 1171 | if got, want := whisper.archives[0].avgCompressedPointSize, c.expect; got != want { 1172 | t.Errorf("whisper.archives[0].avgCompressedPointSize = %f; want %f", got, want) 1173 | } 1174 | } 1175 | } 1176 | 1177 | func TestEstimatePointSize(t *testing.T) { 1178 | cases := []struct { 1179 | input []dataPoint 1180 | expect float32 1181 | }{ 1182 | // 0 datapoints 1183 | {input: []dataPoint{}, expect: avgCompressedPointSize}, 1184 | // not enough datapoints 1185 | { 1186 | input: []dataPoint{ 1187 | {interval: 1543449600, value: 5}, 1188 | {interval: 1543478400, value: 5}, 1189 | }, 1190 | expect: avgCompressedPointSize, 1191 | }, 1192 | } 1193 | for _, c := range cases { 1194 | // not enough datapoints 1195 | size := estimatePointSize(c.input, &Retention{secondsPerPoint: 10, numberOfPoints: 17280}, DefaultPointsPerBlock) 1196 | if got, want := size, c.expect; got != want { 1197 | t.Errorf("size = %f; want %f", got, want) 1198 | } 1199 | } 1200 | 1201 | // for i := 0; i < 500; i += 10 { 1202 | // var ds []dataPoint 1203 | // var start = 1543449600 1204 | // for j := 0; j < i; j++ { 1205 | // ds = append(ds, dataPoint{interval: start, value: 
rand.NormFloat64()}) // skipcq: GSC-G404 1206 | // // ds = append(ds, dataPoint{interval: start, value: 10}) 1207 | // 1208 | // start += 1 1209 | // // start += rand.Int() // skipcq: GSC-G404 1210 | // } 1211 | // size := estimatePointSize(ds, &Retention{secondsPerPoint: 10, numberOfPoints: 17280}, DefaultPointsPerBlock) 1212 | // fmt.Printf("%d: %f\n", i, size) 1213 | // } 1214 | } 1215 | 1216 | func TestFillCompressedMix(t *testing.T) { 1217 | srcPath := "tmp/fill-mix.src.cwsp" 1218 | dstPath := "tmp/fill-mix.dst.cwsp" 1219 | os.Remove(srcPath) 1220 | os.Remove(dstPath) 1221 | 1222 | srcMix, err := CreateWithOptions( 1223 | srcPath, 1224 | []*Retention{ 1225 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 1226 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 1227 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 1228 | }, 1229 | Mix, 1230 | 0, 1231 | &Options{ 1232 | Compressed: true, PointsPerBlock: 7200, InMemory: true, 1233 | MixAggregationSpecs: []MixAggregationSpec{ 1234 | {Method: Average, Percentile: 0}, 1235 | {Method: Sum, Percentile: 0}, 1236 | {Method: Last, Percentile: 0}, 1237 | {Method: Max, Percentile: 0}, 1238 | {Method: Min, Percentile: 0}, 1239 | {Method: Percentile, Percentile: 50}, 1240 | {Method: Percentile, Percentile: 95}, 1241 | {Method: Percentile, Percentile: 99}, 1242 | }, 1243 | }, 1244 | ) 1245 | if err != nil { 1246 | panic(err) 1247 | } 1248 | 1249 | var points []*TimeSeriesPoint 1250 | var limit int 1251 | var start = 1544478600 1252 | var now = start 1253 | Now = func() time.Time { return time.Unix(int64(now), 0) } 1254 | nowNext := func() time.Time { now++; return Now() } 1255 | defer func() { Now = time.Now }() 1256 | 1257 | limit = 300 + rand.Intn(100) // skipcq: GSC-G404 1258 | for i, end := 0, 60*60*24*80; i < end; i++ { 1259 | points = append(points, &TimeSeriesPoint{ 1260 | Time: int(nowNext().Unix()), 1261 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1262 | }) 1263 | 1264 | if len(points) > 
limit || i == end-1 { 1265 | limit = 300 + rand.Intn(100) // skipcq: GSC-G404 1266 | if err := srcMix.UpdateMany(points); err != nil { 1267 | t.Error(err) 1268 | } 1269 | points = points[:0] 1270 | } 1271 | } 1272 | 1273 | dstMix, err := CreateWithOptions( 1274 | dstPath, 1275 | []*Retention{ 1276 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 1277 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 1278 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 1279 | }, 1280 | Mix, 1281 | 0, 1282 | &Options{ 1283 | Compressed: true, PointsPerBlock: 7200, InMemory: true, 1284 | MixAggregationSpecs: []MixAggregationSpec{ 1285 | {Method: Average, Percentile: 0}, 1286 | {Method: Sum, Percentile: 0}, 1287 | {Method: Last, Percentile: 0}, 1288 | {Method: Max, Percentile: 0}, 1289 | {Method: Min, Percentile: 0}, 1290 | {Method: Percentile, Percentile: 50}, 1291 | {Method: Percentile, Percentile: 95}, 1292 | {Method: Percentile, Percentile: 99}, 1293 | }, 1294 | }, 1295 | ) 1296 | if err != nil { 1297 | panic(err) 1298 | } 1299 | points = []*TimeSeriesPoint{} 1300 | limit = 300 + rand.Intn(100) // skipcq: GSC-G404 1301 | for i, end := 0, 60*60*24*2; i < end; i++ { 1302 | points = append(points, &TimeSeriesPoint{ 1303 | Time: int(nowNext().Unix()), 1304 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1305 | }) 1306 | 1307 | if len(points) > limit || i == end-1 { 1308 | limit = 300 + rand.Intn(100) // skipcq: GSC-G404 1309 | if err := dstMix.UpdateMany(points); err != nil { 1310 | t.Error(err) 1311 | } 1312 | points = points[:0] 1313 | } 1314 | } 1315 | if err := dstMix.UpdateMany(points); err != nil { 1316 | t.Error(err) 1317 | } 1318 | 1319 | if err := dstMix.file.(*memFile).dumpOnDisk(dstPath + ".bak"); err != nil { 1320 | t.Error(err) 1321 | } 1322 | 1323 | if err := dstMix.FillCompressed(srcMix); err != nil { 1324 | t.Error(err) 1325 | } 1326 | 1327 | if err := dstMix.file.(*memFile).dumpOnDisk(dstPath); err != nil { 1328 | t.Error(err) 1329 | } 1330 
| if err := srcMix.file.(*memFile).dumpOnDisk(srcPath); err != nil { 1331 | t.Error(err) 1332 | } 1333 | 1334 | // TODO: merge with compareWhisperFiles 1335 | compare := func(w1, w2 *Whisper, from, until int) { 1336 | valsc, err := w1.Fetch(from, until) 1337 | if err != nil { 1338 | t.Error(err) 1339 | } 1340 | valss, err := w2.Fetch(from, until) 1341 | if err != nil { 1342 | t.Error(err) 1343 | } 1344 | t.Logf(" dst %d src %d", len(valsc.values), len(valss.values)) 1345 | var diff, same, nonNans int 1346 | for i := 0; i < len(valsc.values); i++ { 1347 | vc := valsc.values[i] 1348 | vs := valss.values[i] 1349 | if math.IsNaN(vc) && math.IsNaN(vs) { 1350 | same++ 1351 | } else if vc != vs { 1352 | t.Errorf("%d/%d %d: %v != %v\n", i, len(valsc.values), valsc.fromTime+i*valsc.step, vc, vs) 1353 | diff++ 1354 | nonNans++ 1355 | } else { 1356 | same++ 1357 | nonNans++ 1358 | } 1359 | } 1360 | if diff > 0 { 1361 | t.Errorf(" diff %d", diff) 1362 | t.Errorf(" same %d", same) 1363 | } 1364 | t.Logf(" non-nans %d", nonNans) 1365 | } 1366 | 1367 | t.Log("comparing 2 years archive") 1368 | compare(dstMix, srcMix, now-365*24*60*60, now-28*24*60*60) 1369 | t.Log("comparing 1 month archive") 1370 | compare(dstMix, srcMix, now-28*24*60*60, now-30*2*60*60) 1371 | 1372 | oldDstMix, err := OpenWithOptions(dstPath+".bak", &Options{}) 1373 | if err != nil { 1374 | t.Error(err) 1375 | } 1376 | t.Log("comparing 2 days archive") 1377 | compare(dstMix, oldDstMix, int(Now().Add(time.Hour*24*-2+time.Hour).Unix()), int(Now().Unix())) 1378 | } 1379 | 1380 | func TestFetchCompressedMix(t *testing.T) { 1381 | srcPath := "tmp/fetch-mix.cwsp" 1382 | os.Remove(srcPath) 1383 | 1384 | srcMix, err := CreateWithOptions( 1385 | srcPath, 1386 | []*Retention{ 1387 | {secondsPerPoint: 1, numberOfPoints: 60 * 60}, // 1s:1h 1388 | {secondsPerPoint: 60, numberOfPoints: 3 * 60}, // 1m:3h 1389 | {secondsPerPoint: 600, numberOfPoints: 6 * 6}, // 10m:6h 1390 | }, 1391 | Mix, 1392 | 0, 1393 | &Options{ 1394 | 
Compressed: true, PointsPerBlock: 7200, InMemory: true, 1395 | MixAggregationSpecs: []MixAggregationSpec{ 1396 | {Method: Average, Percentile: 0}, 1397 | {Method: Sum, Percentile: 0}, 1398 | {Method: Last, Percentile: 0}, 1399 | {Method: Max, Percentile: 0}, 1400 | {Method: Min, Percentile: 0}, 1401 | {Method: Percentile, Percentile: 50}, 1402 | {Method: Percentile, Percentile: 95}, 1403 | {Method: Percentile, Percentile: 99}, 1404 | }, 1405 | }, 1406 | ) 1407 | if err != nil { 1408 | panic(err) 1409 | } 1410 | 1411 | points := []*TimeSeriesPoint{} 1412 | start := 1544478600 1413 | now := start 1414 | Now = func() time.Time { return time.Unix(int64(now), 0) } 1415 | defer func() { Now = time.Now }() 1416 | 1417 | for i, total := 0, 4*60*60; i < total; i++ { 1418 | points = append(points, &TimeSeriesPoint{ 1419 | Time: int(Now().Unix()), 1420 | Value: float64(i), 1421 | }) 1422 | now++ 1423 | 1424 | // To trigger frequent aggregations. Because of the current 1425 | // implementation logics if all data points are updated in a single 1426 | // function call, only one aggregation is triggered. 
1427 | if len(points) > 1000 || i == total-1 { 1428 | if err := srcMix.UpdateMany(points); err != nil { 1429 | t.Error(err) 1430 | } 1431 | points = points[:0] 1432 | } 1433 | } 1434 | 1435 | if err := srcMix.file.(*memFile).dumpOnDisk(srcPath); err != nil { 1436 | t.Error(err) 1437 | } 1438 | 1439 | t.Run("Check1stArchive", func(t *testing.T) { 1440 | data, err := srcMix.FetchByAggregation(now-10, now, &MixAggregationSpec{Method: Min}) 1441 | if err != nil { 1442 | t.Fatal(err) 1443 | } 1444 | if diff := cmp.Diff(data.Points(), []TimeSeriesPoint{ 1445 | {Time: 1544492991, Value: 14391}, {Time: 1544492992, Value: 14392}, {Time: 1544492993, Value: 14393}, 1446 | {Time: 1544492994, Value: 14394}, {Time: 1544492995, Value: 14395}, {Time: 1544492996, Value: 14396}, 1447 | {Time: 1544492997, Value: 14397}, {Time: 1544492998, Value: 14398}, {Time: 1544492999, Value: 14399}, 1448 | {Time: 1544493000, Value: math.NaN()}, 1449 | }, cmp.AllowUnexported(TimeSeriesPoint{}), cmpopts.EquateNaNs()); diff != "" { 1450 | t.Error(diff) 1451 | } 1452 | }) 1453 | t.Run("Check2ndArchiveMin", func(t *testing.T) { 1454 | data, err := srcMix.FetchByAggregation(now-2*60*60, now, &MixAggregationSpec{Method: Min}) 1455 | if err != nil { 1456 | t.Fatal(err) 1457 | } 1458 | if diff := cmp.Diff(data.Points()[len(data.Points())-42:], []TimeSeriesPoint{ 1459 | {Time: 1544490540, Value: 11940}, {Time: 1544490600, Value: 12000}, {Time: 1544490660, Value: 12060}, 1460 | {Time: 1544490720, Value: 12120}, {Time: 1544490780, Value: 12180}, {Time: 1544490840, Value: 12240}, 1461 | {Time: 1544490900, Value: 12300}, {Time: 1544490960, Value: 12360}, {Time: 1544491020, Value: 12420}, 1462 | {Time: 1544491080, Value: 12480}, {Time: 1544491140, Value: 12540}, {Time: 1544491200, Value: 12600}, 1463 | {Time: 1544491260, Value: 12660}, {Time: 1544491320, Value: 12720}, {Time: 1544491380, Value: 12780}, 1464 | {Time: 1544491440, Value: 12840}, {Time: 1544491500, Value: 12900}, {Time: 1544491560, Value: 12960}, 
1465 | {Time: 1544491620, Value: 13020}, {Time: 1544491680, Value: 13080}, {Time: 1544491740, Value: 13140}, 1466 | {Time: 1544491800, Value: 13200}, {Time: 1544491860, Value: 13260}, {Time: 1544491920, Value: 13320}, 1467 | {Time: 1544491980, Value: 13380}, {Time: 1544492040, Value: 13440}, {Time: 1544492100, Value: 13500}, 1468 | {Time: 1544492160, Value: 13560}, {Time: 1544492220, Value: 13620}, {Time: 1544492280, Value: 13680}, 1469 | {Time: 1544492340, Value: 13740}, {Time: 1544492400, Value: 13800}, {Time: 1544492460, Value: 13860}, 1470 | {Time: 1544492520, Value: 13920}, {Time: 1544492580, Value: 13980}, {Time: 1544492640, Value: 14040}, 1471 | {Time: 1544492700, Value: 14100}, {Time: 1544492760, Value: 14160}, {Time: 1544492820, Value: 14220}, 1472 | {Time: 1544492880, Value: 14280}, {Time: 1544492940, Value: 14340}, {Time: 1544493000, Value: math.NaN()}, 1473 | }, cmp.AllowUnexported(TimeSeriesPoint{}), cmpopts.EquateNaNs()); diff != "" { 1474 | t.Error(diff) 1475 | } 1476 | }) 1477 | t.Run("Check3rdArchiveMin", func(t *testing.T) { 1478 | data, err := srcMix.FetchByAggregation(start, now, &MixAggregationSpec{Method: Min}) 1479 | if err != nil { 1480 | t.Fatal(err) 1481 | } 1482 | if diff := cmp.Diff(data.Points(), []TimeSeriesPoint{ 1483 | {Time: 1544479200, Value: 600}, {Time: 1544479800, Value: 1200}, {Time: 1544480400, Value: 1800}, 1484 | {Time: 1544481000, Value: 2400}, {Time: 1544481600, Value: 3000}, {Time: 1544482200, Value: 3600}, 1485 | {Time: 1544482800, Value: 4200}, {Time: 1544483400, Value: 4800}, {Time: 1544484000, Value: 5400}, 1486 | {Time: 1544484600, Value: 6000}, {Time: 1544485200, Value: 6600}, {Time: 1544485800, Value: 7200}, 1487 | {Time: 1544486400, Value: 7800}, {Time: 1544487000, Value: 8400}, {Time: 1544487600, Value: 9000}, 1488 | {Time: 1544488200, Value: 9600}, {Time: 1544488800, Value: 10200}, {Time: 1544489400, Value: 10800}, 1489 | {Time: 1544490000, Value: 11400}, {Time: 1544490600, Value: 12000}, {Time: 1544491200, 
Value: 12600}, 1490 | {Time: 1544491800, Value: 13200}, {Time: 1544492400, Value: 13800}, {Time: 1544493000, Value: math.NaN()}, 1491 | }, cmp.AllowUnexported(TimeSeriesPoint{}), cmpopts.EquateNaNs()); diff != "" { 1492 | t.Error(diff) 1493 | } 1494 | }) 1495 | t.Run("Check3rdArchiveSum", func(t *testing.T) { 1496 | data, err := srcMix.FetchByAggregation(start, now, &MixAggregationSpec{Method: Sum}) 1497 | if err != nil { 1498 | t.Fatal(err) 1499 | } 1500 | if diff := cmp.Diff(data.Points(), []TimeSeriesPoint{ 1501 | {Time: 1544479200, Value: 539700}, {Time: 1544479800, Value: 899700}, 1502 | {Time: 1544480400, Value: 1.2597e+06}, {Time: 1544481000, Value: 1.6197e+06}, 1503 | {Time: 1544481600, Value: 1.9797e+06}, {Time: 1544482200, Value: 2.3397e+06}, 1504 | {Time: 1544482800, Value: 2.6997e+06}, {Time: 1544483400, Value: 3.0597e+06}, 1505 | {Time: 1544484000, Value: 3.4197e+06}, {Time: 1544484600, Value: 3.7797e+06}, 1506 | {Time: 1544485200, Value: 4.1397e+06}, {Time: 1544485800, Value: 4.4997e+06}, 1507 | {Time: 1544486400, Value: 4.8597e+06}, {Time: 1544487000, Value: 5.2197e+06}, 1508 | {Time: 1544487600, Value: 5.5797e+06}, {Time: 1544488200, Value: 5.9397e+06}, 1509 | {Time: 1544488800, Value: 6.2997e+06}, {Time: 1544489400, Value: 6.6597e+06}, 1510 | {Time: 1544490000, Value: 7.0197e+06}, {Time: 1544490600, Value: 7.3797e+06}, 1511 | {Time: 1544491200, Value: 7.7397e+06}, {Time: 1544491800, Value: 8.0997e+06}, 1512 | {Time: 1544492400, Value: 8.4597e+06}, {Time: 1544493000, Value: math.NaN()}, 1513 | }, cmp.AllowUnexported(TimeSeriesPoint{}), cmpopts.EquateNaNs()); diff != "" { 1514 | t.Error(diff) 1515 | } 1516 | }) 1517 | 1518 | t.Run("CheckDuplicateDataPoints", func(t *testing.T) { 1519 | for i, arc := range srcMix.archives { 1520 | m := map[int]bool{} 1521 | for _, block := range arc.blockRanges { 1522 | if block.start == 0 { 1523 | continue 1524 | } 1525 | 1526 | buf := make([]byte, arc.blockSize) 1527 | if err := arc.whisper.fileReadAt(buf, 
int64(arc.blockOffset(block.index))); err != nil { 1528 | panic(err) 1529 | } 1530 | 1531 | dps, _, err := arc.ReadFromBlock(buf, []dataPoint{}, block.start, block.end) 1532 | if err != nil { 1533 | panic(err) 1534 | } 1535 | 1536 | for _, dp := range dps { 1537 | if m[dp.interval] { 1538 | var spec string 1539 | if i > 0 { 1540 | spec = " " + arc.aggregationSpec.String() 1541 | } 1542 | t.Errorf("archive %d %s%s contains a duplicate timestamp: %d", i, arc.String(), spec, dp.interval) 1543 | } else { 1544 | m[dp.interval] = true 1545 | } 1546 | } 1547 | } 1548 | } 1549 | }) 1550 | } 1551 | 1552 | func BenchmarkWriteCompressed(b *testing.B) { 1553 | fpath := "tmp/benchmark_write.cwsp" 1554 | os.Remove(fpath) 1555 | cwhisper, err := CreateWithOptions( 1556 | fpath, 1557 | []*Retention{ 1558 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 1559 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 1560 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 1561 | }, 1562 | Sum, 1563 | 0, 1564 | &Options{Compressed: true, PointsPerBlock: 7200}, 1565 | ) 1566 | if err != nil { 1567 | b.Fatal(err) 1568 | } 1569 | 1570 | start := Now() 1571 | var ps, history []*TimeSeriesPoint 1572 | for i := 0; i < b.N; i++ { 1573 | p := &TimeSeriesPoint{ 1574 | Time: int(start.Add(time.Duration(i) * time.Second).Unix()), 1575 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1576 | } 1577 | history = append(history, p) 1578 | ps = append(ps, p) 1579 | 1580 | if len(ps) >= 300 { 1581 | if err := cwhisper.UpdateMany(ps); err != nil { 1582 | b.Fatal(err) 1583 | } 1584 | ps = ps[:0] 1585 | } 1586 | } 1587 | if err := cwhisper.Close(); err != nil { 1588 | b.Fatal(err) 1589 | } 1590 | td, err := os.Create("test_data") 1591 | if err != nil { 1592 | b.Fatal(err) 1593 | } 1594 | if err := json.NewEncoder(td).Encode(history); err != nil { 1595 | b.Fatal(err) 1596 | } 1597 | if err := td.Close(); err != nil { 1598 | b.Fatal(err) 1599 | } 1600 | } 1601 | 1602 | func 
BenchmarkReadCompressed(b *testing.B) { 1603 | fpath := "tmp/benchmark_write.cwsp" 1604 | cwhisper, err := OpenWithOptions(fpath, &Options{}) 1605 | if err != nil { 1606 | b.Fatal(err) 1607 | } 1608 | 1609 | for i := 0; i < b.N; i++ { 1610 | _, err := cwhisper.Fetch(int(time.Now().Add(-48*time.Hour).Unix()), int(time.Now().Unix())) 1611 | if err != nil { 1612 | b.Fatal(err) 1613 | } 1614 | } 1615 | if err := cwhisper.Close(); err != nil { 1616 | b.Fatal(err) 1617 | } 1618 | } 1619 | 1620 | func BenchmarkReadStandard(b *testing.B) { 1621 | fpath := "tmp/benchmark_write.wsp" 1622 | cwhisper, err := OpenWithOptions(fpath, &Options{}) 1623 | if err != nil { 1624 | b.Fatal(err) 1625 | } 1626 | 1627 | for i := 0; i < b.N; i++ { 1628 | _, err := cwhisper.Fetch(int(time.Now().Add(-48*time.Hour).Unix()), int(time.Now().Unix())) 1629 | if err != nil { 1630 | b.Fatal(err) 1631 | } 1632 | } 1633 | if err := cwhisper.Close(); err != nil { 1634 | b.Fatal(err) 1635 | } 1636 | } 1637 | 1638 | func BenchmarkWriteStandard(b *testing.B) { 1639 | fpath := "tmp/benchmark_write.wsp" 1640 | os.Remove(fpath) 1641 | cwhisper, err := CreateWithOptions( 1642 | fpath, 1643 | []*Retention{ 1644 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 1645 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 1646 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 1647 | }, 1648 | Sum, 1649 | 0, 1650 | &Options{Compressed: false, PointsPerBlock: 7200}, 1651 | ) 1652 | if err != nil { 1653 | b.Fatal(err) 1654 | } 1655 | 1656 | start := Now() 1657 | var ps []*TimeSeriesPoint 1658 | for i := 0; i < b.N; i++ { 1659 | ps = append(ps, &TimeSeriesPoint{ 1660 | Time: int(start.Add(time.Duration(i) * time.Second).Unix()), 1661 | Value: rand.NormFloat64(), // skipcq: GSC-G404 1662 | }) 1663 | 1664 | if len(ps) >= 300 { 1665 | if err := cwhisper.UpdateMany(ps); err != nil { 1666 | b.Fatal(err) 1667 | } 1668 | ps = ps[:0] 1669 | 1670 | } 1671 | } 1672 | if err := cwhisper.Close(); err != nil { 
1673 | b.Fatal(err) 1674 | } 1675 | } 1676 | 1677 | func TestAggregatePercentile(t *testing.T) { 1678 | for _, c := range []struct { 1679 | percentile float32 1680 | expect float64 1681 | vals []float64 1682 | }{ 1683 | {50, 4.0, []float64{1, 2, 3, 4, 5, 6, 7}}, 1684 | {50, 4.5, []float64{1, 2, 3, 4, 5, 6, 7, 8}}, 1685 | {50, 1.0, []float64{1}}, 1686 | {50, math.NaN(), []float64{}}, 1687 | {90, 9.1, []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}, 1688 | {99, 99.01, func() []float64 { 1689 | var vals []float64 1690 | for i := 0; i < 100; i++ { 1691 | vals = append(vals, float64(i+1)) 1692 | } 1693 | return vals 1694 | }()}, 1695 | {99.9, 999.001015, func() []float64 { 1696 | var vals []float64 1697 | for i := 0; i < 1000; i++ { 1698 | vals = append(vals, float64(i+1)) 1699 | } 1700 | return vals 1701 | }()}, 1702 | } { 1703 | if got, want := aggregatePercentile(c.percentile, c.vals), c.expect; math.Trunc(got*10000) != math.Trunc(want*10000) && !(math.IsNaN(got) && math.IsNaN(want)) { 1704 | t.Errorf("aggregatePercentile(%.2f, %v) = %f; want %f", c.percentile, c.vals, got, want) 1705 | } 1706 | } 1707 | } 1708 | -------------------------------------------------------------------------------- /debug.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "math" 7 | "os" 8 | "strings" 9 | "sync" 10 | "time" 11 | ) 12 | 13 | func (whisper *Whisper) CheckIntegrity() error { 14 | meta := make([]byte, whisper.MetadataSize()) 15 | if err := whisper.fileReadAt(meta, 0); err != nil { 16 | panic(err) 17 | } 18 | 19 | // set crc32 in the header to 0 for re-calculation 20 | copy(meta[whisper.crc32Offset():], make([]byte, 4)) 21 | 22 | var msg string 23 | var metacrc = crc32(meta, 0) 24 | if metacrc != whisper.crc32 { 25 | msg += fmt.Sprintf(" header crc: disk: %08x cal: %08x\n", whisper.crc32, metacrc) 26 | } 27 | 28 | for _, arc := range whisper.archives { 29 | if arc.avgCompressedPointSize 
<= 0.0 || math.IsNaN(float64(arc.avgCompressedPointSize)) { 30 | msg += fmt.Sprintf(" archive.%s has bad avgCompressedPointSize: %f\n", arc.Retention, arc.avgCompressedPointSize) 31 | } 32 | for _, block := range arc.blockRanges { 33 | if block.start == 0 { 34 | continue 35 | } 36 | 37 | buf := make([]byte, arc.blockSize) 38 | if err := whisper.fileReadAt(buf, int64(arc.blockOffset(block.index))); err != nil { 39 | panic(err) 40 | } 41 | _, _, err := arc.ReadFromBlock(buf, []dataPoint{}, 0, maxInt) 42 | if err != nil { 43 | panic(err) 44 | } 45 | 46 | endOffset := arc.blockSize 47 | if block.index == arc.cblock.index { 48 | endOffset = arc.cblock.lastByteOffset - arc.blockOffset(block.index) 49 | } 50 | 51 | crc := crc32(buf[:endOffset], 0) 52 | if crc != block.crc32 { 53 | msg += fmt.Sprintf(" archive.%s.block.%d crc32: %08x check: %08x startOffset: %d endOffset: %d/%d\n", arc.Retention, block.index, block.crc32, crc, arc.blockOffset(block.index), endOffset, int(arc.blockOffset(block.index))+endOffset) 54 | } 55 | } 56 | } 57 | 58 | if msg != "" { 59 | return errors.New(msg) 60 | } 61 | return nil 62 | } 63 | 64 | // skipcq: RVV-A0005 65 | func (whisper *Whisper) Dump(all, showDecompressionInfo bool) { 66 | debugCompress = showDecompressionInfo 67 | 68 | fmt.Printf("compressed: %t\n", whisper.compressed) 69 | fmt.Printf("aggregation_method: %s\n", whisper.aggregationMethod) 70 | fmt.Printf("max_retention: %d\n", whisper.maxRetention) 71 | fmt.Printf("x_files_factor: %f\n", whisper.xFilesFactor) 72 | if whisper.compressed { 73 | whisper.compressed = false 74 | ssize := whisper.Size() 75 | whisper.compressed = true 76 | csize := whisper.Size() 77 | var ratio float64 78 | if ssize != 0 { 79 | ratio = float64(csize) / float64(ssize) 80 | } 81 | 82 | fmt.Printf("comp_version: %d\n", whisper.compVersion) 83 | fmt.Printf("points_per_block: %d\n", whisper.pointsPerBlock) 84 | fmt.Printf("avg_compressed_point_size: %f\n", whisper.avgCompressedPointSize) 85 | 
fmt.Printf("crc32: %X\n", whisper.crc32) 86 | fmt.Printf("compression_ratio: %f (compressed/standard: %d/%d)\n", ratio, csize, ssize) 87 | } 88 | 89 | fmt.Printf("archives: %d\n", len(whisper.archives)) 90 | for i, arc := range whisper.archives { 91 | var agg string 92 | if arc.aggregationSpec != nil { 93 | agg = fmt.Sprintf(" (%s)", arc.aggregationSpec) 94 | } 95 | fmt.Printf("archives.%d.retention: %s%s\n", i, arc.Retention, agg) 96 | } 97 | 98 | for i, arc := range whisper.archives { 99 | fmt.Printf("\nArchive %d info:\n", i) 100 | if whisper.compressed { 101 | arc.dumpInfoCompressed() 102 | } else { 103 | arc.dumpInfoStandard() 104 | } 105 | } 106 | 107 | if !all { 108 | return 109 | } 110 | 111 | for i, arc := range whisper.archives { 112 | fmt.Printf("\nArchive %d data:\n", i) 113 | if whisper.compressed { 114 | arc.dumpDataPointsCompressed() 115 | } else { 116 | whisper.dumpDataPointsStandard(arc) 117 | } 118 | } 119 | } 120 | 121 | func (archive *archiveInfo) dumpInfoCompressed() { 122 | fmt.Printf("retention: %s\n", archive.Retention) 123 | fmt.Printf("number_of_points: %d\n", archive.numberOfPoints) 124 | fmt.Printf("retention: %s\n", archive.Retention) 125 | fmt.Printf("buffer_size: %d\n", archive.bufferSize) 126 | fmt.Printf("block_size: %d\n", archive.blockSize) 127 | fmt.Printf("estimated_point_size: %f\n", archive.avgCompressedPointSize) 128 | fmt.Printf("real_avg_point_size: %f\n", archive.avgPointsPerBlockReal()) 129 | fmt.Printf("block_count: %d\n", archive.blockCount) 130 | fmt.Printf("points_per_block: %d\n", archive.calculateSuitablePointsPerBlock(archive.whisper.pointsPerBlock)) 131 | fmt.Printf("compression_ratio: %f (%d/%d)\n", float64(archive.blockSize*archive.blockCount)/float64(archive.Size()), archive.blockSize*archive.blockCount, archive.Size()) 132 | if archive.aggregationSpec != nil { 133 | fmt.Printf("aggregation: %s\n", archive.aggregationSpec) 134 | } 135 | 136 | toTime := func(t int) string { return time.Unix(int64(t), 
0).Format("2006-01-02 15:04:05") }
	// Current (write) block state: the positions below locate the next
	// bit to be written within the block.
	fmt.Printf("cblock\n")
	fmt.Printf(" index: %d\n", archive.cblock.index)
	fmt.Printf(" p[0].interval: %d %s\n", archive.cblock.p0.interval, toTime(archive.cblock.p0.interval))
	fmt.Printf(" p[n-2].interval: %d %s\n", archive.cblock.pn2.interval, toTime(archive.cblock.pn2.interval))
	fmt.Printf(" p[n-1].interval: %d %s\n", archive.cblock.pn1.interval, toTime(archive.cblock.pn1.interval))
	fmt.Printf(" last_byte: %08b\n", archive.cblock.lastByte)
	fmt.Printf(" last_byte_offset: %d\n", archive.cblock.lastByteOffset)
	fmt.Printf(" last_byte_bit_pos: %d\n", archive.cblock.lastByteBitPos)
	fmt.Printf(" crc32: %08x\n", archive.cblock.crc32)
	fmt.Printf(" stats:\n")
	fmt.Printf(" extended: %d\n", archive.stats.extended)
	fmt.Printf(" discard.oldInterval: %d\n", archive.stats.discard.oldInterval)

	// One summary line per block, in time order.
	for _, block := range archive.getSortedBlockRanges() {
		// Full blocks end at their last allocated byte; the current
		// write block ends wherever the encoder last wrote.
		lastByteOffset := archive.blockOffset(block.index) + archive.blockSize - 1
		if block.index == archive.cblock.index {
			lastByteOffset = archive.cblock.lastByteOffset
		}
		fmt.Printf(
			"%02d: %10d %s - %10d %s count:%5d crc32:%08x start:%d last_byte:%d end:%d\n",
			block.index,
			block.start, toTime(block.start),
			block.end, toTime(block.end),
			// block.count excludes the first point of a non-empty block,
			// so +1 to report the real number of points.
			(func() int {
				if block.count == 0 {
					return 0
				}
				return block.count + 1
			})(), block.crc32,
			archive.blockOffset(block.index), lastByteOffset,
			archive.blockOffset(block.index)+archive.blockSize,
		)
	}
}

// dumpDataPointsCompressed prints every data point of a compressed
// archive to stdout: first the in-memory buffer (if any), then each
// block decoded from disk, with a CRC line so corruption is visible
// alongside the data.
func (arc *archiveInfo) dumpDataPointsCompressed() {
	if arc.hasBuffer() {
		arc.dumpBuffer()
	}

	if arc.aggregationSpec != nil {
		fmt.Printf("aggregation: %s\n", arc.aggregationSpec)
	}

	toTime := func(t int) string { return time.Unix(int64(t), 0).Format("2006-01-02 15:04:05") }
	for _, block := range arc.blockRanges {
		fmt.Printf("archive %s block %d @%d\n", arc.Retention, block.index, arc.blockOffset(block.index))
		// start == 0 marks a block that has never been written.
		if block.start == 0 {
			fmt.Printf(" [empty]\n")
			continue
		}

		buf := make([]byte, arc.blockSize)
		if err := arc.whisper.fileReadAt(buf, int64(arc.blockOffset(block.index))); err != nil {
			panic(err)
		}

		// Decode the whole block regardless of time range.
		dps, _, err := arc.ReadFromBlock(buf, []dataPoint{}, 0, maxInt)
		if err != nil {
			panic(err)
		}

		// The CRC of the current write block only covers the bytes
		// written so far, not the full allocated block.
		blockSize := arc.blockSize
		if block.index == arc.cblock.index {
			blockSize = arc.cblock.lastByteOffset - arc.blockOffset(block.index)
		}
		crc := crc32(buf[:blockSize], 0)

		startOffset := int(arc.blockOffset(block.index))
		fmt.Printf("crc32: %08x check: %08x start: %d end: %d length: %d\n", block.crc32, crc, startOffset, startOffset+blockSize, blockSize)

		for i, p := range dps {
			// continue
			fmt.Printf(" %s % 4d %d %s: %v\n", arc.String(), i, p.interval, toTime(p.interval), p.value)
		}
	}
}

// dumpBuffer prints the archive's not-yet-flushed buffer points
// (fixed-size PointSize records) to stdout.
func (arc *archiveInfo) dumpBuffer() {
	fmt.Printf("archive %s buffer[%d]:\n", arc.Retention, len(arc.buffer)/PointSize)
	dps := unpackDataPoints(arc.buffer)
	for i, p := range dps {
		fmt.Printf(" % 4d %d: %f\n", i, p.interval, p.value)
	}
}

// dumpInfoStandard prints the header fields of a standard
// (uncompressed) whisper archive to stdout.
func (archive *archiveInfo) dumpInfoStandard() {
	fmt.Printf(" offset: %d\n", archive.offset)
	fmt.Printf(" second per point: %d\n", archive.secondsPerPoint)
	fmt.Printf(" points: %d\n", archive.numberOfPoints)
	fmt.Printf(" retention: %s\n", archive.Retention)
	fmt.Printf(" size: %d\n", archive.Size())
}

// dumpDataPointsStandard reads an entire standard archive from disk
// and prints every slot (interval, value) to stdout.
func (whisper *Whisper) dumpDataPointsStandard(archive *archiveInfo) {
	b := make([]byte, archive.Size())
	err := whisper.fileReadAt(b, archive.Offset())
	if err != nil {
		panic(err)
	}
	points := unpackDataPoints(b)

	for i, p := range points {
fmt.Printf("%s %d: %d,% 10v\n", archive.String(), i, p.interval, p.value) 241 | } 242 | } 243 | 244 | func GenTestArchive(buf []byte, ret Retention) *archiveInfo { 245 | na := archiveInfo{ 246 | Retention: ret, 247 | offset: 0, 248 | blockRanges: make([]blockRange, 1), 249 | blockSize: len(buf), 250 | cblock: blockInfo{ 251 | index: 0, 252 | lastByteBitPos: 7, 253 | lastByteOffset: 0, 254 | }, 255 | } 256 | 257 | return &na 258 | } 259 | 260 | func GenDataPointSlice() []dataPoint { return []dataPoint{} } 261 | 262 | // skipcq: RVV-A0005 263 | func Compare( 264 | file1 string, 265 | file2 string, 266 | now int, 267 | ignoreBuffer bool, 268 | quarantinesRaw string, 269 | verbose bool, 270 | strict bool, 271 | muteThreshold int, 272 | ) (msg string, err error) { 273 | oflag := os.O_RDONLY 274 | db1, err := OpenWithOptions(file1, &Options{OpenFileFlag: &oflag}) 275 | if err != nil { 276 | return "", err 277 | } 278 | db2, err := OpenWithOptions(file2, &Options{OpenFileFlag: &oflag}) 279 | if err != nil { 280 | return "", err 281 | } 282 | var quarantines [][2]int 283 | if quarantinesRaw != "" { 284 | for _, q := range strings.Split(quarantinesRaw, ";") { 285 | var quarantine [2]int 286 | for i, t := range strings.Split(q, ",") { 287 | tim, err := time.Parse("2006-01-02", t) 288 | if err != nil { 289 | return "", err 290 | } 291 | quarantine[i] = int(tim.Unix()) 292 | } 293 | quarantines = append(quarantines, quarantine) 294 | } 295 | } 296 | 297 | oldNow := Now 298 | Now = func() time.Time { 299 | if now > 0 { 300 | return time.Unix(int64(now), 0) 301 | } 302 | return time.Now() 303 | } 304 | defer func() { Now = oldNow }() 305 | 306 | var bad bool 307 | for index, ret := range db1.Retentions() { 308 | from := int(Now().Unix()) - ret.MaxRetention() + ret.SecondsPerPoint()*60 309 | until := int(Now().Unix()) 310 | 311 | msg += fmt.Sprintf("%d %s: from = %+v until = %+v (%s - %s)\n", index, ret, from, until, time.Unix(int64(from), 0).Format("2006-01-02 15:04:06"), 
time.Unix(int64(until), 0).Format("2006-01-02 15:04:06")) 312 | 313 | var dps1, dps2 *TimeSeries 314 | var wg sync.WaitGroup 315 | wg.Add(1) 316 | go func() { 317 | defer wg.Done() 318 | 319 | var err error 320 | dps1, err = db1.Fetch(from, until) 321 | if err != nil { 322 | panic(err) 323 | } 324 | }() 325 | 326 | wg.Add(1) 327 | go func() { 328 | defer wg.Done() 329 | 330 | var err error 331 | dps2, err = db2.Fetch(from, until) 332 | if err != nil { 333 | panic(err) 334 | } 335 | }() 336 | 337 | wg.Wait() 338 | 339 | if ignoreBuffer { 340 | { 341 | vals := dps1.Values() 342 | vals[len(vals)-1] = math.NaN() 343 | vals[len(vals)-2] = math.NaN() 344 | } 345 | { 346 | vals := dps2.Values() 347 | vals[len(vals)-1] = math.NaN() 348 | vals[len(vals)-2] = math.NaN() 349 | } 350 | } 351 | 352 | for _, quarantine := range quarantines { 353 | qfrom := quarantine[0] 354 | quntil := quarantine[1] 355 | if from <= qfrom && qfrom <= until { 356 | qfromIndex := (qfrom - from) / ret.SecondsPerPoint() 357 | quntilIndex := (quntil - from) / ret.SecondsPerPoint() 358 | { 359 | vals := dps1.Values() 360 | for i := qfromIndex; i <= quntilIndex && i < len(vals); i++ { 361 | vals[i] = math.NaN() 362 | } 363 | } 364 | { 365 | vals := dps2.Values() 366 | for i := qfromIndex; i <= quntilIndex && i < len(vals); i++ { 367 | vals[i] = math.NaN() 368 | } 369 | } 370 | } 371 | } 372 | 373 | var vals1, vals2 int 374 | for _, p := range dps1.Values() { 375 | if !math.IsNaN(p) { 376 | vals1++ 377 | } 378 | } 379 | for _, p := range dps2.Values() { 380 | if !math.IsNaN(p) { 381 | vals2++ 382 | } 383 | } 384 | 385 | msg += fmt.Sprintf(" len1 = %d len2 = %d vals1 = %d vals2 = %d\n", len(dps1.Values()), len(dps2.Values()), vals1, vals2) 386 | 387 | if len(dps1.Values()) != len(dps2.Values()) { 388 | bad = true 389 | msg += fmt.Sprintf(" size doesn't match: %d != %d\n", len(dps1.Values()), len(dps2.Values())) 390 | } 391 | if vals1 != vals2 { 392 | bad = true 393 | msg += fmt.Sprintf(" values doesn't 
match: %d != %d (%d)\n", vals1, vals2, vals1-vals2) 394 | } 395 | var ptDiff int 396 | for i, p1 := range dps1.Values() { 397 | if len(dps2.Values()) < i { 398 | break 399 | } 400 | p2 := dps2.Values()[i] 401 | if !((math.IsNaN(p1) && math.IsNaN(p2)) || p1 == p2) { 402 | bad = true 403 | ptDiff++ 404 | if verbose { 405 | msg += fmt.Sprintf(" %d: %d %v != %v\n", i, dps1.FromTime()+i*ret.SecondsPerPoint(), p1, p2) 406 | } 407 | } 408 | } 409 | msg += fmt.Sprintf(" point mismatches: %d\n", ptDiff) 410 | if ptDiff <= muteThreshold && !strict { 411 | bad = false 412 | } 413 | } 414 | if db1.IsCompressed() { 415 | if err := db1.CheckIntegrity(); err != nil { 416 | msg += fmt.Sprintf("integrity: %s\n%s", file1, err) 417 | bad = true 418 | } 419 | } 420 | if db2.IsCompressed() { 421 | if err := db2.CheckIntegrity(); err != nil { 422 | msg += fmt.Sprintf("integrity: %s\n%s", file2, err) 423 | bad = true 424 | } 425 | } 426 | 427 | if bad { 428 | err = errors.New("whispers not equal") 429 | } 430 | 431 | return msg, err 432 | } 433 | -------------------------------------------------------------------------------- /debug_test.go: -------------------------------------------------------------------------------- 1 | // +build debug 2 | 3 | package whisper 4 | 5 | import ( 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | "log" 10 | "os" 11 | "os/exec" 12 | "sort" 13 | "testing" 14 | "time" 15 | 16 | "github.com/google/go-cmp/cmp" 17 | "github.com/google/go-cmp/cmp/cmpopts" 18 | "github.com/kr/pretty" 19 | ) 20 | 21 | // not really tests here, just for easy debugging/development. 
22 | 23 | func TestBitWriter(t *testing.T) { 24 | var bw BitsWriter 25 | bw.buf = make([]byte, 8) 26 | bw.bitPos = 7 27 | 28 | bw.Write(1, 1) 29 | bw.Write(2, 1) 30 | bw.Write(3, 1) 31 | // bw.Write(2, 1) 32 | // for i := 0; i < 16; i++ { 33 | // bw.Write(1, 1) 34 | // } 35 | 36 | // fmt.Printf("-- %08b\n", bw.buf) 37 | 38 | bw.Write(8, 0xaa) 39 | bw.Write(12, 0x01aa) 40 | 41 | // 1010 01 0000 0000 1010 1010 42 | fmt.Printf("-- %08b\n", bw.buf) 43 | fmt.Printf("-- %08b\n", 12) 44 | } 45 | 46 | func TestBitReader(t *testing.T) { 47 | var br BitsReader 48 | br.buf = []byte{0xB3, 0x02, 0xFF, 0xFF, 0xFF} 49 | // br.buf = []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x08} 50 | br.bitPos = 7 51 | 52 | fmt.Printf("%08b\n", br.buf) 53 | fmt.Printf("%08b\n", br.Read(1)) 54 | fmt.Printf("%08b\n", br.Read(2)) 55 | fmt.Printf("%08b\n", br.Read(3)) 56 | fmt.Printf("%08b\n", br.Read(4)) 57 | fmt.Printf("%08b\n", br.Read(16)) 58 | } 59 | 60 | var whisperFile = flag.String("file", "", "whipser filepath") 61 | 62 | func TestCompressedWhisperInplaceConvert(t *testing.T) { 63 | data := []*TimeSeriesPoint{{Time: int(time.Now().Add(-time.Minute).Unix()), Value: 1024.4096}} 64 | from, until := int(time.Now().Add(-time.Hour).Unix()), int(time.Now().Unix()) 65 | 66 | if _, err := os.Stat(*whisperFile + ".original"); err != nil && os.IsNotExist(err) { 67 | exec.Command("cp", *whisperFile, *whisperFile+".original").CombinedOutput() 68 | } 69 | 70 | cwsp, err := OpenWithOptions(*whisperFile, &Options{Compressed: true}) 71 | if err != nil { 72 | t.Fatal(err) 73 | } 74 | if err := cwsp.UpdateMany(data); err != nil { 75 | t.Fatal(err) 76 | } 77 | nps, err := cwsp.Fetch(from, until) 78 | if err != nil { 79 | t.Fatal(err) 80 | } 81 | 82 | wsp, err := OpenWithOptions(*whisperFile+".original", &Options{}) 83 | if err != nil { 84 | t.Fatal(err) 85 | } 86 | if err := wsp.UpdateMany(data); err != nil { 87 | t.Fatal(err) 88 | } 89 | ops, err := wsp.Fetch(from, until) 90 | if err != nil { 91 | 
t.Fatal(err) 92 | } 93 | 94 | if diff := cmp.Diff(nps, ops, cmp.AllowUnexported(TimeSeries{}), cmpopts.EquateNaNs()); diff != "" { 95 | t.Errorf("inplace convert failed\n%s\n", diff) 96 | pretty.Println(nps) 97 | pretty.Println(ops) 98 | } 99 | 100 | cwsp.Close() 101 | wsp.Close() 102 | } 103 | 104 | func TestBrokenWhisperFile(t *testing.T) { 105 | wsp, err := OpenWithOptions("test/var/lib/carbon/whisper/loadbalancers/group/external_102/externallb-108_ams4_prod_booking_com/haproxy/backend/chat_booking_com_https_ams4/server/intercom-1003_ams4_prod_booking_com/stot.wsp", &Options{}) 106 | if err != nil { 107 | t.Fatal(err) 108 | } 109 | // ps, err := wsp.Fetch(1552764920, 1552854180) 110 | // if err != nil { 111 | // t.Fatal(err) 112 | // } 113 | start := 1552764920 114 | end := 1552854180 115 | 116 | var points []dataPoint 117 | { 118 | archive := wsp.archives[0] 119 | b := make([]byte, archive.Size()) 120 | err := wsp.fileReadAt(b, archive.Offset()) 121 | if err != nil { 122 | t.Fatal(err) 123 | } 124 | points = unpackDataPoints(b) 125 | sort.Slice(points, func(i, j int) bool { 126 | return points[i].interval < points[j].interval 127 | }) 128 | 129 | // filter null data points 130 | var index int 131 | for i := 0; i < len(points); i++ { 132 | if start <= points[i].interval && points[i].interval <= end { 133 | points[index] = points[i] 134 | index++ 135 | } 136 | } 137 | points = points[:index] 138 | } 139 | 140 | log.Printf("len(points) = %+v\n", len(points)) 141 | log.Printf("points[0] = %+v\n", points[0]) 142 | log.Printf("points[len(points)-1] = %+v\n", points[len(points)-1]) 143 | 144 | cwsp, err := OpenWithOptions("tmp_stot.cwsp", &Options{}) 145 | 146 | archive := cwsp.archives[0] 147 | var nblock blockInfo 148 | nblock.index = 3 149 | nblock.lastByteBitPos = 7 150 | nblock.lastByteOffset = archive.blockOffset(nblock.index) 151 | archive.cblock = nblock 152 | archive.blockRanges = make([]blockRange, 4) 153 | archive.blockRanges[nblock.index].start = 0 154 | 
archive.blockRanges[nblock.index].end = 0 155 | 156 | log.Printf("nblock.lastByteOffset = %+v\n", nblock.lastByteOffset) 157 | log.Printf("archive.blockSize = %+v\n", archive.blockSize) 158 | 159 | const extraPointSize = 2 160 | blockBuffer := make([]byte, len(points)*(PointSize+extraPointSize)+endOfBlockSize) 161 | 162 | // debugCompress = true 163 | size, left, rotate := archive.AppendPointsToBlock(blockBuffer, points) 164 | log.Printf("size = %+v\n", size) 165 | log.Printf("len(left) = %+v\n", len(left)) 166 | log.Printf("rotate = %+v\n", rotate) 167 | 168 | // blockBuffer2 := blockBuffer[6510:] 169 | // for i := 0; i < len(blockBuffer2); i += 16 { 170 | // for j := i; j < i+16; j += 2 { 171 | // fmt.Printf("%04x ", blockBuffer2[j:j+2]) 172 | // } 173 | // fmt.Println("") 174 | // } 175 | 176 | var dst []dataPoint 177 | dst2, _, err := archive.ReadFromBlock(blockBuffer, dst, start, end) 178 | if err != nil { 179 | t.Fatal(err) 180 | } 181 | log.Printf("len(dst) = %+v\n", len(dst2)) 182 | log.Printf("archive.blockRanges[3].crc32 = %x\n", archive.blockRanges[3].crc32) 183 | for i, p := range dst2 { 184 | // continue 185 | fmt.Printf(" % 4d %d: %f\n", i, p.interval, p.value) 186 | } 187 | } 188 | 189 | func TestReplayFile(t *testing.T) { 190 | data, err := os.Open("test_data") 191 | if err != nil { 192 | panic(err) 193 | } 194 | var ps []*TimeSeriesPoint 195 | if err := json.NewDecoder(data).Decode(&ps); err != nil { 196 | panic(err) 197 | } 198 | 199 | Now = func() time.Time { return time.Unix(1553545592, 0) } 200 | defer func() { Now = func() time.Time { return time.Now() } }() 201 | 202 | fpath := fmt.Sprintf("replay.%d.cwsp", time.Now().Unix()) 203 | os.Remove(fpath) 204 | cwhisper, err := CreateWithOptions( 205 | fpath, 206 | []*Retention{ 207 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 208 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 209 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 210 | }, 211 | Sum, 212 | 0, 213 | 
&Options{Compressed: true, PointsPerBlock: 7200}, 214 | ) 215 | if err != nil { 216 | panic(err) 217 | } 218 | 219 | for i := 0; i < len(ps); i += 300 { 220 | // end := i + rand.Intn(300) + 1 221 | end := i + 300 222 | if end > len(ps) { 223 | end = len(ps) 224 | } 225 | if err := cwhisper.UpdateMany(ps[i:end]); err != nil { 226 | panic(err) 227 | } 228 | // i = end 229 | } 230 | if err := cwhisper.Close(); err != nil { 231 | panic(err) 232 | } 233 | 234 | psm := map[int]float64{} 235 | for _, p := range ps { 236 | psm[p.Time] = p.Value 237 | } 238 | cwhisper, err = OpenWithOptions(fpath, &Options{}) 239 | if err != nil { 240 | panic(err) 241 | } 242 | archive := cwhisper.archives[0] 243 | var readCount int 244 | for _, block := range archive.getSortedBlockRanges() { 245 | buf := make([]byte, archive.blockSize) 246 | if err := cwhisper.fileReadAt(buf, int64(archive.blockOffset(block.index))); err != nil { 247 | t.Errorf("blocks[%d].file.read: %s", block.index, err) 248 | } 249 | dst, _, err := archive.ReadFromBlock(buf, []dataPoint{}, block.start, block.end) 250 | if err != nil { 251 | t.Errorf("blocks[%d].read: %s", block.index, err) 252 | } 253 | for _, p := range dst { 254 | if psm[p.interval] != p.value { 255 | t.Errorf("block[%d][%d] = %v != %v", block.index, p.interval, p.value, psm[p.interval]) 256 | } 257 | readCount++ 258 | delete(psm, p.interval) 259 | } 260 | } 261 | 262 | // TODO: investigate why there are 17000+ points left and improve 263 | fmt.Println("len(psm) =", len(psm)) 264 | fmt.Println("readCount =", readCount) 265 | } 266 | 267 | func TestReplayFile2(t *testing.T) { 268 | data, err := os.Open("test3.json") 269 | if err != nil { 270 | panic(err) 271 | } 272 | var psArr [][]*TimeSeriesPoint 273 | if err := json.NewDecoder(data).Decode(&psArr); err != nil { 274 | panic(err) 275 | } 276 | 277 | fpath := fmt.Sprintf("test3_replay.wsp") 278 | os.Remove(fpath) 279 | os.Remove(fpath + ".cwsp") 280 | 281 | inMemory := true 282 | cwhisper, err := 
CreateWithOptions( 283 | fpath+".cwsp", 284 | []*Retention{ 285 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 286 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 287 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 288 | }, 289 | Average, 290 | 0, 291 | &Options{Compressed: true, PointsPerBlock: 7200, InMemory: inMemory}, 292 | ) 293 | if err != nil { 294 | panic(err) 295 | } 296 | ncwhisper, err := CreateWithOptions( 297 | fpath, 298 | []*Retention{ 299 | {secondsPerPoint: 1, numberOfPoints: 172800}, // 1s:2d 300 | {secondsPerPoint: 60, numberOfPoints: 40320}, // 1m:28d 301 | {secondsPerPoint: 3600, numberOfPoints: 17520}, // 1h:2y 302 | }, 303 | Average, 304 | 0, 305 | &Options{Compressed: false, PointsPerBlock: 7200, InMemory: inMemory}, 306 | ) 307 | if err != nil { 308 | panic(err) 309 | } 310 | cwhisper.Close() 311 | ncwhisper.Close() 312 | 313 | var start time.Time 314 | Now = func() time.Time { return start } 315 | defer func() { Now = func() time.Time { return time.Now() } }() 316 | 317 | for i, ps := range psArr { 318 | log.Printf("batch = %+v\n", i) 319 | log.Printf("len(ps) = %+v\n", len(ps)) 320 | start = time.Unix(int64(ps[len(ps)-1].Time), 0) 321 | cwhisper, err = OpenWithOptions(fpath+".cwsp", &Options{InMemory: inMemory}) 322 | if err != nil { 323 | t.Fatal(err) 324 | } 325 | ncwhisper, err = OpenWithOptions(fpath, &Options{InMemory: inMemory}) 326 | if err != nil { 327 | t.Fatal(err) 328 | } 329 | 330 | if err := cwhisper.UpdateMany(ps); err != nil { 331 | t.Fatal(err) 332 | } 333 | if err := ncwhisper.UpdateMany(ps); err != nil { 334 | t.Fatal(err) 335 | } 336 | 337 | if cwhisper.Extended { 338 | for _, a := range cwhisper.archives { 339 | t.Logf("extended: %s: %v\n", a.Retention, a.avgCompressedPointSize) 340 | } 341 | } 342 | 343 | if err := cwhisper.Close(); err != nil { 344 | t.Fatal(err) 345 | } 346 | if err := ncwhisper.Close(); err != nil { 347 | t.Fatal(err) 348 | } 349 | } 350 | 351 | if inMemory { 352 | if 
err := newMemFile(fpath).dumpOnDisk(fpath); err != nil { 353 | t.Fatal(err) 354 | } 355 | if err := newMemFile(fpath + ".cwsp").dumpOnDisk(fpath + ".cwsp"); err != nil { 356 | t.Fatal(err) 357 | } 358 | } 359 | 360 | t.Log("go", "run", "cmd/compare.go", "-v", "-now", fmt.Sprintf("%d", start.Unix()), "-ignore-buffer", fpath, fpath+".cwsp") 361 | output, err := exec.Command("go", "run", "cmd/compare.go", "-now", fmt.Sprintf("%d", start.Unix()), "-ignore-buffer", fpath, fpath+".cwsp").CombinedOutput() 362 | if err != nil { 363 | t.Log(string(output)) 364 | t.Error(err) 365 | } 366 | 367 | std, err := os.Stat(fpath) 368 | if err != nil { 369 | t.Error(err) 370 | } 371 | cmp, err := os.Stat(fpath + ".cwsp") 372 | if err != nil { 373 | t.Error(err) 374 | } 375 | t.Logf("compression ratio: %.2f\n", float64(cmp.Size()*100)/float64(std.Size())) 376 | } 377 | -------------------------------------------------------------------------------- /doc/compressed.md: -------------------------------------------------------------------------------- 1 | # Example of a compressed whisper file format 2 | 3 | retention policy: 4 | 5 | ``` 6 | 1s:2d 7 | 1m:28d 8 | 1h:2y 9 | ``` 10 | 11 | ``` 12 | +-----------------------------------+ 13 | | metric_header (63) | 14 | +-----------------------------------+ 15 | | archive_0_header (96) | 16 | +-----------------------------------+ 17 | | archive_1_header (96) | 18 | +-----------------------------------+ 19 | | archive_2_header (96) | 20 | +-----------------------------------+ 21 | | archive_0_index | 22 | | +-----------------------------+ | 23 | | | archive_0_block_0_info (16) | | 24 | | +-----------------------------+ | 25 | | | archive_0_block_1_info (16) | | 26 | | +-----------------------------+ | 27 | | | ... 
| | 28 | | +-----------------------------+ | 29 | | |archive_0_block_23_info (16) | | 30 | | +-----------------------------+ | 31 | | | archive_0_buffer (120) | | 32 | | +-----------------------------+ | 33 | +-----------------------------------+ 34 | | archive_1_index | 35 | | +-----------------------------+ | 36 | | | archive_1_block_0_info (16) | | 37 | | +-----------------------------+ | 38 | | | archive_1_block_1_info (16) | | 39 | | +-----------------------------+ | 40 | | | ... | | 41 | | +-----------------------------+ | 42 | | | archive_1_block_5_info (16) | | 43 | | +-----------------------------+ | 44 | | | archive_1_buffer (120) | | 45 | | +-----------------------------+ | 46 | +-----------------------------------+ 47 | | archive_2_index | 48 | | +-----------------------------+ | 49 | | | archive_2_block_0_info (16) | | 50 | | +-----------------------------+ | 51 | | | archive_2_block_1_info (16) | | 52 | | +-----------------------------+ | 53 | | | archive_2_block_3_info (16) | | 54 | | +-----------------------------+ | 55 | +-----------------------------------+ 56 | | archive_0_data | 57 | | +-----------------------------+ | 58 | | | archive_0_block_0 (14400) | | 59 | | +-----------------------------+ | 60 | | | archive_0_block_1 (14400) | | 61 | | +-----------------------------+ | 62 | | | ... | | 63 | | +-----------------------------+ | 64 | | | archive_0_block_23 (14400) | | 65 | | +-----------------------------+ | 66 | +-----------------------------------+ 67 | | archive_1_data | 68 | | +-----------------------------+ | 69 | | | archive_1_block_0 (14400) | | 70 | | +-----------------------------+ | 71 | | | archive_1_block_1 (14400) | | 72 | | +-----------------------------+ | 73 | | | ... 
| | 74 | | +-----------------------------+ | 75 | | | archive_1_block_5 (14400) | | 76 | | +-----------------------------+ | 77 | +-----------------------------------+ 78 | | archive_2_data | 79 | | +-----------------------------+ | 80 | | | archive_2_block_0 (14400) | | 81 | | +-----------------------------+ | 82 | | | archive_2_block_1 (14400) | | 83 | | +-----------------------------+ | 84 | | | archive_2_block_3 (14400) | | 85 | | +-----------------------------+ | 86 | +-----------------------------------+ 87 | ``` 88 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/go-graphite/go-whisper 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/google/go-cmp v0.5.8 7 | github.com/kr/pretty v0.3.0 8 | ) 9 | 10 | require ( 11 | github.com/kr/text v0.2.0 // indirect 12 | github.com/rogpeppe/go-internal v1.6.1 // indirect 13 | ) 14 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 2 | github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= 3 | github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 4 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 5 | github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= 6 | github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= 7 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 8 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 9 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 10 | github.com/kr/text v0.2.0/go.mod 
h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 11 | github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= 12 | github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= 13 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 14 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 15 | -------------------------------------------------------------------------------- /internal/fuzzy_archive/fuzz.go: -------------------------------------------------------------------------------- 1 | package fuzzy_whisper 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | 7 | whisper "github.com/go-graphite/go-whisper" 8 | ) 9 | 10 | func Fuzz(data []byte) int { 11 | archive := whisper.GenTestArchive(data, whisper.NewRetention(1, 172800)) 12 | dps, _, err := archive.ReadFromBlock(data, whisper.GenDataPointSlice(), -1, math.MaxInt64) 13 | if err != nil { 14 | panic(err) 15 | } 16 | if len(dps) == 0 { 17 | panic("failed to read") 18 | } 19 | buf := make([]byte, len(data)+5) 20 | _, left, _ := archive.AppendPointsToBlock(buf, dps) 21 | if len(left) > 0 { 22 | panic(fmt.Sprintf("len(left) = %d", len(left))) 23 | } 24 | for i, b := range buf { 25 | if b != data[i] { 26 | panic("failed to write") 27 | } 28 | } 29 | return 0 30 | } 31 | -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/Inactive.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/Inactive.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/SwapTotal.wsp.cwsp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/SwapTotal.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/discarded_full.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/discarded_full.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/header: -------------------------------------------------------------------------------- 1 | whisper_compressed 2 | -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/reach.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/reach.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/rx_byte_ipv6.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/rx_byte_ipv6.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/rx_errors.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/rx_errors.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/rx_fifo.wsp.cwsp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/rx_fifo.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/tx_colls.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/tx_colls.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/tx_drop.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/tx_drop.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/corpus/tx_errors.wsp.cwsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/go-graphite/go-whisper/e3110f57c01cc3812ef7295ecad10d9c9d7e691f/internal/fuzzy_whisper/corpus/tx_errors.wsp.cwsp -------------------------------------------------------------------------------- /internal/fuzzy_whisper/fuzz.go: -------------------------------------------------------------------------------- 1 | package fuzzy_whisper 2 | 3 | import ( 4 | "io/ioutil" 5 | "time" 6 | 7 | whisper "github.com/go-graphite/go-whisper" 8 | ) 9 | 10 | // TODO: not implemented yet? 
11 | // 12 | // skipcq: RVV-B0012 13 | func Fuzz(data []byte) int { 14 | f, err := ioutil.TempFile("cwhisper", "*") 15 | if err != nil { 16 | panic("failed to create tempfile: " + err.Error()) 17 | } 18 | db, err := whisper.OpenWithOptions(f.Name(), &whisper.Options{FLock: true}) 19 | if err != nil { 20 | panic(err) 21 | } 22 | if _, err := db.Fetch(int(time.Now().Add(time.Hour*24*-60).Unix()), int(time.Now().Unix())); err != nil { 23 | panic(err) 24 | } 25 | return 0 26 | } 27 | -------------------------------------------------------------------------------- /whisper_test.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "math" 8 | "math/rand" 9 | "os" 10 | "os/exec" 11 | "sort" 12 | "strings" 13 | "testing" 14 | "time" 15 | ) 16 | 17 | func checkBytes(t *testing.T, expected, received []byte) { 18 | if len(expected) != len(received) { 19 | t.Fatalf("Invalid number of bytes. Expected %v, received %v", len(expected), len(received)) 20 | } 21 | for i := range expected { 22 | if expected[i] != received[i] { 23 | t.Fatalf("Incorrect byte at %v. 
Expected %v, received %v", i+1, expected[i], received[i]) 24 | } 25 | } 26 | } 27 | 28 | // skipcq: RVV-A0005 29 | func testParseRetentionDef(t *testing.T, retentionDef string, expectedPrecision, expectedPoints int, hasError bool) { 30 | errTpl := fmt.Sprintf("Expected %%v to be %%v but received %%v for retentionDef %v", retentionDef) 31 | 32 | retention, err := ParseRetentionDef(retentionDef) 33 | 34 | if (err == nil && hasError) || (err != nil && !hasError) { 35 | if hasError { 36 | t.Fatalf("Expected error but received none for retentionDef %v", retentionDef) 37 | } else { 38 | t.Fatalf("Expected no error but received %v for retentionDef %v", err, retentionDef) 39 | } 40 | } 41 | if err == nil { 42 | if retention.secondsPerPoint != expectedPrecision { 43 | t.Fatalf(errTpl, "precision", expectedPrecision, retention.secondsPerPoint) 44 | } 45 | if retention.numberOfPoints != expectedPoints { 46 | t.Fatalf(errTpl, "points", expectedPoints, retention.numberOfPoints) 47 | } 48 | } 49 | } 50 | 51 | // skipcq: RVV-B0001 52 | func TestParseRetentionDef(t *testing.T) { 53 | testParseRetentionDef(t, "1s:5m", 1, 300, false) 54 | testParseRetentionDef(t, "1m:30m", 60, 30, false) 55 | testParseRetentionDef(t, "1m", 0, 0, true) 56 | testParseRetentionDef(t, "1m:30m:20s", 0, 0, true) 57 | testParseRetentionDef(t, "1f:30s", 0, 0, true) 58 | testParseRetentionDef(t, "1m:30f", 0, 0, true) 59 | } 60 | 61 | func TestParseRetentionDefs(t *testing.T) { 62 | retentions, err := ParseRetentionDefs("1s:5m,1m:30m") 63 | if err != nil { 64 | t.Fatalf("Unexpected error: %v", err) 65 | } 66 | if length := len(retentions); length != 2 { 67 | t.Fatalf("Expected 2 retentions, received %v", length) 68 | } 69 | } 70 | 71 | func TestSortRetentions(t *testing.T) { 72 | retentions := Retentions{ 73 | {secondsPerPoint: 300, numberOfPoints: 12}, 74 | {secondsPerPoint: 60, numberOfPoints: 30}, 75 | {secondsPerPoint: 1, numberOfPoints: 300}, 76 | } 77 | sort.Sort(retentionsByPrecision{retentions}) 78 | 
if retentions[0].secondsPerPoint != 1 { 79 | t.Fatalf("Retentions array is not sorted") 80 | } 81 | } 82 | 83 | func setUpCreate() (path string, fileExists func(string) bool, archiveList Retentions, tearDown func()) { 84 | path = "/tmp/whisper-testing.wsp" 85 | os.Remove(path) 86 | fileExists = func(path string) bool { 87 | fi, _ := os.Lstat(path) 88 | return fi != nil 89 | } 90 | archiveList = Retentions{ 91 | {secondsPerPoint: 1, numberOfPoints: 300}, 92 | {secondsPerPoint: 60, numberOfPoints: 30}, 93 | {secondsPerPoint: 300, numberOfPoints: 12}, 94 | } 95 | tearDown = func() { 96 | os.Remove(path) 97 | } 98 | return path, fileExists, archiveList, tearDown 99 | } 100 | 101 | func TestCreateCreatesFile(t *testing.T) { 102 | path, fileExists, retentions, tearDown := setUpCreate() 103 | expected := []byte{ 104 | // Metadata 105 | 0x00, 0x00, 0x00, 0x01, // Aggregation type 106 | 0x00, 0x00, 0x0e, 0x10, // Max retention 107 | 0x3f, 0x00, 0x00, 0x00, // xFilesFactor 108 | 0x00, 0x00, 0x00, 0x03, // Retention count 109 | // Archive Info 110 | // Retention 1 (1, 300) 111 | 0x00, 0x00, 0x00, 0x34, // offset 112 | 0x00, 0x00, 0x00, 0x01, // secondsPerPoint 113 | 0x00, 0x00, 0x01, 0x2c, // numberOfPoints 114 | // Retention 2 (60, 30) 115 | 0x00, 0x00, 0x0e, 0x44, // offset 116 | 0x00, 0x00, 0x00, 0x3c, // secondsPerPoint 117 | 0x00, 0x00, 0x00, 0x1e, // numberOfPoints 118 | // Retention 3 (300, 12) 119 | 0x00, 0x00, 0x0f, 0xac, // offset 120 | 0x00, 0x00, 0x01, 0x2c, // secondsPerPoint 121 | 0x00, 0x00, 0x00, 0x0c} // numberOfPoints 122 | whisper, err := Create(path, retentions, Average, 0.5) 123 | if err != nil { 124 | t.Fatalf("Failed to create whisper file: %v", err) 125 | } 126 | if whisper.aggregationMethod != Average { 127 | t.Fatalf("Unexpected aggregationMethod %v, expected %v", whisper.aggregationMethod, Average) 128 | } 129 | if whisper.maxRetention != 3600 { 130 | t.Fatalf("Unexpected maxRetention %v, expected 3600", whisper.maxRetention) 131 | } 132 | if 
whisper.xFilesFactor != 0.5 { 133 | t.Fatalf("Unexpected xFilesFactor %v, expected 0.5", whisper.xFilesFactor) 134 | } 135 | if len(whisper.archives) != 3 { 136 | t.Fatalf("Unexpected archive count %v, expected 3", len(whisper.archives)) 137 | } 138 | whisper.Close() 139 | if !fileExists(path) { 140 | t.Fatalf("File does not exist after create") 141 | } 142 | file, err := os.Open(path) 143 | if err != nil { 144 | t.Fatalf("Failed to open whisper file") 145 | } 146 | contents := make([]byte, len(expected)) 147 | file.Read(contents) 148 | 149 | for i := 0; i < len(contents); i++ { 150 | if expected[i] != contents[i] { 151 | // Show what is being written 152 | // for i := 0; i < 13; i++ { 153 | // for j := 0; j < 4; j ++ { 154 | // fmt.Printf(" %02x", contents[(i*4)+j]) 155 | // } 156 | // fmt.Print("\n") 157 | // } 158 | t.Fatalf("File is incorrect at character %v, expected %x got %x", i, expected[i], contents[i]) 159 | } 160 | } 161 | 162 | // test size 163 | info, err := os.Stat(path) 164 | if err != nil { 165 | t.Error(err) 166 | } 167 | if info.Size() != 4156 { 168 | t.Fatalf("File size is incorrect, expected %v got %v", 4156, info.Size()) 169 | } 170 | tearDown() 171 | } 172 | 173 | func TestCreateFileAlreadyExists(t *testing.T) { 174 | path, _, retentions, tearDown := setUpCreate() 175 | os.Create(path) 176 | _, err := Create(path, retentions, Average, 0.5) 177 | if err == nil { 178 | t.Fatalf("Existing file should cause create to fail.") 179 | } 180 | tearDown() 181 | } 182 | 183 | func TestCreateFileInvalidRetentionDefs(t *testing.T) { 184 | path, _, retentions, tearDown := setUpCreate() 185 | // Add a small retention def on the end 186 | retentions = append(retentions, &Retention{secondsPerPoint: 1, numberOfPoints: 200}) 187 | _, err := Create(path, retentions, Average, 0.5) 188 | if err == nil { 189 | t.Fatalf("Invalid retention definitions should cause create to fail.") 190 | } 191 | tearDown() 192 | } 193 | 194 | func TestOpenFile(t *testing.T) { 195 | 
path, _, retentions, tearDown := setUpCreate() 196 | whisper1, err := Create(path, retentions, Average, 0.5) 197 | if err != nil { 198 | t.Errorf("Failed to create: %v", err) 199 | } 200 | 201 | // write some points 202 | now := int(time.Now().Unix()) 203 | for i := 0; i < 2; i++ { 204 | whisper1.Update(100, now-(i*1)) 205 | } 206 | 207 | whisper2, err := Open(path) 208 | if err != nil { 209 | t.Fatalf("Failed to open whisper file: %v", err) 210 | } 211 | if whisper1.aggregationMethod != whisper2.aggregationMethod { 212 | t.Fatalf("aggregationMethod did not match, expected %v, received %v", whisper1.aggregationMethod, whisper2.aggregationMethod) 213 | } 214 | if whisper1.maxRetention != whisper2.maxRetention { 215 | t.Fatalf("maxRetention did not match, expected %v, received %v", whisper1.maxRetention, whisper2.maxRetention) 216 | } 217 | if whisper1.xFilesFactor != whisper2.xFilesFactor { 218 | t.Fatalf("xFilesFactor did not match, expected %v, received %v", whisper1.xFilesFactor, whisper2.xFilesFactor) 219 | } 220 | if len(whisper1.archives) != len(whisper2.archives) { 221 | t.Fatalf("archive count does not match, expected %v, received %v", len(whisper1.archives), len(whisper2.archives)) 222 | } 223 | for i := range whisper1.archives { 224 | if whisper1.archives[i].offset != whisper2.archives[i].offset { 225 | t.Fatalf("archive mismatch offset at %v [%v, %v]", i, whisper1.archives[i].offset, whisper2.archives[i].offset) 226 | } 227 | if whisper1.archives[i].Retention.secondsPerPoint != whisper2.archives[i].Retention.secondsPerPoint { 228 | t.Fatalf("Retention.secondsPerPoint mismatch offset at %v [%v, %v]", i, whisper1.archives[i].Retention.secondsPerPoint, whisper2.archives[i].Retention.secondsPerPoint) 229 | } 230 | if whisper1.archives[i].Retention.numberOfPoints != whisper2.archives[i].Retention.numberOfPoints { 231 | t.Fatalf("Retention.numberOfPoints mismatch offset at %v [%v, %v]", i, whisper1.archives[i].Retention.numberOfPoints, 
whisper2.archives[i].Retention.numberOfPoints) 232 | } 233 | 234 | } 235 | 236 | result1, err := whisper1.Fetch(now-3, now) 237 | if err != nil { 238 | t.Fatalf("Error retrieving result from created whisper") 239 | } 240 | result2, err := whisper2.Fetch(now-3, now) 241 | if err != nil { 242 | t.Fatalf("Error retrieving result from opened whisper") 243 | } 244 | 245 | if result1.String() != result2.String() { 246 | t.Fatalf("Results do not match") 247 | } 248 | 249 | tearDown() 250 | } 251 | 252 | /* 253 | Test the full cycle of creating a whisper file, adding some 254 | data points to it and then fetching a time series. 255 | */ 256 | func testCreateUpdateFetch(t *testing.T, aggregationMethod AggregationMethod, xFilesFactor float32, secondsAgo, fromAgo, fetchLength, step int, currentValue, increment float64) *TimeSeries { 257 | var whisper *Whisper 258 | var err error 259 | path, _, archiveList, tearDown := setUpCreate() 260 | whisper, err = Create(path, archiveList, aggregationMethod, xFilesFactor) 261 | if err != nil { 262 | t.Fatalf("Failed create: %v", err) 263 | } 264 | defer whisper.Close() 265 | oldestTime := whisper.StartTime() 266 | now := int(time.Now().Unix()) 267 | 268 | if (now - whisper.maxRetention) != oldestTime { 269 | t.Fatalf("Invalid whisper start time, expected %v, received %v", oldestTime, now-whisper.maxRetention) 270 | } 271 | 272 | for i := 0; i < secondsAgo; i++ { 273 | err = whisper.Update(currentValue, now-secondsAgo+i) 274 | if err != nil { 275 | t.Fatalf("Unexpected error for %v: %v", i, err) 276 | } 277 | currentValue += increment 278 | } 279 | 280 | fromTime := now - fromAgo 281 | untilTime := fromTime + fetchLength 282 | 283 | timeSeries, err := whisper.Fetch(fromTime, untilTime) 284 | if err != nil { 285 | t.Fatalf("Unexpected error: %v", err) 286 | } 287 | if !validTimestamp(timeSeries.fromTime, fromTime, step) { 288 | t.Fatalf("Invalid fromTime [%v/%v], expected %v, received %v", secondsAgo, fromAgo, fromTime, 
timeSeries.fromTime) 289 | } 290 | if !validTimestamp(timeSeries.untilTime, untilTime, step) { 291 | t.Fatalf("Invalid untilTime [%v/%v], expected %v, received %v", secondsAgo, fromAgo, untilTime, timeSeries.untilTime) 292 | } 293 | if timeSeries.step != step { 294 | t.Fatalf("Invalid step [%v/%v], expected %v, received %v", secondsAgo, fromAgo, step, timeSeries.step) 295 | } 296 | tearDown() 297 | 298 | return timeSeries 299 | } 300 | 301 | func TestCheckEmpty(t *testing.T) { 302 | var whisper *Whisper 303 | var err error 304 | var empty bool 305 | path, _, retentions, tearDown := setUpCreate() 306 | whisper, err = Create(path, retentions, Average, 0.5) 307 | if err != nil { 308 | t.Errorf("Failed to create: %v", err) 309 | } 310 | defer whisper.Close() 311 | now := int(time.Now().Unix()) 312 | oldestTime := now - 60 313 | 314 | empty, err = whisper.CheckEmpty(oldestTime, now) 315 | if err != nil { 316 | t.Fatalf("Error while check whisper file are empty: %s", err) 317 | } 318 | if !empty { 319 | t.Fatal("Series should be empty in a full check, but it dosent") 320 | } 321 | 322 | err = whisper.Update(1, now-30) 323 | if err != nil { 324 | t.Fatalf("Unexpected error for updating whisper file%s", err) 325 | } 326 | 327 | empty, err = whisper.CheckEmpty(oldestTime, now) 328 | if err != nil { 329 | t.Fatalf("Error while check whisper file are empty: %s", err) 330 | } 331 | if empty { 332 | t.Fatal("Series should be not empty in a full check, but it is") 333 | } 334 | 335 | empty, err = whisper.CheckEmpty(0, now) 336 | if err != nil { 337 | t.Fatalf("Error while check whisper file are empty: %s", err) 338 | } 339 | if empty { 340 | t.Fatal("Series should be not empty in a full check, but it is") 341 | } 342 | 343 | empty, err = whisper.CheckEmpty(now-100, now-40) 344 | if err != nil { 345 | t.Fatalf("Error while check whisper file are empty: %s", err) 346 | } 347 | if !empty { 348 | t.Fatal("Series should be not empty in a full check, but it is") 349 | } 350 | 351 | 
tearDown() 352 | } 353 | 354 | func validTimestamp(value, stamp, step int) bool { 355 | return value == nearestStep(stamp, step) || value == nearestStep(stamp, step)+step 356 | } 357 | func nearestStep(stamp, step int) int { 358 | return stamp - (stamp % step) + step 359 | } 360 | 361 | func assertFloatAlmostEqual(t *testing.T, received, expected, slop float64) { 362 | if math.Abs(expected-received) > slop { 363 | t.Fatalf("Expected %v to be within %v of %v", expected, slop, received) 364 | } 365 | } 366 | 367 | func assertFloatEqual(t *testing.T, received, expected float64) { 368 | if math.Abs(expected-received) > 0.00001 { 369 | t.Fatalf("Expected %v, received %v", expected, received) 370 | } 371 | } 372 | 373 | func TestFetchEmptyTimeseries(t *testing.T) { 374 | path, _, archiveList, tearDown := setUpCreate() 375 | whisper, err := Create(path, archiveList, Sum, 0.5) 376 | if err != nil { 377 | t.Fatalf("Failed create: %v", err) 378 | } 379 | defer whisper.Close() 380 | 381 | now := int(time.Now().Unix()) 382 | result, err := whisper.Fetch(now-3, now) 383 | if err != nil { 384 | t.Error(err) 385 | } 386 | for _, point := range result.Points() { 387 | if !math.IsNaN(point.Value) { 388 | t.Fatalf("Expecting NaN values got '%v'", point.Value) 389 | } 390 | } 391 | 392 | tearDown() 393 | } 394 | 395 | // skipcq: RVV-B0001 396 | func TestCreateUpdateFetch(t *testing.T) { 397 | var timeSeries *TimeSeries 398 | timeSeries = testCreateUpdateFetch(t, Average, 0.5, 3500, 3500, 1000, 300, 0.5, 0.2) 399 | assertFloatAlmostEqual(t, timeSeries.values[1], 150.1, 58.0) 400 | assertFloatAlmostEqual(t, timeSeries.values[2], 210.75, 28.95) 401 | 402 | timeSeries = testCreateUpdateFetch(t, Sum, 0.5, 600, 600, 500, 60, 0.5, 0.2) 403 | assertFloatAlmostEqual(t, timeSeries.values[0], 18.35, 5.95) 404 | assertFloatAlmostEqual(t, timeSeries.values[1], 30.35, 5.95) 405 | // 4 is a crazy one because it fluctuates between 60 and ~4k 406 | assertFloatAlmostEqual(t, timeSeries.values[5], 
4356.05, 500.0) 407 | 408 | timeSeries = testCreateUpdateFetch(t, Last, 0.5, 300, 300, 200, 1, 0.5, 0.2) 409 | assertFloatAlmostEqual(t, timeSeries.values[0], 0.7, 0.001) 410 | assertFloatAlmostEqual(t, timeSeries.values[10], 2.7, 0.001) 411 | assertFloatAlmostEqual(t, timeSeries.values[20], 4.7, 0.001) 412 | 413 | } 414 | 415 | // Test for a bug in python whisper library: https://github.com/graphite-project/whisper/pull/136 416 | func TestCreateUpdateFetchOneValue(t *testing.T) { 417 | timeSeries := testCreateUpdateFetch(t, Average, 0.5, 3500, 3500, 1, 300, 0.5, 0.2) 418 | if len(timeSeries.values) > 1 { 419 | t.Fatalf("More then one point fetched\n") 420 | } 421 | } 422 | 423 | func BenchmarkCreateUpdateFetch(b *testing.B) { 424 | path, _, archiveList, tearDown := setUpCreate() 425 | var err error 426 | var whisper *Whisper 427 | var secondsAgo, now, fromTime, untilTime int 428 | var currentValue, increment float64 429 | for i := 0; i < b.N; i++ { 430 | whisper, err = Create(path, archiveList, Average, 0.5) 431 | if err != nil { 432 | b.Fatalf("Failed create %v", err) 433 | } 434 | 435 | secondsAgo = 3500 436 | currentValue = 0.5 437 | increment = 0.2 438 | now = int(time.Now().Unix()) 439 | 440 | for i := 0; i < secondsAgo; i++ { 441 | err = whisper.Update(currentValue, now-secondsAgo+i) 442 | if err != nil { 443 | b.Fatalf("Unexpected error for %v: %v", i, err) 444 | } 445 | currentValue += increment 446 | } 447 | 448 | fromTime = now - secondsAgo 449 | untilTime = fromTime + 1000 450 | 451 | whisper.Fetch(fromTime, untilTime) 452 | whisper.Close() 453 | tearDown() 454 | } 455 | } 456 | 457 | func BenchmarkFairCreateUpdateFetch(b *testing.B) { 458 | path, _, archiveList, tearDown := setUpCreate() 459 | var err error 460 | var whisper *Whisper 461 | var secondsAgo, now, fromTime, untilTime int 462 | var currentValue, increment float64 463 | for i := 0; i < b.N; i++ { 464 | whisper, err = Create(path, archiveList, Average, 0.5) 465 | if err != nil { 466 | 
b.Fatalf("Failed create %v", err) 467 | } 468 | whisper.Close() 469 | 470 | secondsAgo = 3500 471 | currentValue = 0.5 472 | increment = 0.2 473 | now = int(time.Now().Unix()) 474 | 475 | for i := 0; i < secondsAgo; i++ { 476 | whisper, err = Open(path) 477 | if err != nil { 478 | b.Fatalf("Unexpected error for %v: %v", i, err) 479 | } 480 | err = whisper.Update(currentValue, now-secondsAgo+i) 481 | if err != nil { 482 | b.Fatalf("Unexpected error for %v: %v", i, err) 483 | } 484 | currentValue += increment 485 | whisper.Close() 486 | } 487 | 488 | fromTime = now - secondsAgo 489 | untilTime = fromTime + 1000 490 | 491 | whisper, err = Open(path) 492 | if err != nil { 493 | b.Error(err) 494 | } 495 | whisper.Fetch(fromTime, untilTime) 496 | whisper.Close() 497 | tearDown() 498 | } 499 | } 500 | 501 | func testCreateUpdateManyFetch(t *testing.T, aggregationMethod AggregationMethod, xFilesFactor float32, points []*TimeSeriesPoint, fromAgo, fetchLength int) *TimeSeries { 502 | var whisper *Whisper 503 | var err error 504 | path, _, archiveList, tearDown := setUpCreate() 505 | whisper, err = Create(path, archiveList, aggregationMethod, xFilesFactor) 506 | if err != nil { 507 | t.Fatalf("Failed create: %v", err) 508 | } 509 | defer whisper.Close() 510 | now := int(time.Now().Unix()) 511 | 512 | whisper.UpdateMany(points) 513 | 514 | fromTime := now - fromAgo 515 | untilTime := fromTime + fetchLength 516 | 517 | timeSeries, err := whisper.Fetch(fromTime, untilTime) 518 | if err != nil { 519 | t.Fatalf("Unexpected error: %v", err) 520 | } 521 | tearDown() 522 | 523 | return timeSeries 524 | } 525 | 526 | func makeGoodPoints(count, step int, value func(int) float64) []*TimeSeriesPoint { 527 | points := make([]*TimeSeriesPoint, count) 528 | now := int(time.Now().Unix()) 529 | for i := 0; i < count; i++ { 530 | points[i] = &TimeSeriesPoint{now - (i * step), value(i)} 531 | } 532 | return points 533 | } 534 | 535 | func makeBadPoints(count, minAge int) []*TimeSeriesPoint { 
536 | points := make([]*TimeSeriesPoint, count) 537 | now := int(time.Now().Unix()) 538 | for i := 0; i < count; i++ { 539 | points[i] = &TimeSeriesPoint{now - (minAge + i), 123.456} 540 | } 541 | return points 542 | } 543 | 544 | func printPoints(points []*TimeSeriesPoint) { 545 | fmt.Print("[") 546 | for i, point := range points { 547 | if i > 0 { 548 | fmt.Print(", ") 549 | } 550 | fmt.Printf("%v", point) 551 | } 552 | fmt.Println("]") 553 | } 554 | 555 | // skipcq: RVV-B0001 556 | func TestCreateUpdateManyFetch(t *testing.T) { 557 | var timeSeries *TimeSeries 558 | 559 | points := makeGoodPoints(1000, 2, func(i int) float64 { return float64(i) }) 560 | points = append(points, points[len(points)-1]) 561 | timeSeries = testCreateUpdateManyFetch(t, Sum, 0.5, points, 1000, 800) 562 | 563 | // fmt.Println(timeSeries) 564 | 565 | assertFloatAlmostEqual(t, timeSeries.values[0], 455, 15) 566 | 567 | // all the ones 568 | points = makeGoodPoints(10000, 1, func(_ int) float64 { return 1 }) 569 | timeSeries = testCreateUpdateManyFetch(t, Sum, 0.5, points, 10000, 10000) 570 | for i := 0; i < 6; i++ { 571 | assertFloatEqual(t, timeSeries.values[i], 1) 572 | } 573 | for i := 6; i < 10; i++ { 574 | assertFloatEqual(t, timeSeries.values[i], 5) 575 | } 576 | } 577 | 578 | // should not panic if all points are out of range 579 | func TestCreateUpdateManyOnly_old_points(t *testing.T) { 580 | points := makeBadPoints(1, 10000) 581 | 582 | path, _, archiveList, tearDown := setUpCreate() 583 | whisper, err := Create(path, archiveList, Sum, 0.5) 584 | if err != nil { 585 | t.Fatalf("Failed create: %v", err) 586 | } 587 | defer whisper.Close() 588 | 589 | whisper.UpdateMany(points) 590 | 591 | tearDown() 592 | } 593 | 594 | func Test_extractPoints(t *testing.T) { 595 | points := makeGoodPoints(100, 1, func(i int) float64 { return float64(i) }) 596 | now := int(time.Now().Unix()) 597 | currentPoints, remainingPoints := extractPoints(points, now, 50) 598 | if length := 
len(currentPoints); length != 50 { 599 | t.Fatalf("First: %v", length) 600 | } 601 | if length := len(remainingPoints); length != 50 { 602 | t.Fatalf("Second: %v", length) 603 | } 604 | } 605 | 606 | // extractPoints should return empty slices if the first point is out of range 607 | func Test_extractPoints_only_old_points(t *testing.T) { 608 | now := int(time.Now().Unix()) 609 | points := makeBadPoints(1, 100) 610 | 611 | currentPoints, remainingPoints := extractPoints(points, now, 50) 612 | if length := len(currentPoints); length != 0 { 613 | t.Fatalf("First: %v", length) 614 | } 615 | if length := len(remainingPoints); length != 1 { 616 | t.Fatalf("Second2: %v", length) 617 | } 618 | } 619 | 620 | func test_aggregate(t *testing.T, method AggregationMethod, expected float64) { 621 | received := aggregate(method, []float64{1.0, 2.0, 3.0, 5.0, 4.0}) 622 | if expected != received { 623 | t.Fatalf("Expected %v, received %v", expected, received) 624 | } 625 | } 626 | func Test_aggregateAverage(t *testing.T) { 627 | test_aggregate(t, Average, 3.0) 628 | } 629 | 630 | func Test_aggregateSum(t *testing.T) { 631 | test_aggregate(t, Sum, 15.0) 632 | } 633 | 634 | func Test_aggregateFirst(t *testing.T) { 635 | test_aggregate(t, First, 1.0) 636 | } 637 | 638 | func Test_aggregateLast(t *testing.T) { 639 | test_aggregate(t, Last, 4.0) 640 | } 641 | 642 | func Test_aggregateMax(t *testing.T) { 643 | test_aggregate(t, Max, 5.0) 644 | } 645 | 646 | func Test_aggregateMin(t *testing.T) { 647 | test_aggregate(t, Min, 1.0) 648 | } 649 | 650 | func TestDataPointBytes(t *testing.T) { 651 | point := dataPoint{1234, 567.891} 652 | b := []byte{0, 0, 4, 210, 64, 129, 191, 32, 196, 155, 165, 227} 653 | checkBytes(t, b, point.Bytes()) 654 | } 655 | 656 | func TestTimeSeriesPoints(t *testing.T) { 657 | ts := TimeSeries{fromTime: 1348003785, untilTime: 1348003795, step: 1, values: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}} 658 | points := ts.Points() 659 | if length := len(points); length != 
10 { 660 | t.Fatalf("Unexpected number of points in time series, %v", length) 661 | } 662 | } 663 | 664 | func TestUpdateManyWithManyRetentions(t *testing.T) { 665 | path, _, archiveList, tearDown := setUpCreate() 666 | lastArchive := archiveList[len(archiveList)-1] 667 | 668 | valueMin := 41 669 | valueMax := 43 670 | 671 | whisper, err := Create(path, archiveList, Average, 0.5) 672 | if err != nil { 673 | t.Fatalf("Failed create: %v", err) 674 | } 675 | 676 | points := make([]*TimeSeriesPoint, 1) 677 | 678 | now := int(time.Now().Unix()) 679 | for i := 0; i < lastArchive.secondsPerPoint*2; i++ { 680 | points[0] = &TimeSeriesPoint{ 681 | Time: now - i, 682 | Value: float64(valueMin*(i%2) + valueMax*((i+1)%2)), // valueMin, valueMax, valueMin... 683 | } 684 | whisper.UpdateMany(points) 685 | } 686 | 687 | whisper.Close() 688 | 689 | // check data in last archive 690 | whisper, err = Open(path) 691 | if err != nil { 692 | t.Fatalf("Failed open: %v", err) 693 | } 694 | 695 | result, err := whisper.Fetch(now-lastArchive.numberOfPoints*lastArchive.secondsPerPoint, now) 696 | if err != nil { 697 | t.Fatalf("Failed fetch: %v", err) 698 | } 699 | 700 | foundValues := 0 701 | for i := 0; i < len(result.values); i++ { 702 | if !math.IsNaN(result.values[i]) { 703 | if result.values[i] >= float64(valueMin) && 704 | result.values[i] <= float64(valueMax) { 705 | foundValues++ 706 | } 707 | } 708 | } 709 | if foundValues < 2 { 710 | t.Fatalf("Not found values in archive %#v", lastArchive) 711 | } 712 | 713 | whisper.Close() 714 | 715 | tearDown() 716 | } 717 | 718 | func TestUpdateManyWithEqualTimestamp(t *testing.T) { 719 | now := int(time.Now().Unix()) 720 | points := []*TimeSeriesPoint{} 721 | 722 | // add points 723 | // now timestamp: 0,99,2,97,...,3,99,1 724 | // now-1 timestamp: 100,1,98,...,97,2,99 725 | 726 | for i := 0; i < 100; i++ { 727 | if i%2 == 0 { 728 | points = append( 729 | points, 730 | &TimeSeriesPoint{now, float64(i)}, 731 | &TimeSeriesPoint{now - 1, 
float64(100 - i)}, 732 | ) 733 | } else { 734 | points = append( 735 | points, 736 | &TimeSeriesPoint{now, float64(100 - i)}, 737 | &TimeSeriesPoint{now - 1, float64(i)}, 738 | ) 739 | } 740 | } 741 | 742 | result := testCreateUpdateManyFetch(t, Average, 0.5, points, 2, 10) 743 | 744 | if result.values[0] != 99.0 { 745 | t.Fatalf("Incorrect saved value. Expected %v, received %v", 99.0, result.values[0]) 746 | } 747 | if result.values[1] != 1.0 { 748 | t.Fatalf("Incorrect saved value. Expected %v, received %v", 1.0, result.values[1]) 749 | } 750 | } 751 | 752 | func TestOpenValidatation(t *testing.T) { 753 | 754 | testOpen := func(data []byte) { 755 | path, _, _, tearDown := setUpCreate() 756 | defer tearDown() 757 | 758 | err := ioutil.WriteFile(path, data, 0777) 759 | if err != nil { 760 | t.Fatal(err) 761 | } 762 | 763 | wsp, err := Open(path) 764 | if wsp != nil { 765 | t.Fatal("Opened bad file") 766 | } 767 | if err == nil { 768 | t.Fatal("No error with file") 769 | } 770 | } 771 | 772 | testWrite := func(data []byte) { 773 | path, _, _, tearDown := setUpCreate() 774 | defer tearDown() 775 | 776 | err := ioutil.WriteFile(path, data, 0777) 777 | if err != nil { 778 | t.Fatal(err) 779 | } 780 | 781 | wsp, err := Open(path) 782 | if wsp == nil || err != nil { 783 | t.Fatal("Open error") 784 | } 785 | 786 | err = wsp.Update(42, int(time.Now().Unix())) 787 | if err == nil { 788 | t.Fatal("Update broken wsp without error") 789 | } 790 | 791 | points := makeGoodPoints(1000, 2, func(i int) float64 { return float64(i) }) 792 | err = wsp.UpdateMany(points) 793 | if err == nil { 794 | t.Fatal("Update broken wsp without error") 795 | } 796 | } 797 | 798 | // Bad file with archiveCount = 1296223489 799 | testOpen([]byte{ 800 | 0xb8, 0x81, 0xd1, 0x1, 801 | 0xc, 0x0, 0x1, 0x2, 802 | 0x2e, 0x0, 0x0, 0x0, 803 | 0x4d, 0x42, 0xcd, 0x1, // archiveCount 804 | 0xc, 0x0, 0x2, 0x2, 805 | }) 806 | 807 | fullHeader := []byte{ 808 | // Metadata 809 | 0x00, 0x00, 0x00, 0x01, // 
Aggregation type 810 | 0x00, 0x00, 0x0e, 0x10, // Max retention 811 | 0x3f, 0x00, 0x00, 0x00, // xFilesFactor 812 | 0x00, 0x00, 0x00, 0x03, // Retention count 813 | // Archive Info 814 | // Retention 1 (1, 300) 815 | 0x00, 0x00, 0x00, 0x34, // offset 816 | 0x00, 0x00, 0x00, 0x01, // secondsPerPoint 817 | 0x00, 0x00, 0x01, 0x2c, // numberOfPoints 818 | // Retention 2 (60, 30) 819 | 0x00, 0x00, 0x0e, 0x44, // offset 820 | 0x00, 0x00, 0x00, 0x3c, // secondsPerPoint 821 | 0x00, 0x00, 0x00, 0x1e, // numberOfPoints 822 | // Retention 3 (300, 12) 823 | 0x00, 0x00, 0x0f, 0xac, // offset 824 | 0x00, 0x00, 0x01, 0x2c, // secondsPerPoint 825 | 0x00, 0x00, 0x00, 0x0c, // numberOfPoints 826 | } 827 | 828 | for i := 0; i < len(fullHeader); i++ { 829 | testOpen(fullHeader[:i]) 830 | } 831 | 832 | testWrite(fullHeader) 833 | } 834 | 835 | func testEqualIntervals(intervals1, intervals2 []int) bool { 836 | if len(intervals1) != len(intervals2) { 837 | return false 838 | } 839 | for i, interval1 := range intervals1 { 840 | if interval1 != intervals2[i] { 841 | return false 842 | } 843 | } 844 | return true 845 | } 846 | 847 | func TestPackSequences(t *testing.T) { 848 | archive := &archiveInfo{ 849 | Retention: Retention{ 850 | secondsPerPoint: 1, 851 | }, 852 | } 853 | points := []dataPoint{ 854 | {interval: 1348003785, value: 1}, 855 | {interval: 1348003786, value: 2}, 856 | {interval: 1348003787, value: 3}, 857 | {interval: 1348003789, value: 5}, 858 | {interval: 1348003790, value: 6}, 859 | {interval: 1348003792, value: 8}, 860 | } 861 | gotIntervals, _ := packSequences(archive, points) 862 | wantIntervals := []int{ 863 | 1348003785, 864 | 1348003789, 865 | 1348003792, 866 | } 867 | if !testEqualIntervals(gotIntervals, wantIntervals) { 868 | t.Errorf("intervals unmatch, got=%v, want=%v", 869 | gotIntervals, wantIntervals) 870 | } 871 | } 872 | 873 | var keepUpdateConfigTestData = flag.Bool("keep-update-config-test-data", false, "keep update config test data") 874 | 875 | // TODO: 
mix aggregation policy 876 | func TestUpdateConfig(t *testing.T) { 877 | for _, c := range []struct { 878 | oldRets string 879 | newRets string 880 | oldAggregation AggregationMethod 881 | newAggregation AggregationMethod 882 | oldXFF float32 883 | newXFF float32 884 | checkRanges [][2]time.Duration 885 | }{ 886 | { 887 | oldRets: "1m:30d,1h:10y", 888 | newRets: "1m:60d,1h:20y", 889 | oldAggregation: Average, 890 | newAggregation: Sum, 891 | oldXFF: 0.5, 892 | newXFF: 0, 893 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 3650, 0}}, 894 | }, 895 | { 896 | oldRets: "1m:60d,1h:20y", 897 | newRets: "1m:30d,1h:10y", 898 | oldAggregation: Average, 899 | newAggregation: Sum, 900 | oldXFF: 0.5, 901 | newXFF: 0, 902 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 3650, 0}}, 903 | }, 904 | 905 | { 906 | oldRets: "1m:30d,1h:10y", 907 | newRets: "1s:2d,30s:60d,30m:20y", 908 | oldAggregation: Average, 909 | newAggregation: Sum, 910 | oldXFF: 0.5, 911 | newXFF: 0, 912 | // checkRanges: [][2]time.Duration{}, 913 | }, 914 | { 915 | oldRets: "1s:2d,30s:60d,30m:20y", 916 | newRets: "1m:30d,1h:10y", 917 | oldAggregation: Average, 918 | newAggregation: Sum, 919 | oldXFF: 0.5, 920 | newXFF: 0, 921 | // checkRanges: [][2]time.Duration{}, 922 | }, 923 | 924 | { 925 | oldRets: "1m:30d,1h:10y", 926 | newRets: "1m:60d,1h:5y,1d:100y", 927 | oldAggregation: Average, 928 | newAggregation: Sum, 929 | oldXFF: 0.5, 930 | newXFF: 0, 931 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 365 * 5, 0}}, 932 | }, 933 | { 934 | oldRets: "1m:60d,1h:5y,1d:100y", 935 | newRets: "1m:30d,1h:10y", 936 | oldAggregation: Average, 937 | newAggregation: Sum, 938 | oldXFF: 0.5, 939 | newXFF: 0, 940 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 365 * 5, 0}}, 941 | }, 942 | 943 | { 944 | oldRets: "1m:30d,1h:10y", 945 | newRets: "1m:60d,30m:1y,1h:10y", 946 | oldAggregation: Average, 947 | newAggregation: Sum, 948 | oldXFF: 0.5, 
949 | newXFF: 0, 950 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 365 * 10, 0}}, 951 | }, 952 | { 953 | oldRets: "1m:60d,30m:1y,1h:10y", 954 | newRets: "1m:30d,1h:10y", 955 | oldAggregation: Average, 956 | newAggregation: Sum, 957 | oldXFF: 0.5, 958 | newXFF: 0, 959 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 365 * 10, 0}}, 960 | }, 961 | 962 | { 963 | oldRets: "1m:30d,1h:10y", 964 | newRets: "30s:30d,30m:1y,1h:10y", 965 | oldAggregation: Average, 966 | newAggregation: Sum, 967 | oldXFF: 0.5, 968 | newXFF: 0, 969 | checkRanges: [][2]time.Duration{{-3600 * 24 * 365 * 10, 0}}, 970 | }, 971 | { 972 | oldRets: "30s:30d,30m:1y,1h:10y", 973 | newRets: "1m:30d,1h:10y", 974 | oldAggregation: Average, 975 | newAggregation: Sum, 976 | oldXFF: 0.5, 977 | newXFF: 0, 978 | checkRanges: [][2]time.Duration{{-3600 * 24 * 365 * 10, 0}}, 979 | }, 980 | 981 | { 982 | oldRets: "1m:30d,1h:10y", 983 | newRets: "1s:4d,1m:60d,1h:20y", 984 | oldAggregation: Average, 985 | newAggregation: Sum, 986 | oldXFF: 0.5, 987 | newXFF: 0, 988 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 365 * 10, 0}}, 989 | }, 990 | { 991 | oldRets: "1s:4d,1m:60d,1h:20y", 992 | newRets: "1m:30d,1h:10y", 993 | oldAggregation: Average, 994 | newAggregation: Sum, 995 | oldXFF: 0.5, 996 | newXFF: 0, 997 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 365 * 10, 0}}, 998 | }, 999 | 1000 | { 1001 | oldRets: "1m:30d,1h:10y", 1002 | newRets: "1s:4d,1m:20d,1h:20y", 1003 | oldAggregation: Average, 1004 | newAggregation: Sum, 1005 | oldXFF: 0.5, 1006 | newXFF: 0, 1007 | checkRanges: [][2]time.Duration{{-3600 * 24 * 20, 0}, {-3600 * 24 * 365 * 10, 0}}, 1008 | }, 1009 | { 1010 | oldRets: "1s:4d,1m:20d,1h:20y", 1011 | newRets: "1m:30d,1h:10y", 1012 | oldAggregation: Average, 1013 | newAggregation: Sum, 1014 | oldXFF: 0.5, 1015 | newXFF: 0, 1016 | checkRanges: [][2]time.Duration{{-3600 * 24 * 20, 0}, {-3600 * 24 * 365 * 10, 0}}, 1017 | 
}, 1018 | 1019 | { 1020 | oldRets: "1m:30d,1h:1y,1d:10y", 1021 | newRets: "1m:2d,1h:1y,1d:10y", 1022 | oldAggregation: Average, 1023 | newAggregation: Sum, 1024 | oldXFF: 0.5, 1025 | newXFF: 0, 1026 | checkRanges: [][2]time.Duration{{-3600*24*2 + 120, 0}, {-3600*24*365*1 + 3600, 0}, {-3600*24*365*10 + 86400, 0}}, 1027 | }, 1028 | 1029 | { 1030 | oldRets: "1m:30d,30m:184d,1h:1y,1d:10y", 1031 | newRets: "1m:30d,1h:1y,1d:10y", 1032 | oldAggregation: Average, 1033 | newAggregation: Sum, 1034 | oldXFF: 0.5, 1035 | newXFF: 0, 1036 | checkRanges: [][2]time.Duration{{-3600*24*30 + 120, 0}, {-3600*24*365*1 + 86400, 0}, {-3600*24*365*10 + 86400, 0}}, 1037 | }, 1038 | 1039 | // no retetion changes 1040 | { 1041 | oldRets: "1m:30d,1h:10y", 1042 | newRets: "1m:30d,1h:10y", 1043 | oldAggregation: Average, 1044 | newAggregation: Sum, 1045 | oldXFF: 0.5, 1046 | newXFF: 0, 1047 | checkRanges: [][2]time.Duration{{-3600 * 24 * 30, 0}, {-3600 * 24 * 3650, -3600}}, // why -3600 for 1h:10y: cwhisper does live aggregation from higher-resolutions archive 1048 | }, 1049 | } { 1050 | for _, wtype := range []string{"compressed", "standard"} { 1051 | name := strings.ReplaceAll(fmt.Sprintf("%s-%s", c.oldRets, c.newRets), ",", "_") 1052 | oldRets := MustParseRetentionDefs(c.oldRets) 1053 | newRets := MustParseRetentionDefs(c.newRets) 1054 | 1055 | t.Run(fmt.Sprintf("%s.%s", name, wtype), func(t *testing.T) { 1056 | testFilename := fmt.Sprintf("config-update.%s.%s.wsp", name, wtype) 1057 | originalFilename := fmt.Sprintf("config-update.%s.%s.original.wsp", name, wtype) 1058 | compressed := wtype == "compressed" 1059 | os.Remove(testFilename) 1060 | os.Remove(originalFilename) 1061 | 1062 | classic, err := CreateWithOptions(testFilename, oldRets, c.oldAggregation, c.oldXFF, &Options{Compressed: compressed}) 1063 | if err != nil { 1064 | t.Fatal(err) 1065 | } 1066 | 1067 | if err := populateTestFile(classic, 1); err != nil { 1068 | t.Fatal(err) 1069 | } 1070 | 1071 | classic.Close() 1072 | 
exec.Command("cp", testFilename, originalFilename).CombinedOutput() 1073 | 1074 | classic, err = OpenWithOptions(testFilename, &Options{}) 1075 | if err != nil { 1076 | t.Error(err) 1077 | } 1078 | 1079 | if err := classic.UpdateConfig(newRets, c.newAggregation, c.newXFF, &Options{Compressed: compressed}); err != nil { 1080 | t.Error(err) 1081 | } 1082 | classic.Close() 1083 | 1084 | newfile, err := OpenWithOptions(testFilename, &Options{}) 1085 | if err != nil { 1086 | t.Fatal(err) 1087 | } 1088 | oldfile, err := OpenWithOptions(originalFilename, &Options{}) 1089 | if err != nil { 1090 | t.Fatal(err) 1091 | } 1092 | 1093 | t.Log("Compare values after update") 1094 | for _, crange := range c.checkRanges { 1095 | compareWhisperFiles(t, newfile, oldfile, int(Now().Add(time.Second*crange[0]).Unix()), int(Now().Add(time.Second*crange[1]).Unix())) 1096 | } 1097 | 1098 | if got, want := newfile.aggregationMethod, c.newAggregation; got != want { 1099 | t.Errorf("newfile.aggregationMethod = %s; want %s", got, want) 1100 | } 1101 | if got, want := newfile.xFilesFactor, c.newXFF; got != want { 1102 | t.Errorf("newfile.xFilesFactor = %v; want %v", got, want) 1103 | } 1104 | 1105 | if !*keepUpdateConfigTestData { 1106 | os.Remove(testFilename) 1107 | os.Remove(originalFilename) 1108 | } 1109 | }) 1110 | } 1111 | } 1112 | } 1113 | 1114 | func populateTestFile(w *Whisper, gapn int) error { 1115 | start := Now() 1116 | for _, r := range w.Retentions() { 1117 | var ps []*TimeSeriesPoint 1118 | ptime := start.Add(-time.Second * time.Duration(r.MaxRetention())) 1119 | for i := 0; i <= r.numberOfPoints; i += gapn { 1120 | ps = append(ps, &TimeSeriesPoint{ 1121 | Time: int(ptime.Add(time.Second * time.Duration(i*r.secondsPerPoint)).Unix()), 1122 | Value: rand.NormFloat64(), 1123 | // Value: 2000.0 + float64(rand.Intn(100000))/100.0, 1124 | // Value: float64(rand.Intn(100000)), 1125 | }) 1126 | } 1127 | 1128 | if err := w.UpdateManyForArchive(ps, r.MaxRetention()); err != nil { 1129 | 
return err 1130 | } 1131 | } 1132 | 1133 | return nil 1134 | } 1135 | -------------------------------------------------------------------------------- /whisper_test.py: -------------------------------------------------------------------------------- 1 | import whisper 2 | import os 3 | import time 4 | 5 | def set_up_create(): 6 | path = "/tmp/whisper-testing.wsp" 7 | try: 8 | os.remove(path) 9 | except: 10 | pass 11 | archive_list = [[1,300], [60,30], [300,12]] 12 | def tear_down(): 13 | os.remove(path) 14 | 15 | return path, archive_list, tear_down 16 | 17 | def benchmark_create_update_fetch(): 18 | path, archive_list, tear_down = set_up_create() 19 | # start timer 20 | start_time = time.clock() 21 | for i in range(100): 22 | whisper.create(path, archive_list) 23 | 24 | seconds_ago = 3500 25 | current_value = 0.5 26 | increment = 0.2 27 | now = time.time() 28 | # file_update closes the file so we have to reopen every time 29 | for i in range(seconds_ago): 30 | whisper.update(path, current_value, now - seconds_ago + i) 31 | current_value += increment 32 | 33 | from_time = now - seconds_ago 34 | until_time = from_time + 1000 35 | 36 | whisper.fetch(path, from_time, until_time) 37 | tear_down() 38 | 39 | # end timer 40 | end_time = time.clock() 41 | elapsed_time = end_time - start_time 42 | 43 | print "Executed 100 iterations in %ss (%i ns/op)" % (elapsed_time, (elapsed_time * 1000 * 1000 * 1000) / 100) 44 | 45 | if __name__ == "__main__": 46 | benchmark_create_update_fetch() 47 | --------------------------------------------------------------------------------